diff options
author | eschrock <none@none> | 2006-05-30 15:47:16 -0700 |
---|---|---|
committer | eschrock <none@none> | 2006-05-30 15:47:16 -0700 |
commit | 99653d4ee642c6528e88224f12409a5f23060994 (patch) | |
tree | 5cbcc540b8ed86b6a008f1084f9ca031368d926f | |
parent | 354a1801a85aa6b61ff4d5e290ab708ba57e56a3 (diff) | |
download | illumos-joyent-99653d4ee642c6528e88224f12409a5f23060994.tar.gz |
PSARC 2006/223 ZFS Hot Spares
PSARC 2006/303 ZFS Clone Promotion
6276916 support for "clone swap"
6288488 du reports misleading size on RAID-Z
6393490 libzfs should be a real library
6397148 fbufs debug code should be removed from buf_hash_insert()
6405966 Hot Spare support in ZFS
6409302 passing a non-root vdev via zpool_create() panics system
6415739 assertion failed: !(zio->io_flags & 0x00040)
6416759 ::dbufs does not find bonus buffers anymore
6417978 double parity RAID-Z a.k.a. RAID6
6424554 full block re-writes need not read data in
6425111 detaching an offline device can result in import confusion
81 files changed, 6423 insertions, 3226 deletions
diff --git a/usr/src/cmd/fm/modules/common/Makefile b/usr/src/cmd/fm/modules/common/Makefile index 868a66df08..ef5ebba6f0 100644 --- a/usr/src/cmd/fm/modules/common/Makefile +++ b/usr/src/cmd/fm/modules/common/Makefile @@ -27,6 +27,6 @@ # SUBDIRS = cpumem-retire eversholt io-retire ip-transport snmp-trapgen \ - syslog-msgs zfs-diagnosis + syslog-msgs zfs-diagnosis zfs-retire include ../../Makefile.subdirs diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/Makefile b/usr/src/cmd/fm/modules/common/zfs-retire/Makefile new file mode 100644 index 0000000000..9d80ae77ee --- /dev/null +++ b/usr/src/cmd/fm/modules/common/zfs-retire/Makefile @@ -0,0 +1,33 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = zfs-retire +CLASS = common +SRCS = zfs_retire.c + +include ../../Makefile.plugin + +LDLIBS += -lzfs diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf b/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf new file mode 100644 index 0000000000..f506384bff --- /dev/null +++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf @@ -0,0 +1,29 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# fmd configuration file for the zfs retire agent. +# +subscribe fault.fs.zfs.device diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c new file mode 100644 index 0000000000..962b37bb82 --- /dev/null +++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c @@ -0,0 +1,231 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * The ZFS retire agent is responsible for managing hot spares across all pools. + * When we see a device fault, we try to open the associated pool and look for + * any hot spares. We iterate over any available hot spares and attempt a + * 'zpool replace' for each one. + */ + +#include <fm/fmd_api.h> +#include <sys/fs/zfs.h> +#include <sys/fm/protocol.h> +#include <sys/fm/fs/zfs.h> +#include <libzfs.h> + +/* + * Find a pool with a matching GUID. + */ +typedef struct find_cbdata { + uint64_t cb_guid; + zpool_handle_t *cb_zhp; +} find_cbdata_t; + +static int +find_pool(zpool_handle_t *zhp, void *data) +{ + find_cbdata_t *cbp = data; + + if (cbp->cb_guid == zpool_get_guid(zhp)) { + cbp->cb_zhp = zhp; + return (1); + } + + zpool_close(zhp); + return (0); +} + +/* + * Find a vdev within a tree with a matching GUID. 
+ */ +static nvlist_t * +find_vdev(nvlist_t *nv, uint64_t search) +{ + uint64_t guid; + nvlist_t **child; + uint_t c, children; + nvlist_t *ret; + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && + guid == search) + return (nv); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + return (NULL); + + for (c = 0; c < children; c++) { + if ((ret = find_vdev(child[c], search)) != NULL) + return (ret); + } + + return (NULL); +} + +/*ARGSUSED*/ +static void +zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, + const char *class) +{ + uint64_t pool_guid, vdev_guid; + char *dev_name; + zpool_handle_t *zhp; + nvlist_t *resource, *config, *nvroot; + nvlist_t *vdev; + nvlist_t **spares, **faults; + uint_t s, nspares, f, nfaults; + nvlist_t *replacement; + find_cbdata_t cb; + libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl); + + /* + * Get information from the fault. + */ + if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, + &faults, &nfaults) != 0) + return; + + for (f = 0; f < nfaults; f++) { + if (nvlist_lookup_nvlist(faults[f], FM_FAULT_RESOURCE, + &resource) != 0 || + nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL, + &pool_guid) != 0 || + nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV, + &vdev_guid) != 0) + continue; + + /* + * From the pool guid and vdev guid, get the pool name and + * device name. + */ + cb.cb_guid = pool_guid; + if (zpool_iter(zhdl, find_pool, &cb) != 1) + continue; + + zhp = cb.cb_zhp; + config = zpool_get_config(zhp, NULL); + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) != 0) { + zpool_close(zhp); + continue; + } + + if ((vdev = find_vdev(nvroot, vdev_guid)) == NULL) { + zpool_close(zhp); + continue; + } + + /* + * Find out if there are any hot spares available in the pool. 
+ */ + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) != 0) { + zpool_close(zhp); + continue; + } + + if (nvlist_alloc(&replacement, NV_UNIQUE_NAME, 0) != 0) { + zpool_close(zhp); + continue; + } + + if (nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE, + VDEV_TYPE_ROOT) != 0) { + nvlist_free(replacement); + zpool_close(zhp); + continue; + } + + dev_name = zpool_vdev_name(zhdl, zhp, vdev); + + /* + * Try to replace each spare, ending when we successfully + * replace it. + */ + for (s = 0; s < nspares; s++) { + char *spare_name; + + if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH, + &spare_name) != 0) + continue; + + if (nvlist_add_nvlist_array(replacement, + ZPOOL_CONFIG_CHILDREN, &spares[s], 1) != 0) + continue; + + if (zpool_vdev_attach(zhp, dev_name, spare_name, + replacement, B_TRUE) == 0) + break; + } + + free(dev_name); + nvlist_free(replacement); + zpool_close(zhp); + } +} + +static const fmd_hdl_ops_t fmd_ops = { + zfs_retire_recv, /* fmdo_recv */ + NULL, /* fmdo_timeout */ + NULL, /* fmdo_close */ + NULL, /* fmdo_stats */ + NULL, /* fmdo_gc */ +}; + +static const fmd_prop_t fmd_props[] = { + { NULL, 0, NULL } +}; + +static const fmd_hdl_info_t fmd_info = { + "ZFS Retire Agent", "1.0", &fmd_ops, fmd_props +}; + +void +_fmd_init(fmd_hdl_t *hdl) +{ + libzfs_handle_t *zhdl; + + if ((zhdl = libzfs_init()) == NULL) + return; + + if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) { + libzfs_fini(zhdl); + return; + } + + fmd_hdl_setspecific(hdl, zhdl); +} + +void +_fmd_fini(fmd_hdl_t *hdl) +{ + libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl); + + if (zhdl != NULL) + libzfs_fini(zhdl); +} diff --git a/usr/src/cmd/fm/schemes/zfs/scheme.c b/usr/src/cmd/fm/schemes/zfs/scheme.c index 7f2532a637..e28f7b231c 100644 --- a/usr/src/cmd/fm/schemes/zfs/scheme.c +++ b/usr/src/cmd/fm/schemes/zfs/scheme.c @@ -34,6 +34,8 @@ typedef struct cbdata { zpool_handle_t *cb_pool; } cbdata_t; +libzfs_handle_t *g_zfs; + static int 
find_pool(zpool_handle_t *zhp, void *data) { @@ -66,7 +68,7 @@ fmd_fmri_nvl2str(nvlist_t *nvl, char *buf, size_t buflen) cb.cb_guid = pool_guid; cb.cb_pool = NULL; - if (zpool_iter(find_pool, &cb) == 1) { + if (zpool_iter(g_zfs, find_pool, &cb) == 1) { name = zpool_get_name(cb.cb_pool); } else { (void) snprintf(guidbuf, sizeof (guidbuf), "%llx", pool_guid); @@ -135,7 +137,7 @@ fmd_fmri_present(nvlist_t *nvl) cb.cb_guid = pool_guid; cb.cb_pool = NULL; - if (zpool_iter(find_pool, &cb) != 1) + if (zpool_iter(g_zfs, find_pool, &cb) != 1) return (0); if (nvlist_lookup_uint64(nvl, FM_FMRI_ZFS_VDEV, &vdev_guid) != 0) { @@ -163,7 +165,7 @@ fmd_fmri_unusable(nvlist_t *nvl) cb.cb_guid = pool_guid; cb.cb_pool = NULL; - if (zpool_iter(find_pool, &cb) != 1) + if (zpool_iter(g_zfs, find_pool, &cb) != 1) return (1); if (nvlist_lookup_uint64(nvl, FM_FMRI_ZFS_VDEV, &vdev_guid) != 0) { @@ -189,3 +191,21 @@ fmd_fmri_unusable(nvlist_t *nvl) return (ret); } + +int +fmd_fmri_init(void) +{ + g_zfs = libzfs_init(); + + if (g_zfs == NULL) + return (-1); + else + return (0); +} + +void +fmd_fmri_fini(void) +{ + if (g_zfs) + libzfs_fini(g_zfs); +} diff --git a/usr/src/cmd/fs.d/df.c b/usr/src/cmd/fs.d/df.c index 0a38f44b1a..3ee66576a5 100644 --- a/usr/src/cmd/fs.d/df.c +++ b/usr/src/cmd/fs.d/df.c @@ -237,55 +237,43 @@ static void do_df(int, char **) __NORETURN; static void parse_options(int, char **); static char *basename(char *); - -/* ARGSUSED */ -static void -dummy_error_handler(const char *fmt, va_list ap) -{ - /* Do nothing */ -} - -static zfs_handle_t *(*_zfs_open)(const char *, int); +static libzfs_handle_t *(*_libzfs_init)(boolean_t); +static zfs_handle_t *(*_zfs_open)(libzfs_handle_t *, const char *, int); static void (*_zfs_close)(zfs_handle_t *); static uint64_t (*_zfs_prop_get_int)(zfs_handle_t *, zfs_prop_t); -static void (*_zfs_set_error_handler)(void (*)(const char *, va_list)); +static libzfs_handle_t *g_zfs; /* * Dynamically check for libzfs, in case the user hasn't 
installed the SUNWzfs * packages. A basic utility such as df shouldn't depend on optional * filesystems. */ -static int +static boolean_t load_libzfs(void) { void *hdl; - if (_zfs_open != NULL) - return (1); + if (_libzfs_init != NULL) + return (g_zfs != NULL); if ((hdl = dlopen("libzfs.so", RTLD_LAZY)) != NULL) { - _zfs_set_error_handler = (void (*)()) - dlsym(hdl, "zfs_set_error_handler"); + _libzfs_init = (libzfs_handle_t *(*)(boolean_t))dlsym(hdl, + "libzfs_init"); _zfs_open = (zfs_handle_t *(*)())dlsym(hdl, "zfs_open"); _zfs_close = (void (*)())dlsym(hdl, "zfs_close"); _zfs_prop_get_int = (uint64_t (*)()) dlsym(hdl, "zfs_prop_get_int"); - if (_zfs_set_error_handler != NULL) { + if (_libzfs_init != NULL) { assert(_zfs_open != NULL); assert(_zfs_close != NULL); assert(_zfs_prop_get_int != NULL); - /* - * Disable ZFS error reporting, so we don't get messages - * like "can't open ..." under race conditions. - */ - _zfs_set_error_handler(dummy_error_handler); - return (1); + g_zfs = _libzfs_init(B_FALSE); } } - return (0); + return (g_zfs != NULL); } int @@ -1257,7 +1245,7 @@ adjust_total_blocks(struct df_request *dfrp, fsblkcnt64_t *total, do { *slash = '\0'; - if ((zhp = _zfs_open(dataset, ZFS_TYPE_ANY)) == NULL) { + if ((zhp = _zfs_open(g_zfs, dataset, ZFS_TYPE_ANY)) == NULL) { free(dataset); return; } @@ -1274,7 +1262,7 @@ adjust_total_blocks(struct df_request *dfrp, fsblkcnt64_t *total, } while ((slash = strrchr(dataset, '/')) != NULL); - if ((zhp = _zfs_open(dataset, ZFS_TYPE_ANY)) == NULL) { + if ((zhp = _zfs_open(g_zfs, dataset, ZFS_TYPE_ANY)) == NULL) { free(dataset); return; } diff --git a/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c b/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c index 26376e36a6..6a8585d872 100644 --- a/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c +++ b/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c @@ -142,7 +142,8 @@ main(int argc, char **argv) return (1); } - if ((config = zpool_read_label(fd)) == NULL) + if (zpool_read_label(fd, &config) != 0 || + config == NULL) return (1); 
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c index 5b218aee5f..73b1cbef62 100644 --- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c @@ -208,73 +208,6 @@ freelist_walk_fini(mdb_walk_state_t *wsp) { } -typedef struct dbuf_walk_data { - dbuf_hash_table_t ht; - int64_t bucket; - uintptr_t dbp; - dmu_buf_impl_t db; -} dbuf_walk_data_t; - -static int -dbuf_walk_init(mdb_walk_state_t *wsp) -{ - dbuf_walk_data_t *dwd; - - if (wsp->walk_addr != NULL) { - mdb_warn("must supply starting address\n"); - return (WALK_ERR); - } - - dwd = mdb_alloc(sizeof (dbuf_walk_data_t), UM_SLEEP); - - if (mdb_readvar(&dwd->ht, "dbuf_hash_table") == -1) { - mdb_warn("failed to read 'dbuf_hash_table'"); - mdb_free(dwd, sizeof (dbuf_walk_data_t)); - return (WALK_ERR); - } - dwd->bucket = -1; - dwd->dbp = 0; - wsp->walk_data = dwd; - return (WALK_NEXT); -} - -static int -dbuf_walk_step(mdb_walk_state_t *wsp) -{ - int status; - dbuf_walk_data_t *dwd = wsp->walk_data; - - while (dwd->dbp == 0) { - dwd->bucket++; - if (dwd->bucket == dwd->ht.hash_table_mask+1) - return (WALK_DONE); - - if (mdb_vread(&dwd->dbp, sizeof (void *), - (uintptr_t)(dwd->ht.hash_table+dwd->bucket)) == -1) { - mdb_warn("failed to read hash bucket %u at %p", - dwd->bucket, dwd->ht.hash_table+dwd->bucket); - return (WALK_DONE); - } - } - - wsp->walk_addr = dwd->dbp; - if (mdb_vread(&dwd->db, sizeof (dmu_buf_impl_t), - wsp->walk_addr) == -1) { - mdb_warn("failed to read dbuf at %p", wsp->walk_addr); - return (WALK_DONE); - } - status = wsp->walk_callback(wsp->walk_addr, &dwd->db, wsp->walk_cbdata); - - dwd->dbp = (uintptr_t)dwd->db.db_hash_next; - return (status); -} - -static void -dbuf_walk_fini(mdb_walk_state_t *wsp) -{ - dbuf_walk_data_t *dwd = wsp->walk_data; - mdb_free(dwd, sizeof (dbuf_walk_data_t)); -} static int dataset_name(uintptr_t addr, char *buf) @@ -693,7 +626,7 
@@ dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) return (DCMD_ERR); } - if (mdb_pwalk("dbufs", dbufs_cb, &data, 0) != 0) { + if (mdb_pwalk("dmu_buf_impl_t", dbufs_cb, &data, 0) != 0) { mdb_warn("can't walk dbufs"); return (DCMD_ERR); } @@ -1580,8 +1513,6 @@ static const mdb_walker_t walkers[] = { { LIST_WALK_NAME, LIST_WALK_DESC, list_walk_init, list_walk_step, list_walk_fini }, #endif - { "dbufs", "walk cached ZFS dbufs", - dbuf_walk_init, dbuf_walk_step, dbuf_walk_fini }, { "zms_freelist", "walk ZFS metaslab freelist", freelist_walk_init, freelist_walk_step, freelist_walk_fini }, { "txg_list", "given any txg_list_t *, walk all entries in all txgs", diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c index 4e808b8e48..37e79f6322 100644 --- a/usr/src/cmd/truss/codes.c +++ b/usr/src/cmd/truss/codes.c @@ -937,6 +937,8 @@ const struct ioc { "zfs_cmd_t" }, { (uint_t)ZFS_IOC_BOOKMARK_NAME, "ZFS_IOC_BOOKMARK_NAME", "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_PROMOTE, "ZFS_IOC_PROMOTE", + "zfs_cmd_t" }, /* kssl ioctls */ { (uint_t)KSSL_ADD_ENTRY, "KSSL_ADD_ENTRY", diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c index f283148ef8..0af9a59690 100644 --- a/usr/src/cmd/zdb/zdb.c +++ b/usr/src/cmd/zdb/zdb.c @@ -744,8 +744,8 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) (u_longlong_t)ds->ds_fsid_guid); (void) printf("\t\tguid = %llu\n", (u_longlong_t)ds->ds_guid); - (void) printf("\t\tinconsistent = %llu\n", - (u_longlong_t)ds->ds_inconsistent); + (void) printf("\t\tflags = %llx\n", + (u_longlong_t)ds->ds_flags); (void) printf("\t\tbp = %s\n", blkbuf); } @@ -755,7 +755,9 @@ dump_bplist(objset_t *mos, uint64_t object, char *name) bplist_t bpl = { 0 }; blkptr_t blk, *bp = &blk; uint64_t itor = 0; - char numbuf[6]; + char bytes[6]; + char comp[6]; + char uncomp[6]; if (dump_opt['d'] < 3) return; @@ -766,10 +768,17 @@ dump_bplist(objset_t *mos, uint64_t object, char *name) return; } - 
nicenum(bpl.bpl_phys->bpl_bytes, numbuf); - - (void) printf("\n %s: %llu entries, %s\n", - name, (u_longlong_t)bpl.bpl_phys->bpl_entries, numbuf); + nicenum(bpl.bpl_phys->bpl_bytes, bytes); + if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) { + nicenum(bpl.bpl_phys->bpl_comp, comp); + nicenum(bpl.bpl_phys->bpl_uncomp, uncomp); + (void) printf("\n %s: %llu entries, %s (%s/%s comp)\n", + name, (u_longlong_t)bpl.bpl_phys->bpl_entries, + bytes, comp, uncomp); + } else { + (void) printf("\n %s: %llu entries, %s\n", + name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes); + } if (dump_opt['d'] < 5) { bplist_close(&bpl); diff --git a/usr/src/cmd/zfs/zfs_iter.c b/usr/src/cmd/zfs/zfs_iter.c index bc8e5ea59c..9f8f37b765 100644 --- a/usr/src/cmd/zfs/zfs_iter.c +++ b/usr/src/cmd/zfs/zfs_iter.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -160,7 +159,7 @@ zfs_compare(const void *larg, const void *rarg, void *unused) } int -zfs_for_each(int argc, char **argv, int recurse, zfs_type_t types, +zfs_for_each(int argc, char **argv, boolean_t recurse, zfs_type_t types, zfs_iter_f callback, void *data) { callback_data_t cb; @@ -190,7 +189,7 @@ zfs_for_each(int argc, char **argv, int recurse, zfs_type_t types, * If given no arguments, iterate over all datasets. 
*/ cb.cb_recurse = 1; - ret = zfs_iter_root(zfs_callback, &cb); + ret = zfs_iter_root(g_zfs, zfs_callback, &cb); } else { int i; zfs_handle_t *zhp; @@ -209,8 +208,8 @@ zfs_for_each(int argc, char **argv, int recurse, zfs_type_t types, } for (i = 0; i < argc; i++) { - if ((zhp = zfs_open(argv[i], argtype)) != NULL) - ret = zfs_callback(zhp, &cb); + if ((zhp = zfs_open(g_zfs, argv[i], argtype)) != NULL) + ret |= zfs_callback(zhp, &cb); else ret = 1; } diff --git a/usr/src/cmd/zfs/zfs_iter.h b/usr/src/cmd/zfs/zfs_iter.h index 03428b827b..c69049b28f 100644 --- a/usr/src/cmd/zfs/zfs_iter.h +++ b/usr/src/cmd/zfs/zfs_iter.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -33,7 +32,7 @@ extern "C" { #endif -int zfs_for_each(int, char **, int, zfs_type_t, zfs_iter_f, void *); +int zfs_for_each(int, char **, boolean_t, zfs_type_t, zfs_iter_f, void *); #ifdef __cplusplus } diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c index 5b04a76f47..0fba9046bd 100644 --- a/usr/src/cmd/zfs/zfs_main.c +++ b/usr/src/cmd/zfs/zfs_main.c @@ -47,6 +47,9 @@ #include <libzfs.h> #include "zfs_iter.h" +#include "zfs_util.h" + +libzfs_handle_t *g_zfs; static FILE *mnttab_file; @@ -66,6 +69,7 @@ static int zfs_do_share(int argc, char **argv); static int zfs_do_unshare(int argc, char **argv); static int zfs_do_send(int argc, char **argv); static int zfs_do_receive(int argc, char **argv); +static int zfs_do_promote(int argc, char **argv); /* * These libumem hooks provide a reasonable set of defaults for the allocator's @@ -91,6 +95,7 @@ typedef enum { HELP_INHERIT, HELP_LIST, HELP_MOUNT, + HELP_PROMOTE, HELP_RECEIVE, HELP_RENAME, HELP_ROLLBACK, @@ -124,6 +129,7 @@ static zfs_command_t command_table[] = { { "snapshot", zfs_do_snapshot, HELP_SNAPSHOT }, { "rollback", zfs_do_rollback, HELP_ROLLBACK }, { "clone", zfs_do_clone, HELP_CLONE }, + { "promote", zfs_do_promote, HELP_PROMOTE }, { "rename", zfs_do_rename, HELP_RENAME }, { NULL }, { "list", zfs_do_list, HELP_LIST }, @@ -176,6 +182,8 @@ get_usage(zfs_help_t idx) return (gettext("\tmount\n" "\tmount [-o opts] [-O] -a\n" "\tmount [-o opts] [-O] <filesystem>\n")); + case HELP_PROMOTE: + return (gettext("\tpromote <clone filesystem>\n")); case HELP_RECEIVE: return (gettext("\treceive [-vn] <filesystem|volume|snapshot>\n" "\treceive [-vn] -d <filesystem>\n")); @@ -228,10 +236,10 @@ safe_malloc(size_t size) * a complete usage message. */ static void -usage(int requested) +usage(boolean_t requested) { int i; - int show_properties = FALSE; + boolean_t show_properties = B_FALSE; FILE *fp = requested ? 
stdout : stderr; if (current_command == NULL) { @@ -260,7 +268,7 @@ usage(int requested) strcmp(current_command->name, "get") == 0 || strcmp(current_command->name, "inherit") == 0 || strcmp(current_command->name, "list") == 0) - show_properties = TRUE; + show_properties = B_TRUE; if (show_properties) { @@ -313,27 +321,27 @@ zfs_do_clone(int argc, char **argv) if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); - usage(FALSE); + usage(B_FALSE); } /* check number of arguments */ if (argc < 2) { (void) fprintf(stderr, gettext("missing source dataset " "argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc < 3) { (void) fprintf(stderr, gettext("missing target dataset " "argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc > 3) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } /* open the source dataset */ - if ((zhp = zfs_open(argv[1], ZFS_TYPE_SNAPSHOT)) == NULL) + if ((zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_SNAPSHOT)) == NULL) return (1); /* pass to libzfs */ @@ -341,7 +349,7 @@ zfs_do_clone(int argc, char **argv) /* create the mountpoint if necessary */ if (ret == 0) { - zfs_handle_t *clone = zfs_open(argv[2], ZFS_TYPE_ANY); + zfs_handle_t *clone = zfs_open(g_zfs, argv[2], ZFS_TYPE_ANY); if (clone != NULL) { if ((ret = zfs_mount(clone, NULL, 0)) == 0) ret = zfs_share(clone); @@ -374,7 +382,7 @@ zfs_do_create(int argc, char **argv) char *size = NULL; char *blocksize = NULL; int c; - int noreserve = FALSE; + boolean_t noreserve = B_FALSE; int ret; /* check options */ @@ -388,24 +396,24 @@ zfs_do_create(int argc, char **argv) blocksize = optarg; break; case 's': - noreserve = TRUE; + noreserve = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing size " "argument\n")); - usage(FALSE); + usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } if (noreserve && type 
!= ZFS_TYPE_VOLUME) { (void) fprintf(stderr, gettext("'-s' can only be used when " "creating a volume\n")); - usage(FALSE); + usage(B_FALSE); } argc -= optind; @@ -415,18 +423,18 @@ zfs_do_create(int argc, char **argv) if (argc == 0) { (void) fprintf(stderr, gettext("missing %s argument\n"), zfs_type_to_name(type)); - usage(FALSE); + usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } /* pass to libzfs */ - if (zfs_create(argv[0], type, size, blocksize) != 0) + if (zfs_create(g_zfs, argv[0], type, size, blocksize) != 0) return (1); - if ((zhp = zfs_open(argv[0], ZFS_TYPE_ANY)) == NULL) + if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL) return (1); /* @@ -476,7 +484,7 @@ zfs_do_create(int argc, char **argv) * either be a child, or a clone of a child. */ typedef struct destroy_cbdata { - int cb_first; + boolean_t cb_first; int cb_force; int cb_recurse; int cb_error; @@ -511,7 +519,7 @@ destroy_check_dependent(zfs_handle_t *zhp, void *data) zfs_type_to_name(zfs_get_type(cbp->cb_target))); (void) fprintf(stderr, gettext("use '-r' to destroy " "the following datasets:\n")); - cbp->cb_first = 0; + cbp->cb_first = B_FALSE; cbp->cb_error = 1; } @@ -532,7 +540,7 @@ destroy_check_dependent(zfs_handle_t *zhp, void *data) zfs_type_to_name(zfs_get_type(cbp->cb_target))); (void) fprintf(stderr, gettext("use '-R' to destroy " "the following datasets:\n")); - cbp->cb_first = 0; + cbp->cb_first = B_FALSE; cbp->cb_error = 1; } @@ -597,7 +605,7 @@ zfs_do_destroy(int argc, char **argv) default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -607,15 +615,15 @@ zfs_do_destroy(int argc, char **argv) /* check number of arguments */ if (argc == 0) { (void) fprintf(stderr, gettext("missing path argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + 
usage(B_FALSE); } /* Open the given dataset */ - if ((zhp = zfs_open(argv[0], ZFS_TYPE_ANY)) == NULL) + if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL) return (1); cb.cb_target = zhp; @@ -641,7 +649,7 @@ zfs_do_destroy(int argc, char **argv) /* * Check for any dependents and/or clones. */ - cb.cb_first = 1; + cb.cb_first = B_TRUE; if (!cb.cb_doclones) (void) zfs_iter_dependents(zhp, destroy_check_dependent, &cb); @@ -678,13 +686,13 @@ zfs_do_destroy(int argc, char **argv) * columns to display as well as which property types to allow. */ typedef struct get_cbdata { - int cb_scripted; int cb_sources; - int cb_literal; int cb_columns[4]; - zfs_prop_t cb_prop[ZFS_NPROP_ALL]; int cb_nprop; - int cb_isall; + boolean_t cb_scripted; + boolean_t cb_literal; + boolean_t cb_isall; + zfs_prop_t cb_prop[ZFS_NPROP_ALL]; } get_cbdata_t; #define GET_COL_NAME 1 @@ -804,7 +812,7 @@ static int zfs_do_get(int argc, char **argv) { get_cbdata_t cb = { 0 }; - int recurse = 0; + boolean_t recurse = B_FALSE; int c; char *value, *fields, *badopt; int i; @@ -823,18 +831,18 @@ zfs_do_get(int argc, char **argv) while ((c = getopt(argc, argv, ":o:s:rHp")) != -1) { switch (c) { case 'p': - cb.cb_literal = TRUE; + cb.cb_literal = B_TRUE; break; case 'r': - recurse = TRUE; + recurse = B_TRUE; break; case 'H': - cb.cb_scripted = TRUE; + cb.cb_scripted = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); - usage(FALSE); + usage(B_FALSE); break; case 'o': /* @@ -852,7 +860,7 @@ zfs_do_get(int argc, char **argv) (void) fprintf(stderr, gettext("too " "many fields given to -o " "option\n")); - usage(FALSE); + usage(B_FALSE); } switch (getsubopt(&optarg, col_subopts, @@ -873,7 +881,7 @@ zfs_do_get(int argc, char **argv) (void) fprintf(stderr, gettext("invalid column name " "'%s'\n"), value); - usage(FALSE); + usage(B_FALSE); } } break; @@ -906,7 +914,7 @@ zfs_do_get(int argc, char **argv) (void) fprintf(stderr, gettext("invalid source " 
"'%s'\n"), value); - usage(FALSE); + usage(B_FALSE); } } break; @@ -914,7 +922,7 @@ zfs_do_get(int argc, char **argv) case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -924,7 +932,7 @@ zfs_do_get(int argc, char **argv) if (argc < 1) { (void) fprintf(stderr, gettext("missing property " "argument\n")); - usage(FALSE); + usage(B_FALSE); } fields = argv[0]; @@ -935,7 +943,7 @@ zfs_do_get(int argc, char **argv) * given dataset. */ if (strcmp(fields, "all") == 0) - cb.cb_isall = TRUE; + cb.cb_isall = B_TRUE; if ((ret = zfs_get_proplist(fields, cb.cb_prop, ZFS_NPROP_ALL, &cb.cb_nprop, &badopt)) != 0) { @@ -945,7 +953,7 @@ zfs_do_get(int argc, char **argv) else (void) fprintf(stderr, gettext("too many properties " "specified\n")); - usage(FALSE); + usage(B_FALSE); } argc--; @@ -954,7 +962,7 @@ zfs_do_get(int argc, char **argv) /* check for at least one dataset name */ if (argc < 1) { (void) fprintf(stderr, gettext("missing dataset argument\n")); - usage(FALSE); + usage(B_FALSE); } /* @@ -1008,7 +1016,7 @@ inherit_callback(zfs_handle_t *zhp, void *data) static int zfs_do_inherit(int argc, char **argv) { - int recurse = 0; + boolean_t recurse = B_FALSE; int c; zfs_prop_t prop; char *propname; @@ -1017,13 +1025,13 @@ zfs_do_inherit(int argc, char **argv) while ((c = getopt(argc, argv, "r")) != -1) { switch (c) { case 'r': - recurse = TRUE; + recurse = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -1033,11 +1041,11 @@ zfs_do_inherit(int argc, char **argv) /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing property argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing dataset argument\n")); - usage(FALSE); + usage(B_FALSE); } propname = argv[0]; @@ -1050,7 +1058,7 @@ zfs_do_inherit(int argc, char **argv) if ((prop = 
zfs_name_to_prop(propname)) == ZFS_PROP_INVAL) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), propname); - usage(FALSE); + usage(B_FALSE); } if (zfs_prop_readonly(prop)) { (void) fprintf(stderr, gettext("%s property is read-only\n"), @@ -1083,8 +1091,8 @@ zfs_do_inherit(int argc, char **argv) * '-r' is specified. */ typedef struct list_cbdata { - int cb_first; - int cb_scripted; + boolean_t cb_first; + boolean_t cb_scripted; zfs_prop_t cb_fields[ZFS_NPROP_ALL]; int cb_fieldcount; } list_cbdata_t; @@ -1129,7 +1137,7 @@ print_dataset(zfs_handle_t *zhp, zfs_prop_t *fields, size_t count, int scripted) } if (zfs_prop_get(zhp, fields[i], property, - sizeof (property), NULL, NULL, 0, FALSE) != 0) + sizeof (property), NULL, NULL, 0, B_FALSE) != 0) (void) strlcpy(property, "-", sizeof (property)); /* @@ -1159,7 +1167,7 @@ list_callback(zfs_handle_t *zhp, void *data) if (cbp->cb_first) { if (!cbp->cb_scripted) print_header(cbp->cb_fields, cbp->cb_fieldcount); - cbp->cb_first = FALSE; + cbp->cb_first = B_FALSE; } print_dataset(zhp, cbp->cb_fields, cbp->cb_fieldcount, @@ -1172,8 +1180,8 @@ static int zfs_do_list(int argc, char **argv) { int c; - int recurse = 0; - int scripted = FALSE; + boolean_t recurse = B_FALSE; + boolean_t scripted = B_FALSE; static char default_fields[] = "name,used,available,referenced,mountpoint"; int types = ZFS_TYPE_ANY; @@ -1193,10 +1201,10 @@ zfs_do_list(int argc, char **argv) fields = optarg; break; case 'r': - recurse = TRUE; + recurse = B_TRUE; break; case 'H': - scripted = TRUE; + scripted = B_TRUE; break; case 't': types = 0; @@ -1216,19 +1224,19 @@ zfs_do_list(int argc, char **argv) (void) fprintf(stderr, gettext("invalid type '%s'\n"), value); - usage(FALSE); + usage(B_FALSE); } } break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); - usage(FALSE); + usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } 
} @@ -1258,16 +1266,16 @@ zfs_do_list(int argc, char **argv) else (void) fprintf(stderr, gettext("too many properties " "specified\n")); - usage(FALSE); + usage(B_FALSE); } cb.cb_fieldcount += alloffset; cb.cb_scripted = scripted; - cb.cb_first = TRUE; + cb.cb_first = B_TRUE; ret = zfs_for_each(argc, argv, recurse, types, list_callback, &cb); - if (ret == 0 && cb.cb_first == TRUE) + if (ret == 0 && cb.cb_first) (void) printf(gettext("no datasets available\n")); return (ret); @@ -1283,39 +1291,76 @@ static int zfs_do_rename(int argc, char **argv) { zfs_handle_t *zhp; - int ret = 1; + int ret; /* check options */ if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); - usage(FALSE); + usage(B_FALSE); } /* check number of arguments */ if (argc < 2) { (void) fprintf(stderr, gettext("missing source dataset " "argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc < 3) { (void) fprintf(stderr, gettext("missing target dataset " "argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc > 3) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } - if ((zhp = zfs_open(argv[1], ZFS_TYPE_ANY)) == NULL) + if ((zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_ANY)) == NULL) return (1); - if (zfs_rename(zhp, argv[2]) != 0) - goto error; + ret = (zfs_rename(zhp, argv[2]) != 0); + + zfs_close(zhp); + return (ret); +} + +/* + * zfs promote <fs> + * + * Promotes the given clone fs to be the parent + */ +/* ARGSUSED */ +static int +zfs_do_promote(int argc, char **argv) +{ + zfs_handle_t *zhp; + int ret; + + /* check options */ + if (argc > 1 && argv[1][0] == '-') { + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + argv[1][1]); + usage(B_FALSE); + } + + /* check number of arguments */ + if (argc < 2) { + (void) fprintf(stderr, gettext("missing clone filesystem " + "argument\n")); + usage(B_FALSE); + } + if (argc > 2) { + (void) fprintf(stderr, gettext("too many arguments\n")); + 
usage(B_FALSE); + } + + zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + return (1); + + ret = (zfs_promote(zhp) != 0); - ret = 0; -error: zfs_close(zhp); return (ret); } @@ -1333,12 +1378,12 @@ error: */ typedef struct rollback_cbdata { uint64_t cb_create; - int cb_first; + boolean_t cb_first; int cb_doclones; char *cb_target; int cb_error; - int cb_recurse; - int cb_dependent; + boolean_t cb_recurse; + boolean_t cb_dependent; } rollback_cbdata_t; /* @@ -1352,8 +1397,10 @@ rollback_check(zfs_handle_t *zhp, void *data) { rollback_cbdata_t *cbp = data; - if (cbp->cb_doclones) + if (cbp->cb_doclones) { + zfs_close(zhp); return (0); + } if (!cbp->cb_dependent) { if (strcmp(zfs_get_name(zhp), cbp->cb_target) != 0 && @@ -1374,10 +1421,10 @@ rollback_check(zfs_handle_t *zhp, void *data) } if (cbp->cb_recurse) { - cbp->cb_dependent = TRUE; + cbp->cb_dependent = B_TRUE; (void) zfs_iter_dependents(zhp, rollback_check, cbp); - cbp->cb_dependent = FALSE; + cbp->cb_dependent = B_FALSE; } else { (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); @@ -1429,7 +1476,7 @@ zfs_do_rollback(int argc, char **argv) case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -1439,22 +1486,22 @@ zfs_do_rollback(int argc, char **argv) /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing dataset argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } /* open the snapshot */ - if ((snap = zfs_open(argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) + if ((snap = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) return (1); /* open the parent dataset */ (void) strlcpy(parentname, argv[0], sizeof (parentname)); verify((delim = strrchr(parentname, '@')) != NULL); *delim = '\0'; - if ((zhp = zfs_open(parentname, ZFS_TYPE_ANY)) == NULL) { + if ((zhp = zfs_open(g_zfs, 
parentname, ZFS_TYPE_ANY)) == NULL) { zfs_close(snap); return (1); } @@ -1465,7 +1512,7 @@ zfs_do_rollback(int argc, char **argv) */ cb.cb_target = argv[0]; cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG); - cb.cb_first = 1; + cb.cb_first = B_TRUE; cb.cb_error = 0; (void) zfs_iter_children(zhp, rollback_check, &cb); @@ -1606,18 +1653,18 @@ zfs_do_set(int argc, char **argv) if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); - usage(FALSE); + usage(B_FALSE); } /* check number of arguments */ if (argc < 2) { (void) fprintf(stderr, gettext("missing property=value " "argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc < 3) { (void) fprintf(stderr, gettext("missing dataset name\n")); - usage(FALSE); + usage(B_FALSE); } /* validate property=value argument */ @@ -1625,7 +1672,7 @@ zfs_do_set(int argc, char **argv) if ((cb.cb_value = strchr(cb.cb_propname, '=')) == NULL) { (void) fprintf(stderr, gettext("missing value in " "property=value argument\n")); - usage(FALSE); + usage(B_FALSE); } *cb.cb_value = '\0'; @@ -1634,12 +1681,12 @@ zfs_do_set(int argc, char **argv) if (*cb.cb_propname == '\0') { (void) fprintf(stderr, gettext("missing property in property=value argument\n")); - usage(FALSE); + usage(B_FALSE); } if (*cb.cb_value == '\0') { (void) fprintf(stderr, gettext("missing value in property=value argument\n")); - usage(FALSE); + usage(B_FALSE); } /* get the property type */ @@ -1647,7 +1694,7 @@ zfs_do_set(int argc, char **argv) ZFS_PROP_INVAL) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), cb.cb_propname); - usage(FALSE); + usage(B_FALSE); } /* @@ -1655,10 +1702,10 @@ zfs_do_set(int argc, char **argv) * once now so we don't generate multiple errors each time we try to * apply it to a dataset. 
*/ - if (zfs_prop_validate(cb.cb_prop, cb.cb_value, NULL) != 0) + if (zfs_prop_validate(g_zfs, cb.cb_prop, cb.cb_value, NULL) != 0) return (1); - return (zfs_for_each(argc - 2, argv + 2, FALSE, + return (zfs_for_each(argc - 2, argv + 2, B_FALSE, ZFS_TYPE_ANY, set_callback, &cb)); } @@ -1675,20 +1722,20 @@ zfs_do_snapshot(int argc, char **argv) if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); - usage(FALSE); + usage(B_FALSE); } /* check number of arguments */ if (argc < 2) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } - return (zfs_snapshot(argv[1]) != 0); + return (zfs_snapshot(g_zfs, argv[1]) != 0); } /* @@ -1712,12 +1759,12 @@ zfs_do_send(int argc, char **argv) case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); - usage(FALSE); + usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -1727,11 +1774,11 @@ zfs_do_send(int argc, char **argv) /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } if (isatty(STDOUT_FILENO)) { @@ -1743,10 +1790,11 @@ zfs_do_send(int argc, char **argv) } if (fromname) { - if ((zhp_from = zfs_open(fromname, ZFS_TYPE_SNAPSHOT)) == NULL) + if ((zhp_from = zfs_open(g_zfs, fromname, + ZFS_TYPE_SNAPSHOT)) == NULL) return (1); } - if ((zhp_to = zfs_open(argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) + if ((zhp_to = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) return (1); err = zfs_send(zhp_to, zhp_from); @@ -1767,31 +1815,31 @@ static int zfs_do_receive(int argc, char **argv) { int c, err; - int isprefix 
= FALSE; - int dryrun = FALSE; - int verbose = FALSE; + boolean_t isprefix = B_FALSE; + boolean_t dryrun = B_FALSE; + boolean_t verbose = B_FALSE; /* check options */ while ((c = getopt(argc, argv, ":dnv")) != -1) { switch (c) { case 'd': - isprefix = TRUE; + isprefix = B_TRUE; break; case 'n': - dryrun = TRUE; + dryrun = B_TRUE; break; case 'v': - verbose = TRUE; + verbose = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); - usage(FALSE); + usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -1801,11 +1849,11 @@ zfs_do_receive(int argc, char **argv) /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } if (isatty(STDIN_FILENO)) { @@ -1816,7 +1864,7 @@ zfs_do_receive(int argc, char **argv) return (1); } - err = zfs_receive(argv[0], isprefix, verbose, dryrun); + err = zfs_receive(g_zfs, argv[0], isprefix, verbose, dryrun); return (err != 0); } @@ -1868,7 +1916,7 @@ get_all_filesystems(zfs_handle_t ***fslist, size_t *count) { get_all_cbdata_t cb = { 0 }; - (void) zfs_iter_root(get_one_filesystem, &cb); + (void) zfs_iter_root(g_zfs, get_one_filesystem, &cb); *fslist = cb.cb_handles; *count = cb.cb_used; @@ -1883,9 +1931,9 @@ mountpoint_compare(const void *a, const void *b) char mountb[MAXPATHLEN]; verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta, - sizeof (mounta), NULL, NULL, 0, FALSE) == 0); + sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb, - sizeof (mountb), NULL, NULL, 0, FALSE) == 0); + sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0); return (strcmp(mounta, mountb)); } @@ -1953,9 +2001,9 @@ share_mount_callback(zfs_handle_t *zhp, void *data) * with a legacy 
mountpoint, or those with legacy share options. */ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, - sizeof (mountpoint), NULL, NULL, 0, FALSE) == 0); + sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts, - sizeof (shareopts), NULL, NULL, 0, FALSE) == 0); + sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0); if (cbp->cb_type == OP_SHARE) { if (strcmp(shareopts, "off") == 0) { @@ -2080,12 +2128,12 @@ share_or_mount(int type, int argc, char **argv) case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); - usage(FALSE); + usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -2099,7 +2147,7 @@ share_or_mount(int type, int argc, char **argv) if (argc != 0) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } get_all_filesystems(&fslist, &count); @@ -2124,7 +2172,7 @@ share_or_mount(int type, int argc, char **argv) if (type == OP_SHARE) { (void) fprintf(stderr, gettext("missing filesystem " "argument\n")); - usage(FALSE); + usage(B_FALSE); } /* @@ -2149,13 +2197,14 @@ share_or_mount(int type, int argc, char **argv) if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } - if ((zhp = zfs_open(argv[0], ZFS_TYPE_FILESYSTEM)) == NULL) + if ((zhp = zfs_open(g_zfs, argv[0], + ZFS_TYPE_FILESYSTEM)) == NULL) ret = 1; else { - cb.cb_explicit = TRUE; + cb.cb_explicit = B_TRUE; ret = share_mount_callback(zhp, &cb); zfs_close(zhp); } @@ -2210,7 +2259,7 @@ unshare_unmount_compare(const void *larg, const void *rarg, void *unused) * and unmount it appropriately. 
*/ static int -unshare_unmount_path(int type, char *path, int flags, int is_manual) +unshare_unmount_path(int type, char *path, int flags, boolean_t is_manual) { zfs_handle_t *zhp; int ret; @@ -2252,12 +2301,13 @@ unshare_unmount_path(int type, char *path, int flags, int is_manual) return (1); } - if ((zhp = zfs_open(entry.mnt_special, ZFS_TYPE_FILESYSTEM)) == NULL) + if ((zhp = zfs_open(g_zfs, entry.mnt_special, + ZFS_TYPE_FILESYSTEM)) == NULL) return (1); verify(zfs_prop_get(zhp, type == OP_SHARE ? ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property, - sizeof (property), NULL, NULL, 0, FALSE) == 0); + sizeof (property), NULL, NULL, 0, B_FALSE) == 0); if (type == OP_SHARE) { if (strcmp(property, "off") == 0) { @@ -2318,7 +2368,7 @@ unshare_unmount(int type, int argc, char **argv) case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -2329,7 +2379,7 @@ unshare_unmount(int type, int argc, char **argv) if (do_all) { if (argc != 0) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } } else if (argc != 1) { if (argc == 0) @@ -2338,7 +2388,7 @@ unshare_unmount(int type, int argc, char **argv) else (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } if (do_all) { @@ -2390,7 +2440,7 @@ unshare_unmount(int type, int argc, char **argv) if (strchr(entry.mnt_special, '@') != NULL) continue; - if ((zhp = zfs_open(entry.mnt_special, + if ((zhp = zfs_open(g_zfs, entry.mnt_special, ZFS_TYPE_FILESYSTEM)) == NULL) { ret = 1; continue; @@ -2399,7 +2449,7 @@ unshare_unmount(int type, int argc, char **argv) verify(zfs_prop_get(zhp, type == OP_SHARE ? 
ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property, sizeof (property), NULL, NULL, - 0, FALSE) == 0); + 0, B_FALSE) == 0); /* Ignore legacy mounts and shares */ if ((type == OP_SHARE && @@ -2476,14 +2526,15 @@ unshare_unmount(int type, int argc, char **argv) */ if (argv[0][0] == '/') return (unshare_unmount_path(type, argv[0], - flags, FALSE)); + flags, B_FALSE)); - if ((zhp = zfs_open(argv[0], ZFS_TYPE_FILESYSTEM)) == NULL) + if ((zhp = zfs_open(g_zfs, argv[0], + ZFS_TYPE_FILESYSTEM)) == NULL) return (1); verify(zfs_prop_get(zhp, type == OP_SHARE ? ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property, - sizeof (property), NULL, NULL, 0, FALSE) == 0); + sizeof (property), NULL, NULL, 0, B_FALSE) == 0); switch (type) { case OP_SHARE: @@ -2581,7 +2632,7 @@ manual_mount(int argc, char **argv) case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); - usage(FALSE); + usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), @@ -2613,11 +2664,11 @@ manual_mount(int argc, char **argv) path = argv[1]; /* try to open the dataset */ - if ((zhp = zfs_open(dataset, ZFS_TYPE_FILESYSTEM)) == NULL) + if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_FILESYSTEM)) == NULL) return (1); (void) zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, - sizeof (mountpoint), NULL, NULL, 0, FALSE); + sizeof (mountpoint), NULL, NULL, 0, B_FALSE); /* check for legacy mountpoint and complain appropriately */ ret = 0; @@ -2683,7 +2734,7 @@ manual_unmount(int argc, char **argv) return (2); } - return (unshare_unmount_path(OP_MOUNT, argv[0], flags, TRUE)); + return (unshare_unmount_path(OP_MOUNT, argv[0], flags, B_TRUE)); } static int @@ -2702,9 +2753,9 @@ volcheck(zpool_handle_t *zhp, void *data) * links, depending on the value of 'isinit'. */ static int -do_volcheck(int isinit) +do_volcheck(boolean_t isinit) { - return (zpool_iter(volcheck, (void *)isinit) ? 1 : 0); + return (zpool_iter(g_zfs, volcheck, (void *)isinit) ? 
1 : 0); } int @@ -2720,6 +2771,14 @@ main(int argc, char **argv) opterr = 0; + if ((g_zfs = libzfs_init()) == NULL) { + (void) fprintf(stderr, gettext("internal error: failed to " + "initialize ZFS library\n")); + return (1); + } + + libzfs_print_on_error(g_zfs, B_TRUE); + if ((mnttab_file = fopen(MNTTAB, "r")) == NULL) { (void) fprintf(stderr, gettext("internal error: unable to " "open %s\n"), MNTTAB); @@ -2741,7 +2800,7 @@ main(int argc, char **argv) */ if (argc < 2) { (void) fprintf(stderr, gettext("missing command\n")); - usage(FALSE); + usage(B_FALSE); } cmdname = argv[1]; @@ -2762,16 +2821,16 @@ main(int argc, char **argv) * Special case '-?' */ if (strcmp(cmdname, "-?") == 0) - usage(TRUE); + usage(B_TRUE); /* * 'volinit' and 'volfini' do not appear in the usage message, * so we have to special case them here. */ if (strcmp(cmdname, "volinit") == 0) - return (do_volcheck(TRUE)); + return (do_volcheck(B_TRUE)); else if (strcmp(cmdname, "volfini") == 0) - return (do_volcheck(FALSE)); + return (do_volcheck(B_FALSE)); /* * Run the appropriate command. @@ -2790,12 +2849,14 @@ main(int argc, char **argv) if (i == NCOMMAND) { (void) fprintf(stderr, gettext("unrecognized " "command '%s'\n"), cmdname); - usage(FALSE); + usage(B_FALSE); } } (void) fclose(mnttab_file); + libzfs_fini(g_zfs); + /* * The 'ZFS_ABORT' environment variable causes us to dump core on exit * for the purposes of running ::findleaks. diff --git a/usr/src/cmd/zfs/zfs_util.h b/usr/src/cmd/zfs/zfs_util.h index 5b2fcfa9f3..c7f2f16186 100644 --- a/usr/src/cmd/zfs/zfs_util.h +++ b/usr/src/cmd/zfs/zfs_util.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -29,11 +28,14 @@ #pragma ident "%Z%%M% %I% %E% SMI" +#include <libzfs.h> + #ifdef __cplusplus extern "C" { #endif void * safe_malloc(size_t size); +libzfs_handle_t *g_zfs; #ifdef __cplusplus } diff --git a/usr/src/cmd/zinject/Makefile.com b/usr/src/cmd/zinject/Makefile.com index 14651a366c..c1ac4ac922 100644 --- a/usr/src/cmd/zinject/Makefile.com +++ b/usr/src/cmd/zinject/Makefile.com @@ -34,7 +34,7 @@ include ../../Makefile.cmd INCS += -I../../../lib/libzpool/common INCS += -I../../../uts/common/fs/zfs -LDLIBS += -lzpool -lzfs +LDLIBS += -lzpool -lzfs -lnvpair C99MODE= -xc99=%all C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/zinject/translate.c b/usr/src/cmd/zinject/translate.c index 882b230930..b4f6693aa1 100644 --- a/usr/src/cmd/zinject/translate.c +++ b/usr/src/cmd/zinject/translate.c @@ -436,22 +436,28 @@ translate_device(const char *pool, const char *device, zinject_record_t *record) { char *end; zpool_handle_t *zhp; + nvlist_t *tgt; + boolean_t isspare; /* * Given a device name or GUID, create an appropriate injection record * with zi_guid set. 
*/ - if ((zhp = zpool_open(pool)) == NULL) + if ((zhp = zpool_open(g_zfs, pool)) == NULL) return (-1); record->zi_guid = strtoull(device, &end, 16); - if (record->zi_guid == 0 || *end != '\0') - record->zi_guid = zpool_vdev_to_guid(zhp, device); + if (record->zi_guid == 0 || *end != '\0') { + tgt = zpool_find_vdev(zhp, device, &isspare); - if (record->zi_guid == 0) { - (void) fprintf(stderr, "cannot find device '%s' in pool '%s'\n", - device, pool); - return (-1); + if (tgt == NULL) { + (void) fprintf(stderr, "cannot find device '%s' in " + "pool '%s'\n", device, pool); + return (-1); + } + + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, + &record->zi_guid) == 0); } return (0); diff --git a/usr/src/cmd/zinject/zinject.c b/usr/src/cmd/zinject/zinject.c index b584fb0de5..02fc6a16ef 100644 --- a/usr/src/cmd/zinject/zinject.c +++ b/usr/src/cmd/zinject/zinject.c @@ -151,6 +151,7 @@ #include "zinject.h" +libzfs_handle_t *g_zfs; int zfs_fd; #define ECKSUM EBADE @@ -479,6 +480,14 @@ main(int argc, char **argv) int ret; int flags = 0; + if ((g_zfs = libzfs_init()) == NULL) { + (void) fprintf(stderr, "internal error: failed to " + "initialize ZFS library\n"); + return (1); + } + + libzfs_print_on_error(g_zfs, B_TRUE); + if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { (void) fprintf(stderr, "failed to open ZFS device\n"); return (1); @@ -721,7 +730,7 @@ main(int argc, char **argv) * time we access the pool. 
*/ if (dataset[0] != '\0' && domount) { - if ((zhp = zfs_open(dataset, ZFS_TYPE_ANY)) == NULL) + if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_ANY)) == NULL) return (1); if (zfs_unmount(zhp, NULL, 0) != 0) @@ -735,5 +744,7 @@ main(int argc, char **argv) if (dataset[0] != '\0' && domount) ret = (zfs_mount(zhp, NULL, 0) != 0); + libzfs_fini(g_zfs); + return (ret); } diff --git a/usr/src/cmd/zinject/zinject.h b/usr/src/cmd/zinject/zinject.h index bdbc2454c4..8086c4bc80 100644 --- a/usr/src/cmd/zinject/zinject.h +++ b/usr/src/cmd/zinject/zinject.h @@ -57,6 +57,8 @@ int translate_device(const char *pool, const char *device, zinject_record_t *record); void usage(void); +extern libzfs_handle_t *g_zfs; + #ifdef __cplusplus } #endif diff --git a/usr/src/cmd/zoneadm/zfs.c b/usr/src/cmd/zoneadm/zfs.c index 98fa5a44b5..eb9822781a 100644 --- a/usr/src/cmd/zoneadm/zfs.c +++ b/usr/src/cmd/zoneadm/zfs.c @@ -47,7 +47,7 @@ #include "zoneadm.h" -static const char *current_dataset; +libzfs_handle_t *g_zfs; typedef struct zfs_mount_data { char *match_name; @@ -61,41 +61,6 @@ typedef struct zfs_snapshot_data { } zfs_snapshot_data_t; /* - * ZFS error handler to do nothing - do not print the libzfs error messages. - */ -/* ARGSUSED */ -static void -noop_err_handler(const char *fmt, va_list ap) -{ -} - -/* - * Custom error handler for errors incurred as part of verifying datasets. We - * want to trim off the leading 'cannot open ...' to create a better error - * message. The only other way this can fail is if we fail to set the 'zoned' - * property. In this case we just pass the error on verbatim. - */ -static void -err_handler(const char *fmt, va_list ap) -{ - char buf[1024]; - - (void) vsnprintf(buf, sizeof (buf), fmt, ap); - - if (strncmp(gettext("cannot open "), buf, - strlen(gettext("cannot open "))) == 0) - /* - * TRANSLATION_NOTE - * zfs and dataset are literals that should not be translated. 
- */ - (void) fprintf(stderr, gettext("could not verify zfs " - "dataset %s%s\n"), current_dataset, strchr(buf, ':')); - else - (void) fprintf(stderr, gettext("could not verify zfs dataset " - "%s: %s\n"), current_dataset, buf); -} - -/* * A ZFS file system iterator call-back function which is used to validate * datasets imported into the zone. */ @@ -141,7 +106,7 @@ match_mountpoint(zfs_handle_t *zhp, void *data) cbp = (zfs_mount_data_t *)data; if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL, - 0, FALSE) == 0 && strcmp(mp, cbp->match_name) == 0) { + 0, B_FALSE) == 0 && strcmp(mp, cbp->match_name) == 0) { cbp->match_handle = zhp; return (1); } @@ -161,7 +126,7 @@ mount2zhandle(char *mountpoint) cb.match_name = mountpoint; cb.match_handle = NULL; - (void) zfs_iter_root(match_mountpoint, &cb); + (void) zfs_iter_root(g_zfs, match_mountpoint, &cb); return (cb.match_handle); } @@ -331,7 +296,7 @@ take_snapshot(char *source_zone, zfs_handle_t *zhp, char *snapshot_name, if (pre_snapshot(source_zone) != Z_OK) return (Z_ERR); - res = zfs_snapshot(snapshot_name); + res = zfs_snapshot(g_zfs, snapshot_name); if (post_snapshot(source_zone) != Z_OK) return (Z_ERR); @@ -443,7 +408,7 @@ clone_snap(char *snapshot_name, char *zonepath) zfs_handle_t *zhp; zfs_handle_t *clone; - if ((zhp = zfs_open(snapshot_name, ZFS_TYPE_SNAPSHOT)) == NULL) + if ((zhp = zfs_open(g_zfs, snapshot_name, ZFS_TYPE_SNAPSHOT)) == NULL) return (Z_NO_ENTRY); (void) printf(gettext("Cloning snapshot %s\n"), snapshot_name); @@ -454,7 +419,7 @@ clone_snap(char *snapshot_name, char *zonepath) return (Z_ERR); /* create the mountpoint if necessary */ - if ((clone = zfs_open(zonepath, ZFS_TYPE_ANY)) == NULL) + if ((clone = zfs_open(g_zfs, zonepath, ZFS_TYPE_ANY)) == NULL) return (Z_ERR); /* @@ -574,14 +539,14 @@ snap2path(char *snap_name, char *path, int len) /* Get the file system name from the snap_name. 
*/ *p = '\0'; - zhp = zfs_open(snap_name, ZFS_TYPE_ANY); + zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_ANY); *p = '@'; if (zhp == NULL) return (Z_ERR); /* Get the file system mount point. */ if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL, - 0, FALSE) != 0) { + 0, B_FALSE) != 0) { zfs_close(zhp); return (Z_ERR); } @@ -739,15 +704,16 @@ create_zfs_zonepath(char *zonepath) if (path2name(zonepath, zfs_name, sizeof (zfs_name)) != Z_OK) return; - zfs_set_error_handler(noop_err_handler); - - if (zfs_create(zfs_name, ZFS_TYPE_FILESYSTEM, NULL, NULL) != 0 || - (zhp = zfs_open(zfs_name, ZFS_TYPE_ANY)) == NULL) { - zfs_set_error_handler(NULL); + if (zfs_create(g_zfs, zfs_name, ZFS_TYPE_FILESYSTEM, NULL, NULL) != 0 || + (zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_ANY)) == NULL) { + (void) fprintf(stderr, gettext("cannot create ZFS dataset %s: " + "%s\n"), zfs_name, libzfs_error_description(g_zfs)); return; } if (zfs_mount(zhp, NULL, 0) != 0) { + (void) fprintf(stderr, gettext("cannot mount ZFS dataset %s: " + "%s\n"), zfs_name, libzfs_error_description(g_zfs)); (void) zfs_destroy(zhp); } else if (zfs_prop_set(zhp, ZFS_PROP_SHARENFS, "off") != 0) { (void) fprintf(stderr, gettext("file system %s successfully " @@ -765,7 +731,6 @@ create_zfs_zonepath(char *zonepath) } } - zfs_set_error_handler(NULL); zfs_close(zhp); } @@ -782,12 +747,8 @@ destroy_zfs(char *zonepath) boolean_t is_clone = B_FALSE; char origin[ZFS_MAXPROPLEN]; - zfs_set_error_handler(noop_err_handler); - - if ((zhp = mount2zhandle(zonepath)) == NULL) { - zfs_set_error_handler(NULL); + if ((zhp = mount2zhandle(zonepath)) == NULL) return (Z_ERR); - } /* * We can't destroy the file system if it has dependents. @@ -795,7 +756,6 @@ destroy_zfs(char *zonepath) if (zfs_iter_dependents(zhp, has_dependent, NULL) != 0 || zfs_unmount(zhp, NULL, 0) != 0) { zfs_close(zhp); - zfs_set_error_handler(NULL); return (Z_ERR); } @@ -804,10 +764,9 @@ destroy_zfs(char *zonepath) * to destroy that as well. 
*/ if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL, - NULL, 0, FALSE) == 0) + NULL, 0, B_FALSE) == 0) is_clone = B_TRUE; - zfs_set_error_handler(NULL); if (zfs_destroy(zhp) != 0) { /* * If the destroy fails for some reason, try to remount @@ -818,7 +777,6 @@ destroy_zfs(char *zonepath) zfs_close(zhp); return (Z_ERR); } - zfs_set_error_handler(noop_err_handler); (void) printf(gettext("The ZFS file system for this zone has been " "destroyed.\n")); @@ -829,17 +787,16 @@ destroy_zfs(char *zonepath) /* * Try to clean up the snapshot that the clone was taken from. */ - if ((ohp = zfs_open(origin, ZFS_TYPE_SNAPSHOT)) != NULL) { + if ((ohp = zfs_open(g_zfs, origin, + ZFS_TYPE_SNAPSHOT)) != NULL) { if (zfs_iter_dependents(ohp, has_dependent, NULL) - == 0 && zfs_unmount(ohp, NULL, 0) == 0) { + == 0 && zfs_unmount(ohp, NULL, 0) == 0) (void) zfs_destroy(ohp); - } zfs_close(ohp); } } zfs_close(zhp); - zfs_set_error_handler(NULL); return (Z_OK); } @@ -889,12 +846,8 @@ move_zfs(char *zonepath, char *new_zonepath) int ret = Z_ERR; zfs_handle_t *zhp; - zfs_set_error_handler(noop_err_handler); - - if ((zhp = mount2zhandle(zonepath)) == NULL) { - zfs_set_error_handler(NULL); + if ((zhp = mount2zhandle(zonepath)) == NULL) return (Z_ERR); - } if (zfs_prop_set(zhp, ZFS_PROP_MOUNTPOINT, new_zonepath) == 0) { /* @@ -906,7 +859,6 @@ move_zfs(char *zonepath, char *new_zonepath) } zfs_close(zhp); - zfs_set_error_handler(NULL); return (ret); } @@ -940,14 +892,13 @@ verify_datasets(zone_dochandle_t handle) return (Z_ERR); } - zfs_set_error_handler(err_handler); - while (zonecfg_getdsent(handle, &dstab) == Z_OK) { - current_dataset = dstab.zone_dataset_name; - - if ((zhp = zfs_open(dstab.zone_dataset_name, + if ((zhp = zfs_open(g_zfs, dstab.zone_dataset_name, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { + (void) fprintf(stderr, gettext("could not verify zfs " + "dataset %s: %s\n"), dstab.zone_dataset_name, + libzfs_error_description(g_zfs)); return_code = Z_ERR; 
continue; } @@ -978,7 +929,6 @@ verify_datasets(zone_dochandle_t handle) zfs_close(zhp); } (void) zonecfg_enddsent(handle); - zfs_set_error_handler(NULL); return (return_code); } @@ -993,13 +943,11 @@ verify_fs_zfs(struct zone_fstab *fstab) zfs_handle_t *zhp; char propbuf[ZFS_MAXPROPLEN]; - zfs_set_error_handler(noop_err_handler); - - if ((zhp = zfs_open(fstab->zone_fs_special, ZFS_TYPE_ANY)) == NULL) { + if ((zhp = zfs_open(g_zfs, fstab->zone_fs_special, + ZFS_TYPE_ANY)) == NULL) { (void) fprintf(stderr, gettext("could not verify fs %s: " "could not access zfs dataset '%s'\n"), fstab->zone_fs_dir, fstab->zone_fs_special); - zfs_set_error_handler(NULL); return (Z_ERR); } @@ -1008,7 +956,6 @@ verify_fs_zfs(struct zone_fstab *fstab) "'%s' is not a file system\n"), fstab->zone_fs_dir, fstab->zone_fs_special); zfs_close(zhp); - zfs_set_error_handler(NULL); return (Z_ERR); } @@ -1018,11 +965,21 @@ verify_fs_zfs(struct zone_fstab *fstab) "zfs '%s' mountpoint is not \"legacy\"\n"), fstab->zone_fs_dir, fstab->zone_fs_special); zfs_close(zhp); - zfs_set_error_handler(NULL); return (Z_ERR); } zfs_close(zhp); - zfs_set_error_handler(NULL); + return (Z_OK); +} + +int +init_zfs(void) +{ + if ((g_zfs = libzfs_init()) == NULL) { + (void) fprintf(stderr, gettext("failed to initialize ZFS " + "library\n")); + return (Z_ERR); + } + return (Z_OK); } diff --git a/usr/src/cmd/zoneadm/zoneadm.c b/usr/src/cmd/zoneadm/zoneadm.c index e25895736c..50c3b1ecd7 100644 --- a/usr/src/cmd/zoneadm/zoneadm.c +++ b/usr/src/cmd/zoneadm/zoneadm.c @@ -4433,6 +4433,9 @@ main(int argc, char **argv) exit(Z_ERR); } + if (init_zfs() != Z_OK) + exit(Z_ERR); + while ((arg = getopt(argc, argv, "?z:R:")) != EOF) { switch (arg) { case '?': diff --git a/usr/src/cmd/zoneadm/zoneadm.h b/usr/src/cmd/zoneadm/zoneadm.h index 161d7cee18..d6aa67798d 100644 --- a/usr/src/cmd/zoneadm/zoneadm.h +++ b/usr/src/cmd/zoneadm/zoneadm.h @@ -81,6 +81,7 @@ extern boolean_t is_zonepath_zfs(char *zonepath); extern int move_zfs(char 
*zonepath, char *new_zonepath); extern int verify_datasets(zone_dochandle_t handle); extern int verify_fs_zfs(struct zone_fstab *fstab); +extern int init_zfs(void); /* * sw_cmp.c diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c index d629c7a9c4..84b06afb17 100644 --- a/usr/src/cmd/zoneadmd/vplat.c +++ b/usr/src/cmd/zoneadmd/vplat.c @@ -2631,21 +2631,13 @@ out: return (error); } -/* ARGSUSED */ -static void -zfs_error_handler(const char *fmt, va_list ap) -{ - /* - * Do nothing - we interpret the failures from each libzfs call below. - */ -} - static int validate_datasets(zlog_t *zlogp) { zone_dochandle_t handle; struct zone_dstab dstab; zfs_handle_t *zhp; + libzfs_handle_t *hdl; if ((handle = zonecfg_init_handle()) == NULL) { zerror(zlogp, B_TRUE, "getting zone configuration handle"); @@ -2663,15 +2655,20 @@ validate_datasets(zlog_t *zlogp) return (-1); } - zfs_set_error_handler(zfs_error_handler); + if ((hdl = libzfs_init()) == NULL) { + zerror(zlogp, B_FALSE, "opening ZFS library"); + zonecfg_fini_handle(handle); + return (-1); + } while (zonecfg_getdsent(handle, &dstab) == Z_OK) { - if ((zhp = zfs_open(dstab.zone_dataset_name, + if ((zhp = zfs_open(hdl, dstab.zone_dataset_name, ZFS_TYPE_FILESYSTEM)) == NULL) { zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'", dstab.zone_dataset_name); zonecfg_fini_handle(handle); + libzfs_fini(hdl); return (-1); } @@ -2686,6 +2683,7 @@ validate_datasets(zlog_t *zlogp) dstab.zone_dataset_name); zonecfg_fini_handle(handle); zfs_close(zhp); + libzfs_fini(hdl); return (-1); } @@ -2694,6 +2692,7 @@ validate_datasets(zlog_t *zlogp) (void) zonecfg_enddsent(handle); zonecfg_fini_handle(handle); + libzfs_fini(hdl); return (0); } diff --git a/usr/src/cmd/zpool/zpool_dataset.c b/usr/src/cmd/zpool/zpool_dataset.c index d6cdde87bd..0b4c6a15fe 100644 --- a/usr/src/cmd/zpool/zpool_dataset.c +++ b/usr/src/cmd/zpool/zpool_dataset.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the 
terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -59,6 +58,8 @@ do_unmount(zfs_handle_t *zfsp, void *data) if (zfs_unmount(zfsp, NULL, cbp->cb_force ? MS_FORCE : 0) != 0) cbp->cb_failed = 1; + zfs_close(zfsp); + return (0); } @@ -78,7 +79,8 @@ unmount_datasets(zpool_handle_t *zhp, int force) if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) return (0); - if ((zfsp = zfs_open(zpool_get_name(zhp), ZFS_TYPE_FILESYSTEM)) == NULL) + if ((zfsp = zfs_open(g_zfs, zpool_get_name(zhp), + ZFS_TYPE_FILESYSTEM)) == NULL) return (-1); cb.cb_force = force; @@ -89,12 +91,8 @@ unmount_datasets(zpool_handle_t *zhp, int force) return (-1); } - if (do_unmount(zfsp, &cb) != 0 || cb.cb_failed != 0) { - zfs_close(zfsp); + if (do_unmount(zfsp, &cb) != 0 || cb.cb_failed != 0) return (-1); - } - - zfs_close(zfsp); return (0); } @@ -108,8 +106,10 @@ do_mount_share(zfs_handle_t *zfsp, void *data) cbdata_t *cbp = data; int ret; - if (zfs_get_type(zfsp) != ZFS_TYPE_FILESYSTEM) + if (zfs_get_type(zfsp) != ZFS_TYPE_FILESYSTEM) { + zfs_close(zfsp); return (0); + } if (zfs_mount(zfsp, cbp->cb_mntopts, 0) != 0) cbp->cb_failed = 1; @@ -118,6 +118,7 @@ do_mount_share(zfs_handle_t *zfsp, void *data) ret = zfs_iter_children(zfsp, do_mount_share, data); + zfs_close(zfsp); return (ret); } @@ -142,15 +143,12 @@ mount_share_datasets(zpool_handle_t *zhp, const char *options) if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) return (0); - if 
((zfsp = zfs_open(zpool_get_name(zhp), ZFS_TYPE_FILESYSTEM)) == NULL) + if ((zfsp = zfs_open(g_zfs, zpool_get_name(zhp), + ZFS_TYPE_FILESYSTEM)) == NULL) return (-1); - if (do_mount_share(zfsp, &cb) != 0 || cb.cb_failed != 0) { - zfs_close(zfsp); + if (do_mount_share(zfsp, &cb) != 0 || cb.cb_failed != 0) return (-1); - } - - zfs_close(zfsp); return (0); } diff --git a/usr/src/cmd/zpool/zpool_iter.c b/usr/src/cmd/zpool/zpool_iter.c index f99396da81..4a0a9ef162 100644 --- a/usr/src/cmd/zpool/zpool_iter.c +++ b/usr/src/cmd/zpool/zpool_iter.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -50,7 +49,7 @@ typedef struct zpool_node { } zpool_node_t; struct zpool_list { - int zl_findall; + boolean_t zl_findall; uu_avl_t *zl_avl; uu_avl_pool_t *zl_pool; }; @@ -114,18 +113,18 @@ pool_list_get(int argc, char **argv, int *err) no_memory(); if (argc == 0) { - (void) zpool_iter(add_pool, zlp); - zlp->zl_findall = TRUE; + (void) zpool_iter(g_zfs, add_pool, zlp); + zlp->zl_findall = B_TRUE; } else { int i; for (i = 0; i < argc; i++) { zpool_handle_t *zhp; - if ((zhp = zpool_open_canfail(argv[i])) != NULL) + if ((zhp = zpool_open_canfail(g_zfs, argv[i])) != NULL) (void) add_pool(zhp, zlp); else - *err = TRUE; + *err = B_TRUE; } } @@ -141,7 +140,7 @@ void pool_list_update(zpool_list_t *zlp) { if (zlp->zl_findall) - (void) zpool_iter(add_pool, zlp); + (void) zpool_iter(g_zfs, add_pool, zlp); } /* @@ -223,7 +222,7 @@ pool_list_count(zpool_list_t *zlp) * using the pool_list_* interfaces. */ int -for_each_pool(int argc, char **argv, int unavail, zpool_iter_f func, +for_each_pool(int argc, char **argv, boolean_t unavail, zpool_iter_f func, void *data) { zpool_list_t *list; diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c index e2297b24aa..c963776a9f 100644 --- a/usr/src/cmd/zpool/zpool_main.c +++ b/usr/src/cmd/zpool/zpool_main.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -51,6 +52,7 @@ static int zpool_do_create(int, char **); static int zpool_do_destroy(int, char **); static int zpool_do_add(int, char **); +static int zpool_do_remove(int, char **); static int zpool_do_list(int, char **); static int zpool_do_iostat(int, char **); @@ -76,7 +78,7 @@ static int zpool_do_upgrade(int, char **); * debugging facilities. 
*/ const char * -_umem_debug_init() +_umem_debug_init(void) { return ("default,verbose"); /* $UMEM_DEBUG setting */ } @@ -101,6 +103,7 @@ typedef enum { HELP_OFFLINE, HELP_ONLINE, HELP_REPLACE, + HELP_REMOVE, HELP_SCRUB, HELP_STATUS, HELP_UPGRADE @@ -127,6 +130,7 @@ static zpool_command_t command_table[] = { { "destroy", zpool_do_destroy, HELP_DESTROY }, { NULL }, { "add", zpool_do_add, HELP_ADD }, + { "remove", zpool_do_remove, HELP_REMOVE }, { NULL }, { "list", zpool_do_list, HELP_LIST }, { "iostat", zpool_do_iostat, HELP_IOSTAT }, @@ -188,6 +192,8 @@ get_usage(zpool_help_t idx) { case HELP_REPLACE: return (gettext("\treplace [-f] <pool> <device> " "[new_device]\n")); + case HELP_REMOVE: + return (gettext("\tremove <pool> <device>\n")); case HELP_SCRUB: return (gettext("\tscrub [-s] <pool> ...\n")); case HELP_STATUS: @@ -253,7 +259,7 @@ static char *column_subopts[] = { * a complete usage message. */ void -usage(int requested) +usage(boolean_t requested) { int i; FILE *fp = requested ? 
stdout : stderr; @@ -324,7 +330,7 @@ print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent) return; for (c = 0; c < children; c++) { - vname = zpool_vdev_name(zhp, child[c]); + vname = zpool_vdev_name(g_zfs, zhp, child[c]); print_vdev_tree(zhp, vname, child[c], indent + 2); free(vname); } @@ -344,8 +350,8 @@ print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent) int zpool_do_add(int argc, char **argv) { - int force = FALSE; - int dryrun = FALSE; + boolean_t force = B_FALSE; + boolean_t dryrun = B_FALSE; int c; nvlist_t *nvroot; char *poolname; @@ -357,15 +363,15 @@ zpool_do_add(int argc, char **argv) while ((c = getopt(argc, argv, "fn")) != -1) { switch (c) { case 'f': - force = TRUE; + force = B_TRUE; break; case 'n': - dryrun = TRUE; + dryrun = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -375,11 +381,11 @@ zpool_do_add(int argc, char **argv) /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing vdev specification\n")); - usage(FALSE); + usage(B_FALSE); } poolname = argv[0]; @@ -387,7 +393,7 @@ zpool_do_add(int argc, char **argv) argc--; argv++; - if ((zhp = zpool_open(poolname)) == NULL) + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); if ((config = zpool_get_config(zhp, NULL)) == NULL) { @@ -398,7 +404,7 @@ zpool_do_add(int argc, char **argv) } /* pass off to get_vdev_spec for processing */ - nvroot = make_root_vdev(config, force, !force, argc, argv); + nvroot = make_root_vdev(config, force, !force, B_FALSE, argc, argv); if (nvroot == NULL) { zpool_close(zhp); return (1); @@ -421,6 +427,46 @@ zpool_do_add(int argc, char **argv) ret = (zpool_add(zhp, nvroot) != 0); } + nvlist_free(nvroot); + zpool_close(zhp); + + return (ret); +} + +/* + * zpool 
remove <pool> <vdev> + * + * Removes the given vdev from the pool. Currently, this only supports removing + * spares from the pool. Eventually, we'll want to support removing leaf vdevs + * (as an alias for 'detach') as well as toplevel vdevs. + */ +int +zpool_do_remove(int argc, char **argv) +{ + char *poolname; + int ret; + zpool_handle_t *zhp; + + argc--; + argv++; + + /* get pool name and check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name argument\n")); + usage(B_FALSE); + } + if (argc < 2) { + (void) fprintf(stderr, gettext("missing device\n")); + usage(B_FALSE); + } + + poolname = argv[0]; + + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) + return (1); + + ret = (zpool_vdev_remove(zhp, argv[1]) != 0); + return (ret); } @@ -442,23 +488,25 @@ zpool_do_add(int argc, char **argv) int zpool_do_create(int argc, char **argv) { - int force = FALSE; - int dryrun = FALSE; + boolean_t force = B_FALSE; + boolean_t dryrun = B_FALSE; int c; nvlist_t *nvroot; char *poolname; int ret; char *altroot = NULL; char *mountpoint = NULL; + nvlist_t **child; + uint_t children; /* check options */ while ((c = getopt(argc, argv, ":fnR:m:")) != -1) { switch (c) { case 'f': - force = TRUE; + force = B_TRUE; break; case 'n': - dryrun = TRUE; + dryrun = B_TRUE; break; case 'R': altroot = optarg; @@ -469,12 +517,12 @@ zpool_do_create(int argc, char **argv) case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); - usage(FALSE); + usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -484,11 +532,11 @@ zpool_do_create(int argc, char **argv) /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing vdev specification\n")); - usage(FALSE); + usage(B_FALSE); } 
poolname = argv[0]; @@ -506,13 +554,26 @@ zpool_do_create(int argc, char **argv) } /* pass off to get_vdev_spec for bulk processing */ - nvroot = make_root_vdev(NULL, force, !force, argc - 1, argv + 1); + nvroot = make_root_vdev(NULL, force, !force, B_FALSE, argc - 1, + argv + 1); if (nvroot == NULL) return (1); + /* make_root_vdev() allows 0 toplevel children if there are spares */ + verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0); + if (children == 0) { + (void) fprintf(stderr, gettext("invalid vdev " + "specification: at least one toplevel vdev must be " + "specified\n")); + return (1); + } + + if (altroot != NULL && altroot[0] != '/') { (void) fprintf(stderr, gettext("invalid alternate root '%s': " "must be an absolute path\n")); + nvlist_free(nvroot); return (1); } @@ -530,6 +591,7 @@ zpool_do_create(int argc, char **argv) (void) fprintf(stderr, gettext("invalid mountpoint " "'%s': must be an absolute path, 'legacy', or " "'none'\n"), mountpoint); + nvlist_free(nvroot); return (1); } @@ -560,6 +622,7 @@ zpool_do_create(int argc, char **argv) "'%s' exists and is not empty\n"), buf); (void) fprintf(stderr, gettext("use '-m' " "option to provide a different default\n")); + nvlist_free(nvroot); return (1); } } @@ -570,8 +633,6 @@ zpool_do_create(int argc, char **argv) * For a dry run invocation, print out a basic message and run * through all the vdevs in the list and print out in an * appropriate hierarchy. - * - * XXZFS find out of we can create the pool? */ (void) printf(gettext("would create '%s' with the " "following layout:\n\n"), poolname); @@ -584,8 +645,8 @@ zpool_do_create(int argc, char **argv) /* * Hand off to libzfs. 
*/ - if (zpool_create(poolname, nvroot, altroot) == 0) { - zfs_handle_t *pool = zfs_open(poolname, + if (zpool_create(g_zfs, poolname, nvroot, altroot) == 0) { + zfs_handle_t *pool = zfs_open(g_zfs, poolname, ZFS_TYPE_FILESYSTEM); if (pool != NULL) { if (mountpoint != NULL) @@ -596,8 +657,10 @@ zpool_do_create(int argc, char **argv) ret = zfs_share(pool); zfs_close(pool); } + } else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) { + (void) fprintf(stderr, gettext("pool name may have " + "been omitted\n")); } - } nvlist_free(nvroot); @@ -615,7 +678,7 @@ zpool_do_create(int argc, char **argv) int zpool_do_destroy(int argc, char **argv) { - int force = FALSE; + boolean_t force = B_FALSE; int c; char *pool; zpool_handle_t *zhp; @@ -625,12 +688,12 @@ zpool_do_destroy(int argc, char **argv) while ((c = getopt(argc, argv, "f")) != -1) { switch (c) { case 'f': - force = TRUE; + force = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -640,16 +703,16 @@ zpool_do_destroy(int argc, char **argv) /* check arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } pool = argv[0]; - if ((zhp = zpool_open_canfail(pool)) == NULL) { + if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) { /* * As a special case, check for use of '/' in the name, and * direct the user to use 'zfs destroy' instead. 
@@ -685,7 +748,7 @@ zpool_do_destroy(int argc, char **argv) int zpool_do_export(int argc, char **argv) { - int force = FALSE; + boolean_t force = B_FALSE; int c; zpool_handle_t *zhp; int ret; @@ -695,12 +758,12 @@ zpool_do_export(int argc, char **argv) while ((c = getopt(argc, argv, "f")) != -1) { switch (c) { case 'f': - force = TRUE; + force = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -710,12 +773,12 @@ zpool_do_export(int argc, char **argv) /* check arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool argument\n")); - usage(FALSE); + usage(B_FALSE); } ret = 0; for (i = 0; i < argc; i++) { - if ((zhp = zpool_open_canfail(argv[i])) == NULL) { + if ((zhp = zpool_open_canfail(g_zfs, argv[i])) == NULL) { ret = 1; continue; } @@ -742,7 +805,7 @@ zpool_do_export(int argc, char **argv) static int max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max) { - char *name = zpool_vdev_name(zhp, nv); + char *name = zpool_vdev_name(g_zfs, zhp, nv); nvlist_t **child; uint_t c, children; int ret; @@ -752,13 +815,22 @@ max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max) free(name); + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, + &child, &children) == 0) { + for (c = 0; c < children; c++) + if ((ret = max_width(zhp, child[c], depth + 2, + max)) > max) + max = ret; + } + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) - return (max); + &child, &children) == 0) { + for (c = 0; c < children; c++) + if ((ret = max_width(zhp, child[c], depth + 2, + max)) > max) + max = ret; + } - for (c = 0; c < children; c++) - if ((ret = max_width(zhp, child[c], depth + 2, max)) > max) - max = ret; return (max); } @@ -819,11 +891,22 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth) return; for (c = 0; c < children; c++) { - vname = zpool_vdev_name(NULL, child[c]); + vname = 
zpool_vdev_name(g_zfs, NULL, child[c]); print_import_config(vname, child[c], namewidth, depth + 2); free(vname); } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, + &child, &children) != 0) + return; + + (void) printf(gettext("\tspares\n")); + for (c = 0; c < children; c++) { + vname = zpool_vdev_name(g_zfs, NULL, child[c]); + (void) printf("\t %s\n", vname); + free(vname); + } } /* @@ -1009,13 +1092,13 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, return (1); } - if (zpool_import(config, newname, altroot) != 0) + if (zpool_import(g_zfs, config, newname, altroot) != 0) return (1); if (newname != NULL) name = (char *)newname; - verify((zhp = zpool_open(name)) != NULL); + verify((zhp = zpool_open(g_zfs, name)) != NULL); if (mount_share_datasets(zhp, mntopts) != 0) { zpool_close(zhp); @@ -1056,24 +1139,24 @@ zpool_do_import(int argc, char **argv) int c; int err; nvlist_t *pools; - int do_all = FALSE; - int do_destroyed = FALSE; + boolean_t do_all = B_FALSE; + boolean_t do_destroyed = B_FALSE; char *altroot = NULL; char *mntopts = NULL; - int do_force = FALSE; + boolean_t do_force = B_FALSE; nvpair_t *elem; nvlist_t *config; uint64_t searchguid; char *searchname; nvlist_t *found_config; - int first; + boolean_t first; uint64_t pool_state; /* check options */ while ((c = getopt(argc, argv, ":Dfd:R:ao:")) != -1) { switch (c) { case 'a': - do_all = TRUE; + do_all = B_TRUE; break; case 'd': if (searchdirs == NULL) { @@ -1089,10 +1172,10 @@ zpool_do_import(int argc, char **argv) searchdirs[nsearch++] = optarg; break; case 'D': - do_destroyed = TRUE; + do_destroyed = B_TRUE; break; case 'f': - do_force = TRUE; + do_force = B_TRUE; break; case 'o': mntopts = optarg; @@ -1103,12 +1186,12 @@ zpool_do_import(int argc, char **argv) case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); - usage(FALSE); + usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - 
usage(FALSE); + usage(B_FALSE); } } @@ -1125,12 +1208,12 @@ zpool_do_import(int argc, char **argv) if (do_all) { if (argc != 0) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } } else { if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } /* @@ -1141,12 +1224,15 @@ zpool_do_import(int argc, char **argv) if (argc == 0 && !priv_ineffect(PRIV_SYS_CONFIG)) { (void) fprintf(stderr, gettext("cannot " "discover pools: permission denied\n")); + free(searchdirs); return (1); } } - if ((pools = zpool_find_import(nsearch, searchdirs)) == NULL) + if ((pools = zpool_find_import(g_zfs, nsearch, searchdirs)) == NULL) { + free(searchdirs); return (1); + } /* * We now have a list of all available pools in the given directories. @@ -1176,7 +1262,7 @@ zpool_do_import(int argc, char **argv) err = 0; elem = NULL; - first = TRUE; + first = B_TRUE; while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { verify(nvpair_value_nvlist(elem, &config) == 0); @@ -1190,7 +1276,7 @@ zpool_do_import(int argc, char **argv) if (argc == 0) { if (first) - first = FALSE; + first = B_FALSE; else (void) printf("\n"); @@ -1215,7 +1301,7 @@ zpool_do_import(int argc, char **argv) "one matching pool\n"), searchname); (void) fprintf(stderr, gettext( "import by numeric ID instead\n")); - err = TRUE; + err = B_TRUE; } found_config = config; } @@ -1241,7 +1327,7 @@ zpool_do_import(int argc, char **argv) if (found_config == NULL) { (void) fprintf(stderr, gettext("cannot import '%s': " "no such pool available\n"), argv[0]); - err = TRUE; + err = B_TRUE; } else { err |= do_import(found_config, argc == 1 ? NULL : argv[1], mntopts, altroot, do_force); @@ -1257,6 +1343,7 @@ zpool_do_import(int argc, char **argv) gettext("no pools available to import\n")); nvlist_free(pools); + free(searchdirs); return (err ? 
1 : 0); } @@ -1374,7 +1461,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, return; for (c = 0; c < children; c++) { - vname = zpool_vdev_name(zhp, newchild[c]); + vname = zpool_vdev_name(g_zfs, zhp, newchild[c]); print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, newchild[c], cb, depth + 2); free(vname); @@ -1476,19 +1563,19 @@ zpool_do_iostat(int argc, char **argv) int npools; unsigned long interval = 0, count = 0; zpool_list_t *list; - int verbose = FALSE; + boolean_t verbose = B_FALSE; iostat_cbdata_t cb; /* check options */ while ((c = getopt(argc, argv, "v")) != -1) { switch (c) { case 'v': - verbose = TRUE; + verbose = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -1508,7 +1595,7 @@ zpool_do_iostat(int argc, char **argv) if (interval == 0) { (void) fprintf(stderr, gettext("interval " "cannot be zero\n")); - usage(FALSE); + usage(B_FALSE); } /* @@ -1540,7 +1627,7 @@ zpool_do_iostat(int argc, char **argv) if (interval == 0) { (void) fprintf(stderr, gettext("interval " "cannot be zero\n")); - usage(FALSE); + usage(B_FALSE); } /* @@ -1559,10 +1646,13 @@ zpool_do_iostat(int argc, char **argv) if ((list = pool_list_get(argc, argv, &ret)) == NULL) return (1); - if (pool_list_count(list) == 0 && argc != 0) + if (pool_list_count(list) == 0 && argc != 0) { + pool_list_free(list); return (1); + } if (pool_list_count(list) == 0 && interval == 0) { + pool_list_free(list); (void) fprintf(stderr, gettext("no pools available\n")); return (1); } @@ -1586,14 +1676,14 @@ zpool_do_iostat(int argc, char **argv) * before calculating the maximum name width, so that any * configuration changes are properly accounted for. */ - (void) pool_list_iter(list, FALSE, refresh_iostat, &cb); + (void) pool_list_iter(list, B_FALSE, refresh_iostat, &cb); /* * Iterate over all pools to determine the maximum width * for the pool / device name column across all pools. 
*/ cb.cb_namewidth = 0; - (void) pool_list_iter(list, FALSE, get_namewidth, &cb); + (void) pool_list_iter(list, B_FALSE, get_namewidth, &cb); /* * If it's the first time, or verbose mode, print the header. @@ -1601,7 +1691,7 @@ zpool_do_iostat(int argc, char **argv) if (++cb.cb_iteration == 1 || verbose) print_iostat_header(&cb); - (void) pool_list_iter(list, FALSE, print_iostat, &cb); + (void) pool_list_iter(list, B_FALSE, print_iostat, &cb); /* * If there's more than one pool, and we're not in verbose mode @@ -1628,10 +1718,10 @@ zpool_do_iostat(int argc, char **argv) } typedef struct list_cbdata { - int cb_scripted; - int cb_first; - int cb_fields[MAX_FIELDS]; - int cb_fieldcount; + boolean_t cb_scripted; + boolean_t cb_first; + int cb_fields[MAX_FIELDS]; + int cb_fieldcount; } list_cbdata_t; /* @@ -1675,7 +1765,7 @@ list_callback(zpool_handle_t *zhp, void *data) if (cbp->cb_first) { if (!cbp->cb_scripted) print_header(cbp->cb_fields, cbp->cb_fieldcount); - cbp->cb_first = FALSE; + cbp->cb_first = B_FALSE; } if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { @@ -1803,7 +1893,7 @@ zpool_do_list(int argc, char **argv) while ((c = getopt(argc, argv, ":Ho:")) != -1) { switch (c) { case 'H': - cb.cb_scripted = TRUE; + cb.cb_scripted = B_TRUE; break; case 'o': fields = optarg; @@ -1811,12 +1901,12 @@ zpool_do_list(int argc, char **argv) case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); - usage(FALSE); + usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -1827,23 +1917,23 @@ zpool_do_list(int argc, char **argv) if (cb.cb_fieldcount == MAX_FIELDS) { (void) fprintf(stderr, gettext("too many " "properties given to -o option\n")); - usage(FALSE); + usage(B_FALSE); } if ((cb.cb_fields[cb.cb_fieldcount] = getsubopt(&fields, column_subopts, &value)) == -1) { (void) fprintf(stderr, gettext("invalid property " "'%s'\n"), value); - usage(FALSE); + 
usage(B_FALSE); } cb.cb_fieldcount++; } - cb.cb_first = TRUE; + cb.cb_first = B_TRUE; - ret = for_each_pool(argc, argv, TRUE, list_callback, &cb); + ret = for_each_pool(argc, argv, B_TRUE, list_callback, &cb); if (argc == 0 && cb.cb_first) { (void) printf(gettext("no pools available\n")); @@ -1883,23 +1973,24 @@ zpool_get_vdev_by_name(nvlist_t *nv, char *name) static int zpool_do_attach_or_replace(int argc, char **argv, int replacing) { - int force = FALSE; + boolean_t force = B_FALSE; int c; nvlist_t *nvroot; char *poolname, *old_disk, *new_disk; zpool_handle_t *zhp; nvlist_t *config; + int ret; /* check options */ while ((c = getopt(argc, argv, "f")) != -1) { switch (c) { case 'f': - force = TRUE; + force = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -1909,7 +2000,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing) /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); - usage(FALSE); + usage(B_FALSE); } poolname = argv[0]; @@ -1917,7 +2008,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing) if (argc < 2) { (void) fprintf(stderr, gettext("missing <device> specification\n")); - usage(FALSE); + usage(B_FALSE); } old_disk = argv[1]; @@ -1926,7 +2017,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing) if (!replacing) { (void) fprintf(stderr, gettext("missing <new_device> specification\n")); - usage(FALSE); + usage(B_FALSE); } new_disk = old_disk; argc -= 1; @@ -1939,10 +2030,10 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing) if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } - if ((zhp = zpool_open(poolname)) == NULL) + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); if ((config = zpool_get_config(zhp, NULL)) == NULL) { @@ -1952,13 +2043,18 @@ 
zpool_do_attach_or_replace(int argc, char **argv, int replacing) return (1); } - nvroot = make_root_vdev(config, force, B_FALSE, argc, argv); + nvroot = make_root_vdev(config, force, B_FALSE, replacing, argc, argv); if (nvroot == NULL) { zpool_close(zhp); return (1); } - return (zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing)); + ret = zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing); + + nvlist_free(nvroot); + zpool_close(zhp); + + return (ret); } /* @@ -2008,6 +2104,7 @@ zpool_do_detach(int argc, char **argv) int c; char *poolname, *path; zpool_handle_t *zhp; + int ret; /* check options */ while ((c = getopt(argc, argv, "f")) != -1) { @@ -2016,7 +2113,7 @@ zpool_do_detach(int argc, char **argv) case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -2026,22 +2123,26 @@ zpool_do_detach(int argc, char **argv) /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); - usage(FALSE); + usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing <device> specification\n")); - usage(FALSE); + usage(B_FALSE); } poolname = argv[0]; path = argv[1]; - if ((zhp = zpool_open(poolname)) == NULL) + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); - return (zpool_vdev_detach(zhp, path)); + ret = zpool_vdev_detach(zhp, path); + + zpool_close(zhp); + + return (ret); } /* @@ -2063,7 +2164,7 @@ zpool_do_online(int argc, char **argv) case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -2073,16 +2174,16 @@ zpool_do_online(int argc, char **argv) /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name\n")); - usage(FALSE); + usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing device name\n")); - usage(FALSE); + usage(B_FALSE); } poolname = argv[0]; - if 
((zhp = zpool_open(poolname)) == NULL) + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); for (i = 1; i < argc; i++) @@ -2092,6 +2193,8 @@ zpool_do_online(int argc, char **argv) else ret = 1; + zpool_close(zhp); + return (ret); } @@ -2112,19 +2215,20 @@ zpool_do_offline(int argc, char **argv) int c, i; char *poolname; zpool_handle_t *zhp; - int ret = 0, istmp = FALSE; + int ret = 0; + boolean_t istmp = B_FALSE; /* check options */ while ((c = getopt(argc, argv, "ft")) != -1) { switch (c) { case 't': - istmp = TRUE; + istmp = B_TRUE; break; case 'f': case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -2134,16 +2238,16 @@ zpool_do_offline(int argc, char **argv) /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name\n")); - usage(FALSE); + usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing device name\n")); - usage(FALSE); + usage(B_FALSE); } poolname = argv[0]; - if ((zhp = zpool_open(poolname)) == NULL) + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); for (i = 1; i < argc; i++) @@ -2153,6 +2257,8 @@ zpool_do_offline(int argc, char **argv) else ret = 1; + zpool_close(zhp); + return (ret); } @@ -2170,18 +2276,18 @@ zpool_do_clear(int argc, char **argv) if (argc < 2) { (void) fprintf(stderr, gettext("missing pool name\n")); - usage(FALSE); + usage(B_FALSE); } if (argc > 3) { (void) fprintf(stderr, gettext("too many arguments\n")); - usage(FALSE); + usage(B_FALSE); } pool = argv[1]; device = argc == 3 ? 
argv[2] : NULL; - if ((zhp = zpool_open(pool)) == NULL) + if ((zhp = zpool_open(g_zfs, pool)) == NULL) return (1); if (zpool_clear(zhp, device) != 0) @@ -2235,7 +2341,7 @@ zpool_do_scrub(int argc, char **argv) case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -2244,17 +2350,17 @@ zpool_do_scrub(int argc, char **argv) if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); - usage(FALSE); + usage(B_FALSE); } - return (for_each_pool(argc, argv, TRUE, scrub_callback, &cb)); + return (for_each_pool(argc, argv, B_TRUE, scrub_callback, &cb)); } typedef struct status_cbdata { - int cb_verbose; - int cb_explain; - int cb_count; - int cb_first; + int cb_count; + boolean_t cb_verbose; + boolean_t cb_explain; + boolean_t cb_first; } status_cbdata_t; /* @@ -2311,12 +2417,57 @@ print_scrub_status(nvlist_t *nvroot) (u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60)); } +typedef struct spare_cbdata { + uint64_t cb_guid; + zpool_handle_t *cb_zhp; +} spare_cbdata_t; + +static boolean_t +find_vdev(nvlist_t *nv, uint64_t search) +{ + uint64_t guid; + nvlist_t **child; + uint_t c, children; + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && + search == guid) + return (B_TRUE); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0) { + for (c = 0; c < children; c++) + if (find_vdev(child[c], search)) + return (B_TRUE); + } + + return (B_FALSE); +} + +static int +find_spare(zpool_handle_t *zhp, void *data) +{ + spare_cbdata_t *cbp = data; + nvlist_t *config, *nvroot; + + config = zpool_get_config(zhp, NULL); + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + + if (find_vdev(nvroot, cbp->cb_guid)) { + cbp->cb_zhp = zhp; + return (1); + } + + zpool_close(zhp); + return (0); +} + /* * Print out configuration state as requested by status_callback. 
*/ void print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, - int namewidth, int depth) + int namewidth, int depth, boolean_t isspare) { nvlist_t **child; uint_t c, children; @@ -2324,6 +2475,8 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, char rbuf[6], wbuf[6], cbuf[6], repaired[7]; char *vname; uint64_t notpresent; + spare_cbdata_t cb; + const char *state; verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &c) == 0); @@ -2332,13 +2485,27 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, &child, &children) != 0) children = 0; + state = state_to_name(vs); + if (isspare) { + /* + * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for + * online drives. + */ + if (vs->vs_aux == VDEV_AUX_SPARED) + state = "INUSE"; + else if (vs->vs_state == VDEV_STATE_HEALTHY) + state = "AVAIL"; + } + (void) printf("\t%*s%-*s %-8s", depth, "", namewidth - depth, - name, state_to_name(vs)); + name, state); - zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf)); - zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf)); - zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf)); - (void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf); + if (!isspare) { + zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf)); + zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf)); + zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf)); + (void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf); + } if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, ¬present) == 0) { @@ -2365,6 +2532,24 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, (void) printf(gettext("newer version")); break; + case VDEV_AUX_SPARED: + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, + &cb.cb_guid) == 0); + if (zpool_iter(g_zfs, find_spare, &cb) == 1) { + if (strcmp(zpool_get_name(cb.cb_zhp), + zpool_get_name(zhp)) == 0) + (void) printf(gettext("currently in " + "use")); + else + (void) printf(gettext("in 
use by " + "pool '%s'"), + zpool_get_name(cb.cb_zhp)); + zpool_close(cb.cb_zhp); + } else { + (void) printf(gettext("currently in use")); + } + break; + default: (void) printf(gettext("corrupted data")); break; @@ -2382,9 +2567,9 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, (void) printf("\n"); for (c = 0; c < children; c++) { - vname = zpool_vdev_name(zhp, child[c]); + vname = zpool_vdev_name(g_zfs, zhp, child[c]); print_status_config(zhp, vname, child[c], - namewidth, depth + 2); + namewidth, depth + 2, isspare); free(vname); } } @@ -2443,6 +2628,26 @@ print_error_log(zpool_handle_t *zhp) } } +static void +print_spares(zpool_handle_t *zhp, nvlist_t **spares, uint_t nspares, + int namewidth) +{ + uint_t i; + char *name; + + if (nspares == 0) + return; + + (void) printf(gettext("\tspares\n")); + + for (i = 0; i < nspares; i++) { + name = zpool_vdev_name(g_zfs, zhp, spares[i]); + print_status_config(zhp, name, spares[i], + namewidth, 2, B_TRUE); + free(name); + } +} + /* * Display a summary of pool status. 
Displays a summary such as: * @@ -2480,7 +2685,7 @@ status_callback(zpool_handle_t *zhp, void *data) return (0); if (cbp->cb_first) - cbp->cb_first = FALSE; + cbp->cb_first = B_FALSE; else (void) printf("\n"); @@ -2603,6 +2808,8 @@ status_callback(zpool_handle_t *zhp, void *data) int namewidth; uint64_t nerr; size_t realerr; + nvlist_t **spares; + uint_t nspares; verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); @@ -2618,7 +2825,11 @@ status_callback(zpool_handle_t *zhp, void *data) (void) printf(gettext("\t%-*s %-8s %5s %5s %5s\n"), namewidth, "NAME", "STATE", "READ", "WRITE", "CKSUM"); print_status_config(zhp, zpool_get_name(zhp), nvroot, - namewidth, 0); + namewidth, 0, B_FALSE); + + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) + print_spares(zhp, spares, nspares, namewidth); if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, &nerr) == 0) { @@ -2632,6 +2843,7 @@ status_callback(zpool_handle_t *zhp, void *data) nerr = realerr; (void) printf("\n"); + if (nerr == 0) (void) printf(gettext("errors: No known data " "errors\n")); @@ -2668,24 +2880,24 @@ zpool_do_status(int argc, char **argv) while ((c = getopt(argc, argv, "vx")) != -1) { switch (c) { case 'v': - cb.cb_verbose = TRUE; + cb.cb_verbose = B_TRUE; break; case 'x': - cb.cb_explain = TRUE; + cb.cb_explain = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } argc -= optind; argv += optind; - cb.cb_first = TRUE; + cb.cb_first = B_TRUE; - ret = for_each_pool(argc, argv, TRUE, status_callback, &cb); + ret = for_each_pool(argc, argv, B_TRUE, status_callback, &cb); if (argc == 0 && cb.cb_count == 0) (void) printf(gettext("no pools available\n")); @@ -2731,13 +2943,13 @@ upgrade_cb(zpool_handle_t *zhp, void *arg) "versions.\n\n")); (void) printf(gettext("VER POOL\n")); (void) printf(gettext("--- ------------\n")); - cbp->cb_first = FALSE; + cbp->cb_first = B_FALSE; } 
(void) printf("%2llu %s\n", version, zpool_get_name(zhp)); } else { - cbp->cb_first = FALSE; + cbp->cb_first = B_FALSE; ret = zpool_upgrade(zhp); if (ret == 0) (void) printf(gettext("Successfully upgraded " @@ -2752,7 +2964,7 @@ upgrade_cb(zpool_handle_t *zhp, void *arg) "cannot be accessed on the current system.\n\n")); (void) printf(gettext("VER POOL\n")); (void) printf(gettext("--- ------------\n")); - cbp->cb_first = FALSE; + cbp->cb_first = B_FALSE; } (void) printf("%2llu %s\n", version, @@ -2811,7 +3023,7 @@ zpool_do_upgrade(int argc, char **argv) while ((c = getopt(argc, argv, "av")) != -1) { switch (c) { case 'a': - cb.cb_all = TRUE; + cb.cb_all = B_TRUE; break; case 'v': showversions = B_TRUE; @@ -2819,7 +3031,7 @@ zpool_do_upgrade(int argc, char **argv) case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); - usage(FALSE); + usage(B_FALSE); } } @@ -2830,28 +3042,30 @@ zpool_do_upgrade(int argc, char **argv) if (cb.cb_all || argc != 0) { (void) fprintf(stderr, gettext("-v option is " "incompatible with other arguments\n")); - usage(FALSE); + usage(B_FALSE); } } else if (cb.cb_all) { if (argc != 0) { (void) fprintf(stderr, gettext("-a option is " "incompatible with other arguments\n")); - usage(FALSE); + usage(B_FALSE); } } (void) printf(gettext("This system is currently running ZFS version " "%llu.\n\n"), ZFS_VERSION); - cb.cb_first = TRUE; + cb.cb_first = B_TRUE; if (showversions) { (void) printf(gettext("The following versions are " "suppored:\n\n")); (void) printf(gettext("VER DESCRIPTION\n")); (void) printf("--- -----------------------------------------" "---------------\n"); - (void) printf(gettext(" 1 Initial ZFS version.\n")); + (void) printf(gettext(" 1 Initial ZFS version\n")); (void) printf(gettext(" 2 Ditto blocks " "(replicated metadata)\n")); + (void) printf(gettext(" 3 Hot spares and double parity " + "RAID-Z\n")); (void) printf(gettext("\nFor more information on a particular " "version, including supported releases, 
see:\n\n")); (void) printf("http://www.opensolaris.org/os/community/zfs/" @@ -2860,7 +3074,7 @@ zpool_do_upgrade(int argc, char **argv) } else if (argc == 0) { int notfound; - ret = zpool_iter(upgrade_cb, &cb); + ret = zpool_iter(g_zfs, upgrade_cb, &cb); notfound = cb.cb_first; if (!cb.cb_all && ret == 0) { @@ -2868,7 +3082,7 @@ zpool_do_upgrade(int argc, char **argv) (void) printf("\n"); cb.cb_first = B_TRUE; cb.cb_newer = B_TRUE; - ret = zpool_iter(upgrade_cb, &cb); + ret = zpool_iter(g_zfs, upgrade_cb, &cb); if (!cb.cb_first) { notfound = B_FALSE; (void) printf("\n"); @@ -2885,7 +3099,7 @@ zpool_do_upgrade(int argc, char **argv) "their associated\nfeatures.\n")); } } else { - ret = for_each_pool(argc, argv, FALSE, upgrade_one, NULL); + ret = for_each_pool(argc, argv, B_FALSE, upgrade_one, NULL); } return (ret); @@ -2901,6 +3115,14 @@ main(int argc, char **argv) (void) setlocale(LC_ALL, ""); (void) textdomain(TEXT_DOMAIN); + if ((g_zfs = libzfs_init()) == NULL) { + (void) fprintf(stderr, gettext("internal error: failed to " + "initialize ZFS library")); + return (1); + } + + libzfs_print_on_error(g_zfs, B_TRUE); + opterr = 0; /* @@ -2908,7 +3130,7 @@ main(int argc, char **argv) */ if (argc < 2) { (void) fprintf(stderr, gettext("missing command\n")); - usage(FALSE); + usage(B_FALSE); } cmdname = argv[1]; @@ -2917,7 +3139,7 @@ main(int argc, char **argv) * Special case '-?' */ if (strcmp(cmdname, "-?") == 0) - usage(TRUE); + usage(B_TRUE); /* * Run the appropriate command. @@ -2946,9 +3168,11 @@ main(int argc, char **argv) if (i == NCOMMAND) { (void) fprintf(stderr, gettext("unrecognized " "command '%s'\n"), cmdname); - usage(FALSE); + usage(B_FALSE); } + libzfs_fini(g_zfs); + /* * The 'ZFS_ABORT' environment variable causes us to dump core on exit * for the purposes of running ::findleaks. 
diff --git a/usr/src/cmd/zpool/zpool_util.h b/usr/src/cmd/zpool/zpool_util.h index b2243e8f08..3cb91756de 100644 --- a/usr/src/cmd/zpool/zpool_util.h +++ b/usr/src/cmd/zpool/zpool_util.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -47,12 +46,12 @@ void no_memory(void); * Virtual device functions */ nvlist_t *make_root_vdev(nvlist_t *poolconfig, int force, int check_rep, - int argc, char **argv); + boolean_t isreplace, int argc, char **argv); /* * Pool list functions */ -int for_each_pool(int, char **, int unavail, zpool_iter_f, void *); +int for_each_pool(int, char **, boolean_t unavail, zpool_iter_f, void *); typedef struct zpool_list zpool_list_t; @@ -69,6 +68,8 @@ void pool_list_remove(zpool_list_t *, zpool_handle_t *); int unmount_datasets(zpool_handle_t *, int); int mount_share_datasets(zpool_handle_t *, const char *); +libzfs_handle_t *g_zfs; + #ifdef __cplusplus } #endif diff --git a/usr/src/cmd/zpool/zpool_vdev.c b/usr/src/cmd/zpool/zpool_vdev.c index 6fba820d10..fa106dffb9 100644 --- a/usr/src/cmd/zpool/zpool_vdev.c +++ b/usr/src/cmd/zpool/zpool_vdev.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -34,14 +35,19 @@ * file=(path=...) * * Group vdevs - * raidz=(...) + * raidz[1|2]=(...) * mirror=(...) * + * Hot spares + * * While the underlying implementation supports it, group vdevs cannot contain * other group vdevs. All userland verification of devices is contained within * this file. 
If successful, the nvlist returned can be passed directly to the * kernel; we've done as much verification as possible in userland. * + * Hot spares are a special case, and passed down as an array of disk vdevs, at + * the same level as the root of the vdev tree. + * * The only function exported by this file is 'get_vdev_spec'. The function * performs several passes: * @@ -84,10 +90,11 @@ * vdev_error() function keeps track of whether we have seen an error yet, and * prints out a header if its the first error we've seen. */ -int error_seen; -int is_force; +boolean_t error_seen; +boolean_t is_force; -void +/*PRINTFLIKE1*/ +static void vdev_error(const char *fmt, ...) { va_list ap; @@ -100,7 +107,7 @@ vdev_error(const char *fmt, ...) else (void) fprintf(stderr, gettext("the following errors " "must be manually repaired:\n")); - error_seen = TRUE; + error_seen = B_TRUE; } va_start(ap, fmt); @@ -112,10 +119,10 @@ static void libdiskmgt_error(int error) { /* - * ENXIO is a valid error message if the device doesn't live in + * ENXIO/ENODEV is a valid error message if the device doesn't live in * /dev/dsk. Don't bother printing an error message in this case. */ - if (error == ENXIO) + if (error == ENXIO || error == ENODEV) return; (void) fprintf(stderr, gettext("warning: device in use checking " @@ -126,7 +133,7 @@ libdiskmgt_error(int error) * Validate a device, passing the bulk of the work off to libdiskmgt. */ int -check_slice(const char *path, int force, int wholedisk) +check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare) { char *msg; int error = 0; @@ -137,12 +144,18 @@ check_slice(const char *path, int force, int wholedisk) if (error != 0) { libdiskmgt_error(error); return (0); - } else { + } else if (!isspare || + strstr(msg, gettext("hot spare")) == NULL) { + /* + * The above check is a rather severe hack. It would + * probably make more sense to have DM_WHO_ZPOOL_SPARE + * instead. 
+ */ vdev_error("%s", msg); free(msg); + ret = -1; } - ret = -1; } /* @@ -172,7 +185,7 @@ check_slice(const char *path, int force, int wholedisk) */ /* ARGSUSED */ int -check_disk(const char *name, dm_descriptor_t disk, int force) +check_disk(const char *name, dm_descriptor_t disk, int force, int isspare) { dm_descriptor_t *drive, *media, *slice; int err = 0; @@ -227,8 +240,12 @@ check_disk(const char *name, dm_descriptor_t disk, int force) * overlapping slices because we are using the whole disk. */ for (i = 0; slice[i] != NULL; i++) { - if (check_slice(dm_get_name(slice[i], &err), force, TRUE) != 0) + char *name = dm_get_name(slice[i], &err); + + if (check_slice(name, force, B_TRUE, isspare) != 0) ret = -1; + + dm_free_name(name); } dm_free_descriptors(slice); @@ -239,7 +256,7 @@ check_disk(const char *name, dm_descriptor_t disk, int force) * Validate a device. */ int -check_device(const char *path, int force) +check_device(const char *path, boolean_t force, boolean_t isspare) { dm_descriptor_t desc; int err; @@ -252,12 +269,12 @@ check_device(const char *path, int force) assert(dev != NULL); dev++; if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) { - err = check_disk(path, desc, force); + err = check_disk(path, desc, force, isspare); dm_free_descriptor(desc); return (err); } - return (check_slice(path, force, FALSE)); + return (check_slice(path, force, B_FALSE, isspare)); } /* @@ -265,17 +282,18 @@ check_device(const char *path, int force) * not in use by another pool. 
*/ int -check_file(const char *file, int force) +check_file(const char *file, boolean_t force, boolean_t isspare) { char *name; int fd; int ret = 0; pool_state_t state; + boolean_t inuse; if ((fd = open(file, O_RDONLY)) < 0) return (0); - if (zpool_in_use(fd, &state, &name)) { + if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) { const char *desc; switch (state) { @@ -296,9 +314,24 @@ check_file(const char *file, int force) break; } - if (state == POOL_STATE_ACTIVE || !force) { - vdev_error(gettext("%s is part of %s pool '%s'\n"), - file, desc, name); + /* + * Allow hot spares to be shared between pools. + */ + if (state == POOL_STATE_SPARE && isspare) + return (0); + + if (state == POOL_STATE_ACTIVE || + state == POOL_STATE_SPARE || !force) { + switch (state) { + case POOL_STATE_SPARE: + vdev_error(gettext("%s is reserved as a hot " + "spare for pool %s\n"), file, name); + break; + default: + vdev_error(gettext("%s is part of %s pool " + "'%s'\n"), file, desc, name); + break; + } ret = -1; } @@ -309,16 +342,16 @@ check_file(const char *file, int force) return (ret); } -static int +static boolean_t is_whole_disk(const char *arg, struct stat64 *statbuf) { char path[MAXPATHLEN]; (void) snprintf(path, sizeof (path), "%s%s", arg, BACKUP_SLICE); if (stat64(path, statbuf) == 0) - return (TRUE); + return (B_TRUE); - return (FALSE); + return (B_FALSE); } /* @@ -337,7 +370,7 @@ make_leaf_vdev(const char *arg) struct stat64 statbuf; nvlist_t *vdev = NULL; char *type = NULL; - int wholedisk = FALSE; + boolean_t wholedisk = B_FALSE; /* * Determine what type of vdev this is, and put the full path into @@ -350,7 +383,7 @@ make_leaf_vdev(const char *arg) * examining the file descriptor afterwards. 
*/ if (is_whole_disk(arg, &statbuf)) { - wholedisk = TRUE; + wholedisk = B_TRUE; } else if (stat64(arg, &statbuf) != 0) { (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), @@ -369,7 +402,7 @@ make_leaf_vdev(const char *arg) (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, arg); if (is_whole_disk(path, &statbuf)) { - wholedisk = TRUE; + wholedisk = B_TRUE; } else if (stat64(path, &statbuf) != 0) { /* * If we got ENOENT, then the user gave us @@ -472,8 +505,9 @@ make_leaf_vdev(const char *arg) * spec have consistent replication levels. */ typedef struct replication_level { - char *type; - int level; + char *zprl_type; + uint64_t zprl_children; + uint64_t zprl_parity; } replication_level_t; /* @@ -482,7 +516,7 @@ typedef struct replication_level { * an error message will be displayed for each self-inconsistent vdev. */ replication_level_t * -get_replication(nvlist_t *nvroot, int fatal) +get_replication(nvlist_t *nvroot, boolean_t fatal) { nvlist_t **top; uint_t t, toplevels; @@ -491,14 +525,14 @@ get_replication(nvlist_t *nvroot, int fatal) nvlist_t *nv; char *type; replication_level_t lastrep, rep, *ret; - int dontreport; + boolean_t dontreport; ret = safe_malloc(sizeof (replication_level_t)); verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &top, &toplevels) == 0); - lastrep.type = NULL; + lastrep.zprl_type = NULL; for (t = 0; t < toplevels; t++) { nv = top[t]; @@ -509,8 +543,9 @@ get_replication(nvlist_t *nvroot, int fatal) /* * This is a 'file' or 'disk' vdev. */ - rep.type = type; - rep.level = 1; + rep.zprl_type = type; + rep.zprl_children = 1; + rep.zprl_parity = 0; } else { uint64_t vdev_size; @@ -523,8 +558,17 @@ get_replication(nvlist_t *nvroot, int fatal) * We also check that the size of each vdev (if it can * be determined) is the same. 
*/ - rep.type = type; - rep.level = 0; + rep.zprl_type = type; + rep.zprl_children = 0; + + if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { + verify(nvlist_lookup_uint64(nv, + ZPOOL_CONFIG_NPARITY, + &rep.zprl_parity) == 0); + assert(rep.zprl_parity != 0); + } else { + rep.zprl_parity = 0; + } /* * The 'dontreport' variable indicatest that we've @@ -542,7 +586,7 @@ get_replication(nvlist_t *nvroot, int fatal) char *childtype; int fd, err; - rep.level++; + rep.zprl_children++; verify(nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &childtype) == 0); @@ -563,10 +607,10 @@ get_replication(nvlist_t *nvroot, int fatal) "mismatched replication " "level: %s contains both " "files and devices\n"), - rep.type); + rep.zprl_type); else return (NULL); - dontreport = TRUE; + dontreport = B_TRUE; } /* @@ -611,10 +655,10 @@ get_replication(nvlist_t *nvroot, int fatal) vdev_error(gettext( "%s contains devices of " "different sizes\n"), - rep.type); + rep.zprl_type); else return (NULL); - dontreport = TRUE; + dontreport = B_TRUE; } type = childtype; @@ -627,30 +671,45 @@ get_replication(nvlist_t *nvroot, int fatal) * vdev in 'rep'. Compare it to 'lastrep' to see if its * different. 
*/ - if (lastrep.type != NULL) { - if (strcmp(lastrep.type, rep.type) != 0) { + if (lastrep.zprl_type != NULL) { + if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) { if (ret != NULL) free(ret); ret = NULL; if (fatal) vdev_error(gettext( - "mismatched replication " - "level: both %s and %s vdevs are " + "mismatched replication level: " + "both %s and %s vdevs are " "present\n"), - lastrep.type, rep.type); + lastrep.zprl_type, rep.zprl_type); else return (NULL); - } else if (lastrep.level != rep.level) { + } else if (lastrep.zprl_parity != rep.zprl_parity) { if (ret) free(ret); ret = NULL; if (fatal) vdev_error(gettext( - "mismatched replication " - "level: %d-way %s and %d-way %s " + "mismatched replication level: " + "both %llu and %llu device parity " + "%s vdevs are present\n"), + lastrep.zprl_parity, + rep.zprl_parity, + rep.zprl_type); + else + return (NULL); + } else if (lastrep.zprl_children != rep.zprl_children) { + if (ret) + free(ret); + ret = NULL; + if (fatal) + vdev_error(gettext( + "mismatched replication level: " + "both %llu-way and %llu-way %s " "vdevs are present\n"), - lastrep.level, lastrep.type, - rep.level, rep.type); + lastrep.zprl_children, + rep.zprl_children, + rep.zprl_type); else return (NULL); } @@ -658,10 +717,8 @@ get_replication(nvlist_t *nvroot, int fatal) lastrep = rep; } - if (ret != NULL) { - ret->type = rep.type; - ret->level = rep.level; - } + if (ret != NULL) + *ret = rep; return (ret); } @@ -687,7 +744,7 @@ check_replication(nvlist_t *config, nvlist_t *newroot) verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - if ((current = get_replication(nvroot, FALSE)) == NULL) + if ((current = get_replication(nvroot, B_FALSE)) == NULL) return (0); } @@ -695,7 +752,7 @@ check_replication(nvlist_t *config, nvlist_t *newroot) * Get the replication level of the new vdev spec, reporting any * inconsistencies found. 
*/ - if ((new = get_replication(newroot, TRUE)) == NULL) { + if ((new = get_replication(newroot, B_TRUE)) == NULL) { free(current); return (-1); } @@ -706,13 +763,24 @@ check_replication(nvlist_t *config, nvlist_t *newroot) */ ret = 0; if (current != NULL) { - if (strcmp(current->type, new->type) != 0 || - current->level != new->level) { + if (strcmp(current->zprl_type, new->zprl_type) != 0) { vdev_error(gettext( - "mismatched replication level: pool uses %d-way %s " - "and new vdev uses %d-way %s\n"), - current->level, current->type, new->level, - new->type); + "mismatched replication level: pool uses %s " + "and new vdev is %s\n"), + current->zprl_type, new->zprl_type); + ret = -1; + } else if (current->zprl_parity != new->zprl_parity) { + vdev_error(gettext( + "mismatched replication level: pool uses %llu " + "device parity and new vdev uses %llu\n"), + current->zprl_parity, new->zprl_parity); + ret = -1; + } else if (current->zprl_children != new->zprl_children) { + vdev_error(gettext( + "mismatched replication level: pool uses %llu-way " + "%s and new vdev uses %llu-way %s\n"), + current->zprl_children, current->zprl_type, + new->zprl_children, new->zprl_type); ret = -1; } } @@ -795,10 +863,12 @@ label_disk(char *name) (void) fprintf(stderr, gettext("use fdisk(1M) to partition " "the disk, and provide a specific slice\n")); (void) close(fd); + efi_free(vtoc); return (-1); } (void) close(fd); + efi_free(vtoc); return (0); } @@ -892,20 +962,75 @@ make_disks(nvlist_t *nv) if ((ret = make_disks(child[c])) != 0) return (ret); + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, + &child, &children) == 0) + for (c = 0; c < children; c++) + if ((ret = make_disks(child[c])) != 0) + return (ret); + return (0); } /* + * Determine if the given path is a hot spare within the given configuration. 
+ */ +static boolean_t +is_spare(nvlist_t *config, const char *path) +{ + int fd; + pool_state_t state; + char *name; + nvlist_t *label; + uint64_t guid, spareguid; + nvlist_t *nvroot; + nvlist_t **spares; + uint_t i, nspares; + boolean_t inuse; + + if ((fd = open(path, O_RDONLY)) < 0) + return (B_FALSE); + + if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 || + !inuse || + state != POOL_STATE_SPARE || + zpool_read_label(fd, &label) != 0) { + (void) close(fd); + return (B_FALSE); + } + + (void) close(fd); + verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0); + nvlist_free(label); + + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + for (i = 0; i < nspares; i++) { + verify(nvlist_lookup_uint64(spares[i], + ZPOOL_CONFIG_GUID, &spareguid) == 0); + if (spareguid == guid) + return (B_TRUE); + } + } + + return (B_FALSE); +} + +/* * Go through and find any devices that are in use. We rely on libdiskmgt for * the majority of this task. */ int -check_in_use(nvlist_t *nv, int force) +check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing, + int isspare) { nvlist_t **child; uint_t c, children; char *type, *path; int ret; + char buf[MAXPATHLEN]; + uint64_t wholedisk; verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); @@ -914,22 +1039,76 @@ check_in_use(nvlist_t *nv, int force) verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); + /* + * As a generic check, we look to see if this is a replace of a + * hot spare within the same pool. If so, we allow it + * regardless of what libdiskmgt or zpool_in_use() says. 
+ */ + if (isreplacing) { + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, + &wholedisk) == 0 && wholedisk) + (void) snprintf(buf, sizeof (buf), "%ss0", + path); + else + (void) strlcpy(buf, path, sizeof (buf)); + if (is_spare(config, buf)) + return (0); + } + if (strcmp(type, VDEV_TYPE_DISK) == 0) - ret = check_device(path, force); + ret = check_device(path, force, isspare); if (strcmp(type, VDEV_TYPE_FILE) == 0) - ret = check_file(path, force); + ret = check_file(path, force, isspare); return (ret); } for (c = 0; c < children; c++) - if ((ret = check_in_use(child[c], force)) != 0) + if ((ret = check_in_use(config, child[c], force, + isreplacing, B_FALSE)) != 0) return (ret); + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, + &child, &children) == 0) + for (c = 0; c < children; c++) + if ((ret = check_in_use(config, child[c], force, + isreplacing, B_TRUE)) != 0) + return (ret); + return (0); } +const char * +is_grouping(const char *type, int *mindev) +{ + if (strcmp(type, "raidz") == 0 || strcmp(type, "raidz1") == 0) { + if (mindev != NULL) + *mindev = 2; + return (VDEV_TYPE_RAIDZ); + } + + if (strcmp(type, "raidz2") == 0) { + if (mindev != NULL) + *mindev = 3; + return (VDEV_TYPE_RAIDZ); + } + + if (strcmp(type, "mirror") == 0) { + if (mindev != NULL) + *mindev = 2; + return (VDEV_TYPE_MIRROR); + } + + if (strcmp(type, "spare") == 0) { + if (mindev != NULL) + *mindev = 1; + return (VDEV_TYPE_SPARE); + } + + return (NULL); +} + /* * Construct a syntactically valid vdev specification, * and ensure that all devices and files exist and can be opened. 
@@ -939,11 +1118,14 @@ check_in_use(nvlist_t *nv, int force) nvlist_t * construct_spec(int argc, char **argv) { - nvlist_t *nvroot, *nv, **top; - int t, toplevels; + nvlist_t *nvroot, *nv, **top, **spares; + int t, toplevels, mindev, nspares; + const char *type; top = NULL; toplevels = 0; + spares = NULL; + nspares = 0; while (argc > 0) { nv = NULL; @@ -952,17 +1134,20 @@ construct_spec(int argc, char **argv) * If it's a mirror or raidz, the subsequent arguments are * its leaves -- until we encounter the next mirror or raidz. */ - if (strcmp(argv[0], VDEV_TYPE_MIRROR) == 0 || - strcmp(argv[0], VDEV_TYPE_RAIDZ) == 0) { - - char *type = argv[0]; + if ((type = is_grouping(argv[0], &mindev)) != NULL) { nvlist_t **child = NULL; - int children = 0; - int c; + int c, children = 0; + + if (strcmp(type, VDEV_TYPE_SPARE) == 0 && + spares != NULL) { + (void) fprintf(stderr, gettext("invalid vdev " + "specification: 'spare' can be " + "specified only once\n")); + return (NULL); + } for (c = 1; c < argc; c++) { - if (strcmp(argv[c], VDEV_TYPE_MIRROR) == 0 || - strcmp(argv[c], VDEV_TYPE_RAIDZ) == 0) + if (is_grouping(argv[c], NULL) != NULL) break; children++; child = realloc(child, @@ -974,29 +1159,38 @@ construct_spec(int argc, char **argv) child[children - 1] = nv; } - argc -= c; - argv += c; - - /* - * Mirrors and RAID-Z devices require at least - * two components. 
- */ - if (children < 2) { - (void) fprintf(stderr, - gettext("invalid vdev specification: " - "%s requires at least 2 devices\n"), type); + if (children < mindev) { + (void) fprintf(stderr, gettext("invalid vdev " + "specification: %s requires at least %d " + "devices\n"), argv[0], mindev); return (NULL); } - verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) == 0); - verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, - type) == 0); - verify(nvlist_add_nvlist_array(nv, - ZPOOL_CONFIG_CHILDREN, child, children) == 0); + argc -= c; + argv += c; + + if (strcmp(type, VDEV_TYPE_SPARE) == 0) { + spares = child; + nspares = children; + continue; + } else { + verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, + 0) == 0); + verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, + type) == 0); + if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { + verify(nvlist_add_uint64(nv, + ZPOOL_CONFIG_NPARITY, + mindev - 1) == 0); + } + verify(nvlist_add_nvlist_array(nv, + ZPOOL_CONFIG_CHILDREN, child, + children) == 0); - for (c = 0; c < children; c++) - nvlist_free(child[c]); - free(child); + for (c = 0; c < children; c++) + nvlist_free(child[c]); + free(child); + } } else { /* * We have a device. Pass off to make_leaf_vdev() to @@ -1015,6 +1209,13 @@ construct_spec(int argc, char **argv) top[toplevels - 1] = nv; } + if (toplevels == 0 && nspares == 0) { + (void) fprintf(stderr, gettext("invalid vdev " + "specification: at least one toplevel vdev must be " + "specified\n")); + return (NULL); + } + /* * Finally, create nvroot and add all top-level vdevs to it. 
*/ @@ -1023,9 +1224,16 @@ construct_spec(int argc, char **argv) VDEV_TYPE_ROOT) == 0); verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, top, toplevels) == 0); + if (nspares != 0) + verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + spares, nspares) == 0); for (t = 0; t < toplevels; t++) nvlist_free(top[t]); + for (t = 0; t < nspares; t++) + nvlist_free(spares[t]); + if (spares) + free(spares); free(top); return (nvroot); @@ -1043,7 +1251,7 @@ construct_spec(int argc, char **argv) */ nvlist_t * make_root_vdev(nvlist_t *poolconfig, int force, int check_rep, - int argc, char **argv) + boolean_t isreplacing, int argc, char **argv) { nvlist_t *newroot; @@ -1063,7 +1271,8 @@ make_root_vdev(nvlist_t *poolconfig, int force, int check_rep, * uses (such as a dedicated dump device) that even '-f' cannot * override. */ - if (check_in_use(newroot, force) != 0) { + if (check_in_use(poolconfig, newroot, force, isreplacing, + B_FALSE) != 0) { nvlist_free(newroot); return (NULL); } diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index c74f227bed..e8065c74f5 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -114,6 +114,7 @@ static uint64_t zopt_vdevtime; static int zopt_ashift = SPA_MINBLOCKSHIFT; static int zopt_mirrors = 2; static int zopt_raidz = 4; +static int zopt_raidz_parity = 1; static size_t zopt_vdev_size = SPA_MINDEVSIZE; static int zopt_datasets = 7; static int zopt_threads = 23; @@ -346,6 +347,7 @@ usage(void) "\t[-a alignment_shift (default: %d) (use 0 for random)]\n" "\t[-m mirror_copies (default: %d)]\n" "\t[-r raidz_disks (default: %d)]\n" + "\t[-R raidz_parity (default: %d)]\n" "\t[-d datasets (default: %d)]\n" "\t[-t threads (default: %d)]\n" "\t[-g gang_block_threshold (default: %s)]\n" @@ -364,6 +366,7 @@ usage(void) zopt_ashift, /* -a */ zopt_mirrors, /* -m */ zopt_raidz, /* -r */ + zopt_raidz_parity, /* -R */ zopt_datasets, /* -d */ zopt_threads, /* -t */ nice_gang_bang, /* -g */ @@ -407,7 
+410,7 @@ process_options(int argc, char **argv) zio_gang_bang = 32 << 10; while ((opt = getopt(argc, argv, - "v:s:a:m:r:d:t:g:i:k:p:f:VET:P:")) != EOF) { + "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:")) != EOF) { value = 0; switch (opt) { case 'v': @@ -415,6 +418,7 @@ process_options(int argc, char **argv) case 'a': case 'm': case 'r': + case 'R': case 'd': case 't': case 'g': @@ -440,6 +444,9 @@ process_options(int argc, char **argv) case 'r': zopt_raidz = MAX(1, value); break; + case 'R': + zopt_raidz_parity = MIN(MAX(value, 1), 2); + break; case 'd': zopt_datasets = MAX(1, value); break; @@ -480,8 +487,10 @@ process_options(int argc, char **argv) } } + zopt_raidz_parity = MIN(zopt_raidz_parity, zopt_raidz - 1); + zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time / zopt_vdevs : UINT64_MAX); - zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz >= 2 ? 2 : 1) - 1; + zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz_parity + 1) - 1; } static uint64_t @@ -542,6 +551,8 @@ make_vdev_raidz(size_t size, int r) VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0); VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE, VDEV_TYPE_RAIDZ) == 0); + VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY, + zopt_raidz_parity) == 0); VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN, child, r) == 0); @@ -671,7 +682,7 @@ ztest_replay_create(ztest_replay_t *zr, lr_create_t *lr, boolean_t byteswap) error = dmu_object_claim(os, lr->lr_doid, lr->lr_mode, 0, DMU_OT_NONE, 0, tx); - ASSERT(error == 0); + ASSERT3U(error, ==, 0); dmu_tx_commit(tx); if (zopt_verbose >= 5) { diff --git a/usr/src/lib/libdiskmgt/common/entry.c b/usr/src/lib/libdiskmgt/common/entry.c index 860801b41d..61bc9d60d4 100644 --- a/usr/src/lib/libdiskmgt/common/entry.c +++ b/usr/src/lib/libdiskmgt/common/entry.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). 
You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -966,6 +965,10 @@ dm_get_usage_string(char *what, char *how, char **usage_string) *usage_string = dgettext(TEXT_DOMAIN, "%s is part of active ZFS pool %s. Please see zpool(1M)." "\n"); + } else if (strcmp(what, DM_USE_SPARE_ZPOOL) == 0) { + *usage_string = dgettext(TEXT_DOMAIN, + "%s is reserved as a hot spare for ZFS pool %s. Please " + "see zpool(1M).\n"); } } void diff --git a/usr/src/lib/libdiskmgt/common/inuse_zpool.c b/usr/src/lib/libdiskmgt/common/inuse_zpool.c index 1637ace92d..a7cf203a2f 100644 --- a/usr/src/lib/libdiskmgt/common/inuse_zpool.c +++ b/usr/src/lib/libdiskmgt/common/inuse_zpool.c @@ -46,17 +46,21 @@ #include <ctype.h> #include <sys/fs/zfs.h> +#include <libzfs.h> #include "libdiskmgt.h" #include "disks_private.h" /* * Pointers to libzfs.so functions that we dynamically resolve. 
*/ -static int (*zfsdl_zpool_in_use)(int fd, pool_state_t *state, char **name); +static int (*zfsdl_zpool_in_use)(libzfs_handle_t *hdl, int fd, + pool_state_t *state, char **name, boolean_t *); +static libzfs_handle_t *(*zfsdl_libzfs_init)(boolean_t); static mutex_t init_lock = DEFAULTMUTEX; static rwlock_t zpool_lock = DEFAULTRWLOCK; -static int initialized = 0; +static boolean_t initialized; +static libzfs_handle_t *zfs_hdl; static void *init_zpool(); @@ -67,6 +71,7 @@ inuse_zpool_common(char *slice, nvlist_t *attrs, int *errp, char *type) char *name; int fd; pool_state_t state; + boolean_t used; *errp = 0; if (slice == NULL) { @@ -83,15 +88,21 @@ inuse_zpool_common(char *slice, nvlist_t *attrs, int *errp, char *type) (void) mutex_unlock(&init_lock); return (found); } - initialized = 1; + initialized = B_TRUE; } (void) mutex_unlock(&init_lock); (void) rw_rdlock(&zpool_lock); if ((fd = open(slice, O_RDONLY)) > 0) { - if (zfsdl_zpool_in_use(fd, &state, &name)) { + name = NULL; + if (zfsdl_zpool_in_use(zfs_hdl, fd, &state, + &name, &used) == 0 && used) { if (strcmp(type, DM_USE_ACTIVE_ZPOOL) == 0) { - if (state == POOL_STATE_ACTIVE) + if (state == POOL_STATE_ACTIVE) { found = 1; + } else if (state == POOL_STATE_SPARE) { + found = 1; + type = DM_USE_SPARE_ZPOOL; + } } else { found = 1; } @@ -100,9 +111,11 @@ inuse_zpool_common(char *slice, nvlist_t *attrs, int *errp, char *type) libdiskmgt_add_str(attrs, DM_USED_BY, type, errp); libdiskmgt_add_str(attrs, DM_USED_NAME, - name, errp); + name, errp); } } + if (name) + free(name); (void) close(fd); } (void) rw_unlock(&zpool_lock); @@ -133,15 +146,24 @@ init_zpool() if ((lh = dlopen("libzfs.so", RTLD_NOW)) == NULL) { return (lh); } + /* * Instantiate the functions needed to get zpool configuration * data */ - if ((zfsdl_zpool_in_use = (int (*)(int, pool_state_t *, char **)) + if ((zfsdl_libzfs_init = (libzfs_handle_t *(*)(boolean_t)) + dlsym(lh, "libzfs_init")) == NULL || + (zfsdl_zpool_in_use = (int (*)(libzfs_handle_t 
*, int, + pool_state_t *, char **, boolean_t *)) dlsym(lh, "zpool_in_use")) == NULL) { (void) dlclose(lh); return (NULL); } + if ((zfs_hdl = (*zfsdl_libzfs_init)(B_FALSE)) == NULL) { + (void) dlclose(lh); + return (NULL); + } + return (lh); } diff --git a/usr/src/lib/libdiskmgt/common/libdiskmgt.h b/usr/src/lib/libdiskmgt/common/libdiskmgt.h index aa6df0967e..7d6fef46d4 100644 --- a/usr/src/lib/libdiskmgt/common/libdiskmgt.h +++ b/usr/src/lib/libdiskmgt/common/libdiskmgt.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -215,6 +214,7 @@ typedef enum { #define DM_USE_VFSTAB "vfstab" #define DM_USE_EXPORTED_ZPOOL "exported_zpool" #define DM_USE_ACTIVE_ZPOOL "active_zpool" +#define DM_USE_SPARE_ZPOOL "spare_zpool" /* event */ #define DM_EV_NAME "name" diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h index 0044ccd7c9..bf4b2874ad 100644 --- a/usr/src/lib/libzfs/common/libzfs.h +++ b/usr/src/lib/libzfs/common/libzfs.h @@ -47,16 +47,78 @@ extern "C" { #define ZFS_MAXPROPLEN MAXPATHLEN /* + * libzfs errors + */ +enum { + EZFS_NOMEM = 2000, /* out of memory */ + EZFS_BADPROP, /* invalid property value */ + EZFS_PROPREADONLY, /* cannot set readonly property */ + EZFS_PROPTYPE, /* property does not apply to dataset type */ + EZFS_PROPNONINHERIT, /* property is not inheritable */ + EZFS_PROPSPACE, /* bad quota or reservation */ + EZFS_BADTYPE, /* dataset is not of appropriate type */ + EZFS_BUSY, /* pool or dataset is busy */ + EZFS_EXISTS, /* pool or dataset already exists */ + 
EZFS_NOENT, /* no such pool or dataset */ + EZFS_BADSTREAM, /* bad backup stream */ + EZFS_DSREADONLY, /* dataset is readonly */ + EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */ + EZFS_VOLHASDATA, /* volume already contains data */ + EZFS_INVALIDNAME, /* invalid dataset name */ + EZFS_BADRESTORE, /* unable to restore to destination */ + EZFS_BADBACKUP, /* backup failed */ + EZFS_BADTARGET, /* bad attach/detach/replace target */ + EZFS_NODEVICE, /* no such device in pool */ + EZFS_BADDEV, /* invalid device to add */ + EZFS_NOREPLICAS, /* no valid replicas */ + EZFS_RESILVERING, /* currently resilvering */ + EZFS_BADVERSION, /* unsupported version */ + EZFS_POOLUNAVAIL, /* pool is currently unavailable */ + EZFS_DEVOVERFLOW, /* too many devices in one vdev */ + EZFS_BADPATH, /* must be an absolute path */ + EZFS_CROSSTARGET, /* rename or clone across pool or dataset */ + EZFS_ZONED, /* used improperly in local zone */ + EZFS_MOUNTFAILED, /* failed to mount dataset */ + EZFS_UMOUNTFAILED, /* failed to unmount dataset */ + EZFS_UNSHAREFAILED, /* unshare(1M) failed */ + EZFS_SHAREFAILED, /* share(1M) failed */ + EZFS_DEVLINKS, /* failed to create zvol links */ + EZFS_PERM, /* permission denied */ + EZFS_NOSPC, /* out of space */ + EZFS_IO, /* I/O error */ + EZFS_INTR, /* signal received */ + EZFS_ISSPARE, /* device is a hot spare */ + EZFS_INVALCONFIG, /* invalid vdev configuration */ + EZFS_UNKNOWN /* unknown error */ +}; + +/* * Basic handle types */ typedef struct zfs_handle zfs_handle_t; typedef struct zpool_handle zpool_handle_t; +typedef struct libzfs_handle libzfs_handle_t; + +/* + * Library initialization + */ +extern libzfs_handle_t *libzfs_init(void); +extern void libzfs_fini(libzfs_handle_t *); + +extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *); +extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *); + +extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t); + +extern int libzfs_errno(libzfs_handle_t *); +extern const char 
*libzfs_error_action(libzfs_handle_t *); +extern const char *libzfs_error_description(libzfs_handle_t *); /* * Basic handle functions */ -extern zpool_handle_t *zpool_open(const char *); -extern zpool_handle_t *zpool_open_canfail(const char *); +extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *); +extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *); extern void zpool_close(zpool_handle_t *); extern const char *zpool_get_name(zpool_handle_t *); extern uint64_t zpool_get_guid(zpool_handle_t *); @@ -64,17 +126,19 @@ extern uint64_t zpool_get_space_used(zpool_handle_t *); extern uint64_t zpool_get_space_total(zpool_handle_t *); extern int zpool_get_root(zpool_handle_t *, char *, size_t); extern int zpool_get_state(zpool_handle_t *); +extern uint64_t zpool_get_version(zpool_handle_t *); /* * Iterate over all active pools in the system. */ typedef int (*zpool_iter_f)(zpool_handle_t *, void *); -extern int zpool_iter(zpool_iter_f, void *); +extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *); /* * Functions to create and destroy pools */ -extern int zpool_create(const char *, nvlist_t *, const char *); +extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *, + const char *); extern int zpool_destroy(zpool_handle_t *); extern int zpool_add(zpool_handle_t *, nvlist_t *); @@ -88,8 +152,9 @@ extern int zpool_vdev_offline(zpool_handle_t *, const char *, int); extern int zpool_vdev_attach(zpool_handle_t *, const char *, const char *, nvlist_t *, int); extern int zpool_vdev_detach(zpool_handle_t *, const char *); +extern int zpool_vdev_remove(zpool_handle_t *, const char *); extern int zpool_clear(zpool_handle_t *, const char *); -extern uint64_t zpool_vdev_to_guid(zpool_handle_t *, const char *); +extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *); /* * Pool health statistics. 
@@ -143,24 +208,25 @@ extern int zpool_get_errlog(zpool_handle_t *, nvlist_t ***, size_t *); * Import and export functions */ extern int zpool_export(zpool_handle_t *); -extern int zpool_import(nvlist_t *, const char *, const char *); +extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *, + const char *); /* * Search for pools to import */ -extern nvlist_t *zpool_find_import(int, char **); +extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **); /* * Miscellaneous pool functions */ -extern char *zpool_vdev_name(zpool_handle_t *, nvlist_t *); +extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *); extern int zpool_upgrade(zpool_handle_t *); /* * Basic handle manipulations. These functions do not create or destroy the * underlying datasets, only the references to them. */ -extern zfs_handle_t *zfs_open(const char *, int); +extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int); extern void zfs_close(zfs_handle_t *); extern zfs_type_t zfs_get_type(const zfs_handle_t *); extern const char *zfs_get_name(const zfs_handle_t *); @@ -182,11 +248,11 @@ typedef enum { const char *zfs_prop_to_name(zfs_prop_t); int zfs_prop_set(zfs_handle_t *, zfs_prop_t, const char *); int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t, zfs_source_t *, - char *, size_t, int); + char *, size_t, boolean_t); int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *, zfs_source_t *, char *, size_t); uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t); -int zfs_prop_validate(zfs_prop_t, const char *, uint64_t *); +int zfs_prop_validate(libzfs_handle_t *, zfs_prop_t, const char *, uint64_t *); int zfs_prop_inheritable(zfs_prop_t); int zfs_prop_inherit(zfs_handle_t *, zfs_prop_t); const char *zfs_prop_values(zfs_prop_t); @@ -206,7 +272,7 @@ int zfs_get_proplist(char *fields, zfs_prop_t *proplist, int max, int *count, * Iterator functions. 
*/ typedef int (*zfs_iter_f)(zfs_handle_t *, void *); -extern int zfs_iter_root(zfs_iter_f, void *); +extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_dependents(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *); @@ -215,14 +281,16 @@ extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *); /* * Functions to create and destroy datasets. */ -extern int zfs_create(const char *, zfs_type_t, const char *, const char *); +extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t, + const char *, const char *); extern int zfs_destroy(zfs_handle_t *); extern int zfs_clone(zfs_handle_t *, const char *); -extern int zfs_snapshot(const char *); +extern int zfs_snapshot(libzfs_handle_t *, const char *); extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, int); extern int zfs_rename(zfs_handle_t *, const char *); extern int zfs_send(zfs_handle_t *, zfs_handle_t *); -extern int zfs_receive(const char *, int, int, int); +extern int zfs_receive(libzfs_handle_t *, const char *, int, int, int); +extern int zfs_promote(zfs_handle_t *); /* * Miscellaneous functions. @@ -234,7 +302,7 @@ extern int zfs_name_valid(const char *, zfs_type_t); /* * Mount support functions. */ -extern int zfs_is_mounted(zfs_handle_t *, char **); +extern boolean_t zfs_is_mounted(zfs_handle_t *, char **); extern int zfs_mount(zfs_handle_t *, const char *, int); extern int zfs_unmount(zfs_handle_t *, const char *, int); extern int zfs_unmountall(zfs_handle_t *, int); @@ -242,17 +310,12 @@ extern int zfs_unmountall(zfs_handle_t *, int); /* * Share support functions. 
*/ -extern int zfs_is_shared(zfs_handle_t *, char **); +extern boolean_t zfs_is_shared(zfs_handle_t *, char **); extern int zfs_share(zfs_handle_t *); extern int zfs_unshare(zfs_handle_t *, const char *); extern int zfs_unshareall(zfs_handle_t *); /* - * For clients that need to capture error output. - */ -extern void zfs_set_error_handler(void (*)(const char *, va_list)); - -/* * When dealing with nvlists, verify() is extremely useful */ #ifdef NDEBUG @@ -276,12 +339,13 @@ extern int zfs_remove_link(zfs_handle_t *); /* * Given a device or file, determine if it is part of a pool. */ -extern int zpool_in_use(int fd, pool_state_t *state, char **name); +extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **, + boolean_t *); /* * ftyp special. Read the label from a given device. */ -extern nvlist_t *zpool_read_label(int fd); +extern int zpool_read_label(int, nvlist_t **); /* * Create and remove zvol /dev links @@ -289,21 +353,6 @@ extern nvlist_t *zpool_read_label(int fd); extern int zpool_create_zvol_links(zpool_handle_t *); extern int zpool_remove_zvol_links(zpool_handle_t *); -/* - * zoneadmd hack - */ -extern void zfs_init(void); - -/* - * Useful defines - */ -#ifndef TRUE -#define TRUE 1 -#endif -#ifndef FALSE -#define FALSE 0 -#endif - #ifdef __cplusplus } #endif diff --git a/usr/src/lib/libzfs/common/libzfs_changelist.c b/usr/src/lib/libzfs/common/libzfs_changelist.c index 57fcc1497c..04270dfe51 100644 --- a/usr/src/lib/libzfs/common/libzfs_changelist.c +++ b/usr/src/lib/libzfs/common/libzfs_changelist.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -73,11 +72,11 @@ struct prop_changelist { zfs_prop_t cl_realprop; uu_list_pool_t *cl_pool; uu_list_t *cl_list; - int cl_waslegacy; - int cl_allchildren; - int cl_alldependents; + boolean_t cl_waslegacy; + boolean_t cl_allchildren; + boolean_t cl_alldependents; int cl_flags; - int cl_haszonedchild; + boolean_t cl_haszonedchild; }; /* @@ -109,7 +108,8 @@ changelist_prefix(prop_changelist_t *clp) */ if (cn->cn_handle->zfs_volblocksize && clp->cl_realprop == ZFS_PROP_NAME) { - if (zvol_remove_link(cn->cn_handle->zfs_name) != 0) + if (zvol_remove_link(cn->cn_handle->zfs_hdl, + cn->cn_handle->zfs_name) != 0) ret = -1; } else if (zfs_unmount(cn->cn_handle, NULL, clp->cl_flags) != 0) ret = -1; @@ -167,7 +167,8 @@ changelist_postfix(prop_changelist_t *clp) */ if (cn->cn_handle->zfs_volblocksize && clp->cl_realprop == ZFS_PROP_NAME) { - if (zvol_create_link(cn->cn_handle->zfs_name) != 0) + if (zvol_create_link(cn->cn_handle->zfs_hdl, + cn->cn_handle->zfs_name) != 0) ret = -1; continue; } @@ -186,7 +187,7 @@ changelist_postfix(prop_changelist_t *clp) char shareopts[ZFS_MAXPROPLEN]; if (zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARENFS, shareopts, sizeof (shareopts), NULL, NULL, 0, - FALSE) == 0 && strcmp(shareopts, "off") == 0) + B_FALSE) == 0 && strcmp(shareopts, "off") == 0) ret = zfs_unshare(cn->cn_handle, NULL); else ret = zfs_share(cn->cn_handle); @@ -199,22 +200,22 @@ changelist_postfix(prop_changelist_t *clp) /* * Is this "dataset" a child of "parent"? 
*/ -static int +static boolean_t isa_child_of(char *dataset, const char *parent) { int len; /* snapshot does not have a child */ if (strchr(parent, '@')) - return (FALSE); + return (B_FALSE); len = strlen(parent); if (strncmp(dataset, parent, len) == 0 && (dataset[len] == '/' || dataset[len] == '\0')) - return (TRUE); + return (B_TRUE); else - return (FALSE); + return (B_FALSE); } @@ -326,6 +327,9 @@ changelist_free(prop_changelist_t *clp) free(cn); } + uu_list_walk_end(walk); + + uu_list_destroy(clp->cl_list); uu_list_pool_destroy(clp->cl_pool); free(clp); @@ -353,12 +357,18 @@ change_one(zfs_handle_t *zhp, void *data) if (!(zhp->zfs_volblocksize && clp->cl_realprop == ZFS_PROP_NAME) && zfs_prop_get(zhp, clp->cl_prop, property, sizeof (property), &sourcetype, where, sizeof (where), - FALSE) != 0) + B_FALSE) != 0) { + zfs_close(zhp); return (0); + } if (clp->cl_alldependents || clp->cl_allchildren || sourcetype == ZFS_SRC_DEFAULT || sourcetype == ZFS_SRC_INHERITED) { - cn = zfs_malloc(sizeof (prop_changenode_t)); + if ((cn = zfs_alloc(zfs_get_handle(zhp), + sizeof (prop_changenode_t))) == NULL) { + zfs_close(zhp); + return (-1); + } cn->cn_handle = zhp; cn->cn_mounted = zfs_is_mounted(zhp, NULL); @@ -367,7 +377,7 @@ change_one(zfs_handle_t *zhp, void *data) /* indicate if any child is exported to a local zone */ if ((getzoneid() == GLOBAL_ZONEID) && cn->cn_zoned) - clp->cl_haszonedchild = TRUE; + clp->cl_haszonedchild = B_TRUE; uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool); @@ -399,11 +409,14 @@ change_one(zfs_handle_t *zhp, void *data) prop_changelist_t * changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags) { - prop_changelist_t *clp = zfs_malloc(sizeof (prop_changelist_t)); + prop_changelist_t *clp; prop_changenode_t *cn; zfs_handle_t *temp; char property[ZFS_MAXPROPLEN]; + if ((clp = zfs_alloc(zhp->zfs_hdl, sizeof (prop_changelist_t))) == NULL) + return (NULL); + clp->cl_pool = uu_list_pool_create("changelist_pool", sizeof 
(prop_changenode_t), offsetof(prop_changenode_t, cn_listnode), @@ -423,10 +436,10 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags) */ if (prop == ZFS_PROP_NAME) { clp->cl_prop = ZFS_PROP_MOUNTPOINT; - clp->cl_alldependents = TRUE; + clp->cl_alldependents = B_TRUE; } else if (prop == ZFS_PROP_ZONED) { clp->cl_prop = ZFS_PROP_MOUNTPOINT; - clp->cl_allchildren = TRUE; + clp->cl_allchildren = B_TRUE; } else { clp->cl_prop = prop; } @@ -450,8 +463,9 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags) * We have to re-open ourselves because we auto-close all the handles * and can't tell the difference. */ - if ((temp = zfs_open(zfs_get_name(zhp), ZFS_TYPE_ANY)) == NULL) { - free(clp); + if ((temp = zfs_open(zhp->zfs_hdl, zfs_get_name(zhp), + ZFS_TYPE_ANY)) == NULL) { + changelist_free(clp); return (NULL); } @@ -459,7 +473,13 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags) * Always add ourself to the list. We add ourselves to the end so that * we're the last to be unmounted. */ - cn = zfs_malloc(sizeof (prop_changenode_t)); + if ((cn = zfs_alloc(zhp->zfs_hdl, + sizeof (prop_changenode_t))) == NULL) { + zfs_close(temp); + changelist_free(clp); + return (NULL); + } + cn->cn_handle = temp; cn->cn_mounted = zfs_is_mounted(temp, NULL); cn->cn_shared = zfs_is_shared(temp, NULL); @@ -474,10 +494,10 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags) * as the behavior of changelist_postfix() will be different. 
*/ if (zfs_prop_get(zhp, prop, property, sizeof (property), - NULL, NULL, 0, FALSE) == 0 && + NULL, NULL, 0, B_FALSE) == 0 && (strcmp(property, "legacy") == 0 || strcmp(property, "none") == 0 || strcmp(property, "off") == 0)) - clp->cl_waslegacy = TRUE; + clp->cl_waslegacy = B_TRUE; return (clp); } diff --git a/usr/src/lib/libzfs/common/libzfs_config.c b/usr/src/lib/libzfs/common/libzfs_config.c index 71801d5cba..be691f0ced 100644 --- a/usr/src/lib/libzfs/common/libzfs_config.c +++ b/usr/src/lib/libzfs/common/libzfs_config.c @@ -45,9 +45,6 @@ #include "libzfs_impl.h" -static uu_avl_t *namespace_avl; -static uint64_t namespace_generation; - typedef struct config_node { char *cn_name; nvlist_t *cn_config; @@ -73,11 +70,41 @@ config_node_compare(const void *a, const void *b, void *unused) return (0); } +void +namespace_clear(libzfs_handle_t *hdl) +{ + if (hdl->libzfs_ns_avl) { + uu_avl_walk_t *walk; + config_node_t *cn; + + if ((walk = uu_avl_walk_start(hdl->libzfs_ns_avl, + UU_WALK_ROBUST)) == NULL) + return; + + while ((cn = uu_avl_walk_next(walk)) != NULL) { + uu_avl_remove(hdl->libzfs_ns_avl, cn); + nvlist_free(cn->cn_config); + free(cn->cn_name); + free(cn); + } + + uu_avl_walk_end(walk); + + uu_avl_destroy(hdl->libzfs_ns_avl); + hdl->libzfs_ns_avl = NULL; + } + + if (hdl->libzfs_ns_avlpool) { + uu_avl_pool_destroy(hdl->libzfs_ns_avlpool); + hdl->libzfs_ns_avlpool = NULL; + } +} + /* * Loads the pool namespace, or re-loads it if the cache has changed. */ -static void -namespace_reload() +static int +namespace_reload(libzfs_handle_t *hdl) { nvlist_t *config; config_node_t *cn; @@ -85,23 +112,21 @@ namespace_reload() zfs_cmd_t zc = { 0 }; uu_avl_walk_t *walk; - if (namespace_generation == 0) { + if (hdl->libzfs_ns_gen == 0) { /* * This is the first time we've accessed the configuration * cache. Initialize the AVL tree and then fall through to the * common code. 
*/ - uu_avl_pool_t *pool; - - if ((pool = uu_avl_pool_create("config_pool", + if ((hdl->libzfs_ns_avlpool = uu_avl_pool_create("config_pool", sizeof (config_node_t), offsetof(config_node_t, cn_avl), config_node_compare, UU_DEFAULT)) == NULL) - no_memory(); + return (no_memory(hdl)); - if ((namespace_avl = uu_avl_create(pool, NULL, - UU_DEFAULT)) == NULL) - no_memory(); + if ((hdl->libzfs_ns_avl = uu_avl_create(hdl->libzfs_ns_avlpool, + NULL, UU_DEFAULT)) == NULL) + return (no_memory(hdl)); } /* @@ -114,68 +139,92 @@ namespace_reload() * been modified to tell us how much to allocate. */ zc.zc_config_dst_size = 1024; - zc.zc_config_dst = (uint64_t)(uintptr_t) - zfs_malloc(zc.zc_config_dst_size); + if ((zc.zc_config_dst = (uint64_t)(uintptr_t) + zfs_alloc(hdl, zc.zc_config_dst_size)) == NULL) + return (-1); for (;;) { - zc.zc_cookie = namespace_generation; - if (zfs_ioctl(ZFS_IOC_POOL_CONFIGS, &zc) != 0) { + zc.zc_cookie = hdl->libzfs_ns_gen; + if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CONFIGS, &zc) != 0) { switch (errno) { case EEXIST: /* * The namespace hasn't changed. */ free((void *)(uintptr_t)zc.zc_config_dst); - return; + return (0); case ENOMEM: free((void *)(uintptr_t)zc.zc_config_dst); - zc.zc_config_dst = (uint64_t)(uintptr_t) - zfs_malloc(zc.zc_config_dst_size); + if ((zc.zc_config_dst = (uint64_t)(uintptr_t) + zfs_alloc(hdl, zc.zc_config_dst_size)) + == NULL) + return (-1); break; default: - zfs_baderror(errno); + return (zfs_standard_error(hdl, errno, + dgettext(TEXT_DOMAIN, "failed to read " + "pool configuration"))); } } else { - namespace_generation = zc.zc_cookie; + hdl->libzfs_ns_gen = zc.zc_cookie; break; } } - verify(nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst, - zc.zc_config_dst_size, &config, 0) == 0); + if (nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst, + zc.zc_config_dst_size, &config, 0) != 0) + return (no_memory(hdl)); free((void *)(uintptr_t)zc.zc_config_dst); /* * Clear out any existing configuration information. 
*/ - if ((walk = uu_avl_walk_start(namespace_avl, UU_WALK_ROBUST)) == NULL) - no_memory(); + if ((walk = uu_avl_walk_start(hdl->libzfs_ns_avl, + UU_WALK_ROBUST)) == NULL) { + nvlist_free(config); + return (no_memory(hdl)); + } while ((cn = uu_avl_walk_next(walk)) != NULL) { - uu_avl_remove(namespace_avl, cn); + uu_avl_remove(hdl->libzfs_ns_avl, cn); nvlist_free(cn->cn_config); free(cn->cn_name); free(cn); } + uu_avl_walk_end(walk); + elem = NULL; while ((elem = nvlist_next_nvpair(config, elem)) != NULL) { nvlist_t *child; uu_avl_index_t where; - cn = zfs_malloc(sizeof (config_node_t)); - cn->cn_name = zfs_strdup(nvpair_name(elem)); + if ((cn = zfs_alloc(hdl, sizeof (config_node_t))) == NULL) { + nvlist_free(config); + return (-1); + } + + if ((cn->cn_name = zfs_strdup(hdl, + nvpair_name(elem))) == NULL) { + free(cn); + return (-1); + } verify(nvpair_value_nvlist(elem, &child) == 0); - verify(nvlist_dup(child, &cn->cn_config, 0) == 0); - verify(uu_avl_find(namespace_avl, cn, NULL, &where) == NULL); + if (nvlist_dup(child, &cn->cn_config, 0) != 0) { + nvlist_free(config); + return (no_memory(hdl)); + } + verify(uu_avl_find(hdl->libzfs_ns_avl, cn, NULL, &where) + == NULL); - uu_avl_insert(namespace_avl, cn, where); + uu_avl_insert(hdl->libzfs_ns_avl, cn, where); } nvlist_free(config); + return (0); } /* @@ -209,35 +258,43 @@ zpool_refresh_stats(zpool_handle_t *zhp) zhp->zpool_config_size = 1 << 16; zc.zc_config_dst_size = zhp->zpool_config_size; - zc.zc_config_dst = (uint64_t)(uintptr_t) - zfs_malloc(zc.zc_config_dst_size); + if ((zc.zc_config_dst = (uint64_t)(uintptr_t) + zfs_alloc(zhp->zpool_hdl, zc.zc_config_dst_size)) == NULL) + return (-1); for (;;) { - if (zfs_ioctl(ZFS_IOC_POOL_STATS, &zc) == 0) { + if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_STATS, + &zc) == 0) { /* * The real error is returned in the zc_cookie field. 
*/ - error = zc.zc_cookie; + error = errno = zc.zc_cookie; break; } if (errno == ENOMEM) { free((void *)(uintptr_t)zc.zc_config_dst); - zc.zc_config_dst = (uint64_t)(uintptr_t) - zfs_malloc(zc.zc_config_dst_size); + if ((zc.zc_config_dst = (uint64_t)(uintptr_t) + zfs_alloc(zhp->zpool_hdl, + zc.zc_config_dst_size)) == NULL) + return (-1); } else { free((void *)(uintptr_t)zc.zc_config_dst); - return (errno); + return (-1); } } - verify(nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst, - zc.zc_config_dst_size, &config, 0) == 0); + if (nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst, + zc.zc_config_dst_size, &config, 0) != 0) { + free((void *)(uintptr_t)zc.zc_config_dst); + return (no_memory(zhp->zpool_hdl)); + } zhp->zpool_config_size = zc.zc_config_dst_size; free((void *)(uintptr_t)zc.zc_config_dst); - set_pool_health(config); + if (set_pool_health(config) != 0) + return (no_memory(zhp->zpool_hdl)); if (zhp->zpool_config != NULL) { uint64_t oldtxg, newtxg; @@ -260,25 +317,26 @@ zpool_refresh_stats(zpool_handle_t *zhp) zhp->zpool_config = config; - return (error); + return (error ? -1 : 0); } /* * Iterate over all pools in the system. */ int -zpool_iter(zpool_iter_f func, void *data) +zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data) { config_node_t *cn; zpool_handle_t *zhp; int ret; - namespace_reload(); + if (namespace_reload(hdl) != 0) + return (-1); - for (cn = uu_avl_first(namespace_avl); cn != NULL; - cn = uu_avl_next(namespace_avl, cn)) { + for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL; + cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) { - if ((zhp = zpool_open_silent(cn->cn_name)) == NULL) + if ((zhp = zpool_open_silent(hdl, cn->cn_name)) == NULL) continue; if ((ret = func(zhp, data)) != 0) @@ -293,18 +351,19 @@ zpool_iter(zpool_iter_f func, void *data) * handle passed each time must be explicitly closed by the callback. 
*/ int -zfs_iter_root(zfs_iter_f func, void *data) +zfs_iter_root(libzfs_handle_t *hdl, zfs_iter_f func, void *data) { config_node_t *cn; zfs_handle_t *zhp; int ret; - namespace_reload(); + if (namespace_reload(hdl) != 0) + return (-1); - for (cn = uu_avl_first(namespace_avl); cn != NULL; - cn = uu_avl_next(namespace_avl, cn)) { + for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL; + cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) { - if ((zhp = make_dataset_handle(cn->cn_name)) == NULL) + if ((zhp = make_dataset_handle(hdl, cn->cn_name)) == NULL) continue; if ((ret = func(zhp, data)) != 0) diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c index f23136c8aa..14ba6112ed 100644 --- a/usr/src/lib/libzfs/common/libzfs_dataset.c +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c @@ -36,6 +36,7 @@ #include <strings.h> #include <unistd.h> #include <zone.h> +#include <fcntl.h> #include <sys/mntent.h> #include <sys/mnttab.h> #include <sys/mount.h> @@ -64,7 +65,6 @@ zfs_type_to_name(zfs_type_t type) return (dgettext(TEXT_DOMAIN, "volume")); } - zfs_baderror(type); return (NULL); } @@ -118,43 +118,43 @@ path_to_str(const char *path, int types) * 'buf' detailing exactly why the name was not valid. 
*/ static int -zfs_validate_name(const char *path, int type, char *buf, size_t buflen) +zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type) { namecheck_err_t why; char what; if (dataset_namecheck(path, &why, &what) != 0) { - if (buf != NULL) { + if (hdl != NULL) { switch (why) { case NAME_ERR_TOOLONG: - (void) strlcpy(buf, dgettext(TEXT_DOMAIN, - "name is too long"), buflen); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "name is too long")); break; case NAME_ERR_LEADING_SLASH: - (void) strlcpy(buf, dgettext(TEXT_DOMAIN, - "leading slash"), buflen); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "leading slash in name")); break; case NAME_ERR_EMPTY_COMPONENT: - (void) strlcpy(buf, dgettext(TEXT_DOMAIN, - "empty component"), buflen); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "empty component in name")); break; case NAME_ERR_TRAILING_SLASH: - (void) strlcpy(buf, dgettext(TEXT_DOMAIN, - "trailing slash"), buflen); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "trailing slash in name")); break; case NAME_ERR_INVALCHAR: - (void) snprintf(buf, buflen, + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid character " - "'%c'"), what); + "'%c' in name"), what); break; case NAME_ERR_MULTIPLE_AT: - (void) strlcpy(buf, dgettext(TEXT_DOMAIN, - "multiple '@' delimiters"), buflen); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "multiple '@' delimiters in name")); break; } } @@ -163,20 +163,19 @@ zfs_validate_name(const char *path, int type, char *buf, size_t buflen) } if (!(type & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL) { - if (buf != NULL) - (void) strlcpy(buf, - dgettext(TEXT_DOMAIN, - "snapshot delimiter '@'"), buflen); + if (hdl != NULL) + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "snapshot delimiter '@' in filesystem name")); return (0); } - return (1); + return (-1); } int zfs_name_valid(const char *name, zfs_type_t type) { - return (zfs_validate_name(name, type, NULL, NULL)); + return (zfs_validate_name(NULL, name, type)); } /* @@ -189,13 +188,16 @@ 
get_stats(zfs_handle_t *zhp) (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - zc.zc_config_src = (uint64_t)(uintptr_t)zfs_malloc(1024); + if ((zc.zc_config_src = (uint64_t)(uintptr_t)malloc(1024)) == NULL) + return (-1); zc.zc_config_src_size = 1024; - while (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0) { + while (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) { if (errno == ENOMEM) { - zc.zc_config_src = (uint64_t)(uintptr_t) - zfs_malloc(zc.zc_config_src_size); + free((void *)(uintptr_t)zc.zc_config_src); + if ((zc.zc_config_src = (uint64_t)(uintptr_t) + malloc(zc.zc_config_src_size)) == NULL) + return (-1); } else { free((void *)(uintptr_t)zc.zc_config_src); return (-1); @@ -207,12 +209,22 @@ get_stats(zfs_handle_t *zhp) (void) strcpy(zhp->zfs_root, zc.zc_root); - verify(nvlist_unpack((void *)(uintptr_t)zc.zc_config_src, - zc.zc_config_src_size, &zhp->zfs_props, 0) == 0); + if (zhp->zfs_props) { + nvlist_free(zhp->zfs_props); + zhp->zfs_props = NULL; + } + + if (nvlist_unpack((void *)(uintptr_t)zc.zc_config_src, + zc.zc_config_src_size, &zhp->zfs_props, 0) != 0) { + free((void *)(uintptr_t)zc.zc_config_src); + return (-1); + } zhp->zfs_volsize = zc.zc_volsize; zhp->zfs_volblocksize = zc.zc_volblocksize; + free((void *)(uintptr_t)zc.zc_config_src); + return (0); } @@ -230,9 +242,14 @@ zfs_refresh_properties(zfs_handle_t *zhp) * zfs_iter_* to create child handles on the fly. 
*/ zfs_handle_t * -make_dataset_handle(const char *path) +make_dataset_handle(libzfs_handle_t *hdl, const char *path) { - zfs_handle_t *zhp = zfs_malloc(sizeof (zfs_handle_t)); + zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); + + if (zhp == NULL) + return (NULL); + + zhp->zfs_hdl = hdl; top: (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name)); @@ -263,20 +280,20 @@ top: (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (zhp->zfs_type == ZFS_TYPE_VOLUME) { - (void) zvol_remove_link(zhp->zfs_name); + (void) zvol_remove_link(hdl, zhp->zfs_name); zc.zc_objset_type = DMU_OST_ZVOL; } else { zc.zc_objset_type = DMU_OST_ZFS; } /* If we can successfully roll it back, reget the stats */ - if (zfs_ioctl(ZFS_IOC_ROLLBACK, &zc) == 0) + if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc) == 0) goto top; /* * If we can sucessfully destroy it, pretend that it * never existed. */ - if (zfs_ioctl(ZFS_IOC_DESTROY, &zc) == 0) { + if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc) == 0) { free(zhp); errno = ENOENT; return (NULL); @@ -294,8 +311,7 @@ top: else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) zhp->zfs_type = ZFS_TYPE_FILESYSTEM; else - /* we should never see any other dataset types */ - zfs_baderror(zhp->zfs_dmustats.dds_type); + abort(); /* we should never see any other types */ return (zhp); } @@ -306,18 +322,21 @@ top: * appropriate error message and return NULL if it can't be opened. */ zfs_handle_t * -zfs_open(const char *path, int types) +zfs_open(libzfs_handle_t *hdl, const char *path, int types) { zfs_handle_t *zhp; + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot open '%s'"), path); /* - * Validate the name before we even try to open it. We don't care about - * the verbose invalid messages here; just report a generic error. + * Validate the name before we even try to open it. 
*/ - if (!zfs_validate_name(path, types, NULL, 0)) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot open '%s': invalid %s name"), path, - path_to_str(path, types)); + if (!zfs_validate_name(hdl, path, ZFS_TYPE_ANY)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid dataset name")); + (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); return (NULL); } @@ -325,48 +344,13 @@ zfs_open(const char *path, int types) * Try to get stats for the dataset, which will tell us if it exists. */ errno = 0; - if ((zhp = make_dataset_handle(path)) == NULL) { - switch (errno) { - case ENOENT: - /* - * The dataset doesn't exist. - */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot open '%s': no such %s"), path, - path_to_str(path, types)); - break; - - case EBUSY: - /* - * We were able to open the dataset but couldn't - * get the stats. - */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot open '%s': %s is busy"), path, - path_to_str(path, types)); - break; - - case ENXIO: - case EIO: - /* - * I/O error from the underlying pool. - */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot open '%s': I/O error"), path, - path_to_str(path, types)); - break; - - default: - zfs_baderror(errno); - - } + if ((zhp = make_dataset_handle(hdl, path)) == NULL) { + (void) zfs_standard_error(hdl, errno, errbuf, path); return (NULL); } if (!(types & zhp->zfs_type)) { - zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': operation " - "not supported for %ss"), path, - zfs_type_to_name(zhp->zfs_type)); + (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); free(zhp); return (NULL); } @@ -382,6 +366,8 @@ zfs_close(zfs_handle_t *zhp) { if (zhp->zfs_mntopts) free(zhp->zfs_mntopts); + if (zhp->zfs_props) + nvlist_free(zhp->zfs_props); free(zhp); } @@ -443,7 +429,7 @@ struct { * resulting value must be shifted. 
*/ static int -str2shift(const char *buf, char *reason, size_t len) +str2shift(libzfs_handle_t *hdl, const char *buf) { const char *ends = "BKMGTPEZ"; int i; @@ -455,8 +441,8 @@ str2shift(const char *buf, char *reason, size_t len) break; } if (i == strlen(ends)) { - (void) snprintf(reason, len, dgettext(TEXT_DOMAIN, "invalid " - "numeric suffix '%s'"), buf); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid numeric suffix '%s'"), buf); return (-1); } @@ -465,12 +451,11 @@ str2shift(const char *buf, char *reason, size_t len) * allow 'BB' - that's just weird. */ if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0' && - toupper(buf[0]) != 'B')) { + toupper(buf[0]) != 'B')) return (10*i); - } - (void) snprintf(reason, len, dgettext(TEXT_DOMAIN, "invalid numeric " - "suffix '%s'"), buf); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid numeric suffix '%s'"), buf); return (-1); } @@ -480,7 +465,7 @@ str2shift(const char *buf, char *reason, size_t len) * message for the caller to use. */ static int -nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen) +nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num) { char *end; int shift; @@ -489,8 +474,9 @@ nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen) /* Check to see if this looks like a number. */ if ((value[0] < '0' || value[0] > '9') && value[0] != '.') { - (void) strlcpy(buf, dgettext(TEXT_DOMAIN, - "must be a numeric value"), buflen); + if (hdl) + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "bad numeric value '%s'"), value); return (-1); } @@ -503,8 +489,9 @@ nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen) * in a 64-bit value. 
*/ if (errno == ERANGE) { - (void) strlcpy(buf, dgettext(TEXT_DOMAIN, - "value is too large"), buflen); + if (hdl) + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "numeric value is too large")); return (-1); } @@ -515,26 +502,28 @@ nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen) if (*end == '.') { double fval = strtod(value, &end); - if ((shift = str2shift(end, buf, buflen)) == -1) + if ((shift = str2shift(hdl, end)) == -1) return (-1); fval *= pow(2, shift); if (fval > UINT64_MAX) { - (void) strlcpy(buf, dgettext(TEXT_DOMAIN, - "value is too large"), buflen); + if (hdl) + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "numeric value is too large")); return (-1); } *num = (uint64_t)fval; } else { - if ((shift = str2shift(end, buf, buflen)) == -1) + if ((shift = str2shift(hdl, end)) == -1) return (-1); /* Check for overflow */ if (shift >= 64 || (*num << shift) >> shift != *num) { - (void) strlcpy(buf, dgettext(TEXT_DOMAIN, - "value is too large"), buflen); + if (hdl) + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "numeric value is too large")); return (-1); } @@ -547,9 +536,7 @@ nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen) int zfs_nicestrtonum(const char *str, uint64_t *val) { - char buf[1]; - - return (nicestrtonum(str, val, buf, sizeof (buf))); + return (nicestrtonum(NULL, str, val)); } /* @@ -557,28 +544,28 @@ zfs_nicestrtonum(const char *str, uint64_t *val) * by zfs_prop_set() and some libzfs consumers. */ int -zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval) +zfs_prop_validate(libzfs_handle_t *hdl, zfs_prop_t prop, const char *value, + uint64_t *intval) { const char *propname = zfs_prop_to_name(prop); uint64_t number; - char reason[64]; + char errbuf[1024]; int i; /* * Check to see if this a read-only property. 
*/ - if (zfs_prop_readonly(prop)) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot set %s property: read-only property"), propname); - return (-1); - } + if (zfs_prop_readonly(prop)) + return (zfs_error(hdl, EZFS_PROPREADONLY, + dgettext(TEXT_DOMAIN, "cannot set %s property"), propname)); + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "bad %s value '%s'"), propname, value); /* See if the property value is too long */ if (strlen(value) >= ZFS_MAXPROPLEN) { - zfs_error(dgettext(TEXT_DOMAIN, - "bad %s value '%s': value is too long"), propname, - value); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "value is too long")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } /* Perform basic checking based on property type */ @@ -589,10 +576,9 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval) } else if (strcmp(value, "off") == 0) { number = 0; } else { - zfs_error(dgettext(TEXT_DOMAIN, - "bad %s value '%s': must be 'on' or 'off'"), - propname, value); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "must be 'on' or 'off'")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } break; @@ -603,21 +589,15 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval) break; } - if (nicestrtonum(value, &number, reason, - sizeof (reason)) != 0) { - zfs_error(dgettext(TEXT_DOMAIN, - "bad %s value '%s': %s"), propname, value, - reason); - return (-1); - } + if (nicestrtonum(hdl, value, &number) != 0) + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); /* don't allow 0 for quota, use 'none' instead */ if (prop == ZFS_PROP_QUOTA && number == 0 && strcmp(value, "none") != 0) { - zfs_error(dgettext(TEXT_DOMAIN, - "bad %s value '%s': use '%s=none' to disable"), - propname, value, propname); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "use 'quota=none' to disable")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } /* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */ @@ -625,13 +605,11 @@ 
zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval) prop == ZFS_PROP_VOLBLOCKSIZE) { if (number < SPA_MINBLOCKSIZE || number > SPA_MAXBLOCKSIZE || !ISP2(number)) { - zfs_error(dgettext(TEXT_DOMAIN, - "bad %s value '%s': " + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "must be power of 2 from %u to %uk"), - propname, value, (uint_t)SPA_MINBLOCKSIZE, (uint_t)SPA_MAXBLOCKSIZE >> 10); - return (-1); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } } @@ -652,11 +630,10 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval) break; if (value[0] != '/') { - zfs_error(dgettext(TEXT_DOMAIN, - "bad %s value '%s': must be an absolute " - "path, 'none', or 'legacy'"), - propname, value); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "must be an absolute path, 'none', or " + "'legacy'")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } break; @@ -670,11 +647,10 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval) } if (checksum_table[i].name == NULL) { - zfs_error(dgettext(TEXT_DOMAIN, - "bad %s value '%s': must be 'on', 'off', " - "'fletcher2', 'fletcher4', or 'sha256'"), - propname, value); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "must be 'on', 'off', 'fletcher2', " + "'fletcher4', or 'sha256'")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } break; @@ -688,11 +664,9 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval) } if (compress_table[i].name == NULL) { - zfs_error(dgettext(TEXT_DOMAIN, - "bad %s value '%s': must be 'on', 'off', " - "or 'lzjb'"), - propname, value); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "must be 'on', 'off', or 'lzjb'")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } break; @@ -705,11 +679,9 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval) } if (snapdir_table[i].name == NULL) { - zfs_error(dgettext(TEXT_DOMAIN, - "bad %s value '%s': must be 'hidden' " - "or 'visible'"), - 
propname, value); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "must be 'hidden' or 'visible'")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } break; @@ -723,11 +695,10 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval) } if (acl_mode_table[i].name == NULL) { - zfs_error(dgettext(TEXT_DOMAIN, - "bad %s value '%s': must be 'discard', " - "'groupmask' or 'passthrough'"), - propname, value); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "must be 'disacard', 'groupmask', or " + "'passthrough'")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } break; @@ -741,11 +712,10 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval) } if (acl_inherit_table[i].name == NULL) { - zfs_error(dgettext(TEXT_DOMAIN, - "bad %s value '%s': must be 'discard', " - "'noallow', 'secure' or 'passthrough'"), - propname, value); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "must be 'discard, 'noallow', 'secure', " + "or 'passthrough'")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } break; @@ -775,19 +745,22 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval) zfs_cmd_t zc = { 0 }; int ret; prop_changelist_t *cl; + char errbuf[1024]; + libzfs_handle_t *hdl = zhp->zfs_hdl; - if (zfs_prop_validate(prop, propval, &number) != 0) + if (zfs_prop_validate(zhp->zfs_hdl, prop, propval, &number) != 0) return (-1); + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot set %s for '%s'"), propname, + zhp->zfs_name); + /* * Check to see if the value applies to this type */ - if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot set %s for '%s': property does not apply to %ss"), - propname, zhp->zfs_name, zfs_type_to_name(zhp->zfs_type)); - return (-1); - } + if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) + return (zfs_error(hdl, EZFS_PROPTYPE, errbuf)); /* * For the mountpoint and sharenfs properties, check if it can be 
set @@ -804,29 +777,24 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval) if (prop == ZFS_PROP_MOUNTPOINT || prop == ZFS_PROP_SHARENFS) { if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { if (getzoneid() == GLOBAL_ZONEID) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot set %s for '%s': " - "dataset is used in a non-global zone"), - propname, zhp->zfs_name); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset is used in a non-global zone")); + return (zfs_error(hdl, EZFS_ZONED, errbuf)); } else if (prop == ZFS_PROP_SHARENFS) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot set %s for '%s': filesystems " - "cannot be shared in a non-global zone"), - propname, zhp->zfs_name); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "filesystems cannot be shared in a " + "non-global zone")); + return (zfs_error(hdl, EZFS_ZONED, errbuf)); } } else if (getzoneid() != GLOBAL_ZONEID) { /* * If zoned property is 'off', this must be in * a globle zone. If not, something is wrong. 
*/ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot set %s for '%s': dataset is " - "used in a non-global zone, but 'zoned' " - "property is not set"), - propname, zhp->zfs_name); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset is used in a non-global zone, but " + "'zoned' property is not set")); + return (zfs_error(hdl, EZFS_ZONED, errbuf)); } } @@ -834,11 +802,10 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval) return (-1); if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) { - zfs_error(dgettext(TEXT_DOMAIN, "cannot set %s for '%s', " - "child dataset with inherited mountpoint is used " - "in a non-global zone"), - propname, zhp->zfs_name); - ret = -1; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "child dataset with inherited mountpoint is used " + "in a non-global zone")); + ret = zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } @@ -853,11 +820,12 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval) switch (prop) { case ZFS_PROP_QUOTA: zc.zc_cookie = number; - ret = zfs_ioctl(ZFS_IOC_SET_QUOTA, &zc); + ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_QUOTA, &zc); break; case ZFS_PROP_RESERVATION: zc.zc_cookie = number; - ret = zfs_ioctl(ZFS_IOC_SET_RESERVATION, &zc); + ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_RESERVATION, + &zc); break; case ZFS_PROP_MOUNTPOINT: case ZFS_PROP_SHARENFS: @@ -870,15 +838,16 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval) sizeof (zc.zc_prop_value)); zc.zc_intsz = 1; zc.zc_numints = strlen(propval) + 1; - ret = zfs_ioctl(ZFS_IOC_SET_PROP, &zc); + ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_PROP, &zc); break; case ZFS_PROP_VOLSIZE: zc.zc_volsize = number; - ret = zfs_ioctl(ZFS_IOC_SET_VOLSIZE, &zc); + ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_VOLSIZE, &zc); break; case ZFS_PROP_VOLBLOCKSIZE: zc.zc_volblocksize = number; - ret = zfs_ioctl(ZFS_IOC_SET_VOLBLOCKSIZE, &zc); + ret = ioctl(zhp->zfs_hdl->libzfs_fd, 
ZFS_IOC_SET_VOLBLOCKSIZE, + &zc); break; default: (void) strlcpy(zc.zc_prop_name, propname, @@ -887,25 +856,13 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval) *(uint64_t *)zc.zc_prop_value = number; zc.zc_intsz = 8; zc.zc_numints = 1; - ret = zfs_ioctl(ZFS_IOC_SET_PROP, &zc); + ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_PROP, &zc); break; } if (ret != 0) { switch (errno) { - case EPERM: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot set %s for '%s': permission " - "denied"), propname, zhp->zfs_name); - break; - - case ENOENT: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot open '%s': no such %s"), zhp->zfs_name, - zfs_type_to_name(zhp->zfs_type)); - break; - case ENOSPC: /* * For quotas and reservations, ENOSPC indicates @@ -914,41 +871,33 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval) */ switch (prop) { case ZFS_PROP_QUOTA: - zfs_error(dgettext(TEXT_DOMAIN, "cannot set %s " - "for '%s': size is less than current " - "used or reserved space"), propname, - zhp->zfs_name); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "size is less than current used or " + "reserved space")); + (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf); break; case ZFS_PROP_RESERVATION: - zfs_error(dgettext(TEXT_DOMAIN, "cannot set %s " - "for '%s': size is greater than available " - "space"), propname, zhp->zfs_name); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "size is greater than available space")); + (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf); break; default: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot set %s for '%s': out of space"), - propname, zhp->zfs_name); + (void) zfs_standard_error(hdl, errno, errbuf); break; } break; case EBUSY: - if (prop == ZFS_PROP_VOLBLOCKSIZE) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot set %s for '%s': " - "volume already contains data"), - propname, zhp->zfs_name); - } else { - zfs_baderror(errno); - } + if (prop == ZFS_PROP_VOLBLOCKSIZE) + (void) zfs_error(hdl, EZFS_VOLHASDATA, errbuf); + else + return 
(zfs_standard_error(hdl, EBUSY, errbuf)); break; case EROFS: - zfs_error(dgettext(TEXT_DOMAIN, "cannot set %s for " - "'%s': read only %s"), propname, zhp->zfs_name, - zfs_type_to_name(zhp->zfs_type)); + (void) zfs_error(hdl, EZFS_DSREADONLY, errbuf); break; case EOVERFLOW: @@ -957,16 +906,13 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval) */ #ifdef _ILP32 if (prop == ZFS_PROP_VOLSIZE) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot set %s for '%s': " - "max volume size is 1TB on 32-bit systems"), - propname, zhp->zfs_name); + (void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf); break; } #endif - zfs_baderror(errno); + /* FALLTHROUGH */ default: - zfs_baderror(errno); + (void) zfs_standard_error(hdl, errno, errbuf); } } else { /* @@ -994,44 +940,35 @@ zfs_prop_inherit(zfs_handle_t *zhp, zfs_prop_t prop) zfs_cmd_t zc = { 0 }; int ret; prop_changelist_t *cl; + libzfs_handle_t *hdl = zhp->zfs_hdl; + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot inherit %s for '%s'"), propname, zhp->zfs_name); /* * Verify that this property is inheritable. 
*/ - if (zfs_prop_readonly(prop)) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot inherit %s for '%s': property is read-only"), - propname, zhp->zfs_name); - return (-1); - } + if (zfs_prop_readonly(prop)) + return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf)); - if (!zfs_prop_inheritable(prop)) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot inherit %s for '%s': property is not inheritable"), - propname, zhp->zfs_name); - return (-1); - } + if (!zfs_prop_inheritable(prop)) + return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf)); /* * Check to see if the value applies to this type */ - if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot inherit %s for '%s': property does " - "not apply to %ss"), propname, zhp->zfs_name, - zfs_type_to_name(zhp->zfs_type)); - return (-1); - } + if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) + return (zfs_error(hdl, EZFS_PROPTYPE, errbuf)); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_prop_name, propname, sizeof (zc.zc_prop_name)); if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID && zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { - zfs_error(dgettext(TEXT_DOMAIN, "cannot inherit %s for '%s', " - "dataset is used in a non-global zone"), propname, - zhp->zfs_name); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset is used in a non-global zone")); + return (zfs_error(hdl, EZFS_ZONED, errbuf)); } /* @@ -1041,11 +978,10 @@ zfs_prop_inherit(zfs_handle_t *zhp, zfs_prop_t prop) return (-1); if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) { - zfs_error(dgettext(TEXT_DOMAIN, "cannot inherit %s for '%s', " - "child dataset with inherited mountpoint is " - "used in a non-global zone"), - propname, zhp->zfs_name); - ret = -1; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "child dataset with inherited mountpoint is used " + "in a non-global zone")); + ret = zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } @@ -1054,27 +990,9 
@@ zfs_prop_inherit(zfs_handle_t *zhp, zfs_prop_t prop) zc.zc_numints = 0; - if ((ret = zfs_ioctl(ZFS_IOC_SET_PROP, &zc)) != 0) { - switch (errno) { - case EPERM: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot inherit %s for '%s': permission " - "denied"), propname, zhp->zfs_name); - break; - case ENOENT: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot open '%s': no such %s"), zhp->zfs_name, - zfs_type_to_name(zhp->zfs_type)); - break; - case ENOSPC: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot inherit %s for '%s': " - "out of space"), propname, zhp->zfs_name); - break; - default: - zfs_baderror(errno); - } - + if ((ret = ioctl(zhp->zfs_hdl->libzfs_fd, + ZFS_IOC_SET_PROP, &zc)) != 0) { + return (zfs_standard_error(hdl, errno, errbuf)); } else { if ((ret = changelist_postfix(cl)) != 0) @@ -1151,11 +1069,10 @@ getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source) * If they differ from the on-disk values, report the current values and mark * the source "temporary". */ -static uint64_t +static int get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zfs_source_t *src, - char **source) + char **source, uint64_t *val) { - uint64_t val; struct mnttab mnt; *source = NULL; @@ -1167,86 +1084,90 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zfs_source_t *src, switch (prop) { case ZFS_PROP_ATIME: - val = getprop_uint64(zhp, prop, source); + *val = getprop_uint64(zhp, prop, source); - if (hasmntopt(&mnt, MNTOPT_ATIME) && !val) { - val = TRUE; + if (hasmntopt(&mnt, MNTOPT_ATIME) && !*val) { + *val = B_TRUE; if (src) *src = ZFS_SRC_TEMPORARY; - } else if (hasmntopt(&mnt, MNTOPT_NOATIME) && val) { - val = FALSE; + } else if (hasmntopt(&mnt, MNTOPT_NOATIME) && *val) { + *val = B_FALSE; if (src) *src = ZFS_SRC_TEMPORARY; } - return (val); + break; case ZFS_PROP_AVAILABLE: - return (zhp->zfs_dmustats.dds_available); + *val = zhp->zfs_dmustats.dds_available; + break; case ZFS_PROP_DEVICES: - val = getprop_uint64(zhp, prop, source); + *val = getprop_uint64(zhp, 
prop, source); - if (hasmntopt(&mnt, MNTOPT_DEVICES) && !val) { - val = TRUE; + if (hasmntopt(&mnt, MNTOPT_DEVICES) && !*val) { + *val = B_TRUE; if (src) *src = ZFS_SRC_TEMPORARY; - } else if (hasmntopt(&mnt, MNTOPT_NODEVICES) && val) { - val = FALSE; + } else if (hasmntopt(&mnt, MNTOPT_NODEVICES) && *val) { + *val = B_FALSE; if (src) *src = ZFS_SRC_TEMPORARY; } - return (val); + break; case ZFS_PROP_EXEC: - val = getprop_uint64(zhp, prop, source); + *val = getprop_uint64(zhp, prop, source); - if (hasmntopt(&mnt, MNTOPT_EXEC) && !val) { - val = TRUE; + if (hasmntopt(&mnt, MNTOPT_EXEC) && !*val) { + *val = B_TRUE; if (src) *src = ZFS_SRC_TEMPORARY; - } else if (hasmntopt(&mnt, MNTOPT_NOEXEC) && val) { - val = FALSE; + } else if (hasmntopt(&mnt, MNTOPT_NOEXEC) && *val) { + *val = B_FALSE; if (src) *src = ZFS_SRC_TEMPORARY; } - return (val); + break; case ZFS_PROP_RECORDSIZE: case ZFS_PROP_COMPRESSION: case ZFS_PROP_ZONED: - val = getprop_uint64(zhp, prop, source); - return (val); + *val = getprop_uint64(zhp, prop, source); + break; case ZFS_PROP_READONLY: - val = getprop_uint64(zhp, prop, source); + *val = getprop_uint64(zhp, prop, source); - if (hasmntopt(&mnt, MNTOPT_RO) && !val) { - val = TRUE; + if (hasmntopt(&mnt, MNTOPT_RO) && !*val) { + *val = B_TRUE; if (src) *src = ZFS_SRC_TEMPORARY; - } else if (hasmntopt(&mnt, MNTOPT_RW) && val) { - val = FALSE; + } else if (hasmntopt(&mnt, MNTOPT_RW) && *val) { + *val = B_FALSE; if (src) *src = ZFS_SRC_TEMPORARY; } - return (val); + break; case ZFS_PROP_CREATION: - return (zhp->zfs_dmustats.dds_creation_time); + *val = zhp->zfs_dmustats.dds_creation_time; + break; case ZFS_PROP_QUOTA: if (zhp->zfs_dmustats.dds_quota == 0) *source = ""; /* default */ else *source = zhp->zfs_name; - return (zhp->zfs_dmustats.dds_quota); + *val = zhp->zfs_dmustats.dds_quota; + break; case ZFS_PROP_RESERVATION: if (zhp->zfs_dmustats.dds_reserved == 0) *source = ""; /* default */ else *source = zhp->zfs_name; - return 
(zhp->zfs_dmustats.dds_reserved); + *val = zhp->zfs_dmustats.dds_reserved; + break; case ZFS_PROP_COMPRESSRATIO: /* @@ -1255,43 +1176,50 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zfs_source_t *src, * 100, so '2.5x' would be returned as 250. */ if (zhp->zfs_dmustats.dds_compressed_bytes == 0) - return (100ULL); + *val = 100ULL; else - return (zhp->zfs_dmustats.dds_uncompressed_bytes * 100 / + *val = + (zhp->zfs_dmustats.dds_uncompressed_bytes * 100 / zhp->zfs_dmustats.dds_compressed_bytes); + break; case ZFS_PROP_REFERENCED: /* * 'referenced' refers to the amount of physical space * referenced (possibly shared) by this object. */ - return (zhp->zfs_dmustats.dds_space_refd); + *val = zhp->zfs_dmustats.dds_space_refd; + break; case ZFS_PROP_SETUID: - val = getprop_uint64(zhp, prop, source); + *val = getprop_uint64(zhp, prop, source); - if (hasmntopt(&mnt, MNTOPT_SETUID) && !val) { - val = TRUE; + if (hasmntopt(&mnt, MNTOPT_SETUID) && !*val) { + *val = B_TRUE; if (src) *src = ZFS_SRC_TEMPORARY; - } else if (hasmntopt(&mnt, MNTOPT_NOSETUID) && val) { - val = FALSE; + } else if (hasmntopt(&mnt, MNTOPT_NOSETUID) && *val) { + *val = B_FALSE; if (src) *src = ZFS_SRC_TEMPORARY; } - return (val); + break; case ZFS_PROP_VOLSIZE: - return (zhp->zfs_volsize); + *val = zhp->zfs_volsize; + break; case ZFS_PROP_VOLBLOCKSIZE: - return (zhp->zfs_volblocksize); + *val = zhp->zfs_volblocksize; + break; case ZFS_PROP_USED: - return (zhp->zfs_dmustats.dds_space_used); + *val = zhp->zfs_dmustats.dds_space_used; + break; case ZFS_PROP_CREATETXG: - return (zhp->zfs_dmustats.dds_creation_txg); + *val = zhp->zfs_dmustats.dds_creation_txg; + break; case ZFS_PROP_MOUNTED: /* @@ -1306,16 +1234,22 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zfs_source_t *src, search.mnt_special = (char *)zhp->zfs_name; search.mnt_fstype = MNTTYPE_ZFS; - rewind(zfs_mnttab()); + rewind(zhp->zfs_hdl->libzfs_mnttab); - if (getmntany(zfs_mnttab(), &entry, &search) == 0) - 
zhp->zfs_mntopts = - zfs_strdup(entry.mnt_mntopts); + if (getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, + &search) == 0 && (zhp->zfs_mntopts = + zfs_strdup(zhp->zfs_hdl, + entry.mnt_mntopts)) == NULL) + return (-1); } - return (zhp->zfs_mntopts != NULL); + *val = (zhp->zfs_mntopts != NULL); + break; default: - zfs_baderror(EINVAL); + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "cannot get non-numeric property")); + return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP, + dgettext(TEXT_DOMAIN, "internal error"))); } return (0); @@ -1355,7 +1289,7 @@ get_source(zfs_handle_t *zhp, zfs_source_t *srctype, char *source, */ int zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, - zfs_source_t *src, char *statbuf, size_t statlen, int literal) + zfs_source_t *src, char *statbuf, size_t statlen, boolean_t literal) { char *source = NULL; uint64_t val; @@ -1383,8 +1317,9 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, * Basic boolean values are built on top of * get_numeric_property(). */ - nicebool(get_numeric_property(zhp, prop, src, &source), - propbuf, proplen); + if (get_numeric_property(zhp, prop, src, &source, &val) != 0) + return (-1); + nicebool(val, propbuf, proplen); break; @@ -1399,7 +1334,8 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, * Basic numeric values are built on top of * get_numeric_property(). 
*/ - val = get_numeric_property(zhp, prop, src, &source); + if (get_numeric_property(zhp, prop, src, &source, &val) != 0) + return (-1); if (literal) (void) snprintf(propbuf, proplen, "%llu", val); else @@ -1533,7 +1469,8 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, case ZFS_PROP_QUOTA: case ZFS_PROP_RESERVATION: - val = get_numeric_property(zhp, prop, src, &source); + if (get_numeric_property(zhp, prop, src, &source, &val) != 0) + return (-1); /* * If quota or reservation is 0, we translate this into 'none' @@ -1555,7 +1492,8 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, break; case ZFS_PROP_COMPRESSRATIO: - val = get_numeric_property(zhp, prop, src, &source); + if (get_numeric_property(zhp, prop, src, &source, &val) != 0) + return (-1); (void) snprintf(propbuf, proplen, "%lld.%02lldx", val / 100, val % 100); break; @@ -1572,7 +1510,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, str = "snapshot"; break; default: - zfs_baderror(zhp->zfs_type); + abort(); } (void) snprintf(propbuf, proplen, "%s", str); break; @@ -1584,7 +1522,10 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, * it's a boolean value, the typical values of "on" and "off" * don't make sense, so we translate to "yes" and "no". 
*/ - if (get_numeric_property(zhp, ZFS_PROP_MOUNTED, src, &source)) + if (get_numeric_property(zhp, ZFS_PROP_MOUNTED, + src, &source, &val) != 0) + return (-1); + if (val) (void) strlcpy(propbuf, "yes", proplen); else (void) strlcpy(propbuf, "no", proplen); @@ -1600,7 +1541,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, break; default: - zfs_baderror(EINVAL); + abort(); } get_source(zhp, src, source, statbuf, statlen); @@ -1618,8 +1559,11 @@ zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop) { char *source; zfs_source_t sourcetype = ZFS_SRC_NONE; + uint64_t val; + + (void) get_numeric_property(zhp, prop, &sourcetype, &source, &val); - return (get_numeric_property(zhp, prop, &sourcetype, &source)); + return (val); } /* @@ -1635,12 +1579,15 @@ zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value, * Check to see if this property applies to our object */ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) - return (-1); + return (zfs_error(zhp->zfs_hdl, EZFS_PROPTYPE, + dgettext(TEXT_DOMAIN, "cannot get property '%s'"), + zfs_prop_to_name(prop))); if (src) *src = ZFS_SRC_NONE; - *value = get_numeric_property(zhp, prop, src, &source); + if (get_numeric_property(zhp, prop, src, &source, value) != 0) + return (-1); get_source(zhp, src, source, statbuf, statlen); @@ -1676,7 +1623,7 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) int ret; for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - zfs_ioctl(ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0; + ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) { /* * Ignore private dataset names. @@ -1688,7 +1635,8 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) * Silently ignore errors, as the only plausible explanation is * that the pool has since been removed. 
*/ - if ((nzhp = make_dataset_handle(zc.zc_name)) == NULL) + if ((nzhp = make_dataset_handle(zhp->zfs_hdl, + zc.zc_name)) == NULL) continue; if ((ret = func(nzhp, data)) != 0) @@ -1701,7 +1649,8 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) * obtained the handle. */ if (errno != ESRCH && errno != ENOENT) - zfs_baderror(errno); + return (zfs_standard_error(zhp->zfs_hdl, errno, + dgettext(TEXT_DOMAIN, "cannot iterate filesystems"))); return (0); } @@ -1717,10 +1666,12 @@ zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data) int ret; for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - zfs_ioctl(ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0; + ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, + &zc) == 0; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) { - if ((nzhp = make_dataset_handle(zc.zc_name)) == NULL) + if ((nzhp = make_dataset_handle(zhp->zfs_hdl, + zc.zc_name)) == NULL) continue; if ((ret = func(nzhp, data)) != 0) @@ -1733,7 +1684,8 @@ zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data) * obtained the handle. Silently ignore this case, and return success. */ if (errno != ESRCH && errno != ENOENT) - zfs_baderror(errno); + return (zfs_standard_error(zhp->zfs_hdl, errno, + dgettext(TEXT_DOMAIN, "cannot iterate filesystems"))); return (0); } @@ -1774,21 +1726,22 @@ parent_name(const char *path, char *buf, size_t buflen) * Checks to make sure that the given path has a parent, and that it exists. 
*/ static int -check_parents(const char *path, zfs_type_t type) +check_parents(libzfs_handle_t *hdl, const char *path) { zfs_cmd_t zc = { 0 }; char parent[ZFS_MAXNAMELEN]; char *slash; zfs_handle_t *zhp; + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), "cannot create '%s'", + path); /* get parent, and check to see if this is just a pool */ if (parent_name(path, parent, sizeof (parent)) != 0) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': missing dataset name"), - path, zfs_type_to_name(type)); - zfs_error(dgettext(TEXT_DOMAIN, - "use 'zpool create' to create a storage pool")); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "missing dataset name")); + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } /* check to see if the pool exists */ @@ -1796,40 +1749,39 @@ check_parents(const char *path, zfs_type_t type) slash = parent + strlen(parent); (void) strncpy(zc.zc_name, parent, slash - parent); zc.zc_name[slash - parent] = '\0'; - if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0 && + if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 && errno == ENOENT) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': no such pool '%s'"), path, zc.zc_name); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "no such pool '%s'"), zc.zc_name); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); } /* check to see if the parent dataset exists */ - if ((zhp = make_dataset_handle(parent)) == NULL) { + if ((zhp = make_dataset_handle(hdl, parent)) == NULL) { switch (errno) { case ENOENT: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': parent does not exist"), path); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "parent does not exist")); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); default: - zfs_baderror(errno); + return (zfs_standard_error(hdl, errno, errbuf)); } } /* we are in a non-global zone, but parent is in the global zone */ if (getzoneid() != GLOBAL_ZONEID && !zfs_prop_get_int(zhp, 
ZFS_PROP_ZONED)) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': permission denied"), path); + (void) zfs_standard_error(hdl, EPERM, errbuf); zfs_close(zhp); return (-1); } /* make sure parent is a filesystem */ if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': parent is not a filesystem"), - path); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "parent is not a filesystem")); + (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); zfs_close(zhp); return (-1); } @@ -1843,44 +1795,35 @@ check_parents(const char *path, zfs_type_t type) * only for volumes, and indicate the size and blocksize of the volume. */ int -zfs_create(const char *path, zfs_type_t type, +zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, const char *sizestr, const char *blocksizestr) { - char reason[64]; zfs_cmd_t zc = { 0 }; int ret; uint64_t size = 0; uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); + char errbuf[1024]; /* convert sizestr into integer size */ - if (sizestr != NULL && nicestrtonum(sizestr, &size, - reason, sizeof (reason)) != 0) { - zfs_error(dgettext(TEXT_DOMAIN, - "bad volume size '%s': %s"), sizestr, reason); - return (-1); - } + if (sizestr != NULL && nicestrtonum(hdl, sizestr, &size) != 0) + return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN, + "bad volume size '%s'"), sizestr)); /* convert blocksizestr into integer blocksize */ - if (blocksizestr != NULL && nicestrtonum(blocksizestr, &blocksize, - reason, sizeof (reason)) != 0) { - zfs_error(dgettext(TEXT_DOMAIN, - "bad volume blocksize '%s': %s"), blocksizestr, reason); - return (-1); - } + if (blocksizestr != NULL && nicestrtonum(hdl, blocksizestr, + &blocksize) != 0) + return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN, + "bad volume blocksize '%s'"), blocksizestr)); + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot create '%s'"), path); /* validate the path, taking care to note the 
extended error message */ - if (!zfs_validate_name(path, type, reason, sizeof (reason))) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': %s in %s name"), path, reason, - zfs_type_to_name(type)); - if (strstr(reason, "snapshot") != NULL) - zfs_error(dgettext(TEXT_DOMAIN, - "use 'zfs snapshot' to create a snapshot")); - return (-1); - } + if (!zfs_validate_name(hdl, path, type)) + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* validate parents exist */ - if (check_parents(path, type) != 0) + if (check_parents(hdl, path) != 0) return (-1); /* @@ -1891,10 +1834,10 @@ zfs_create(const char *path, zfs_type_t type, * first try to see if the dataset exists. */ (void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name)); - if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) == 0) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': dataset exists"), path); - return (-1); + if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset already exists")); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (type == ZFS_TYPE_VOLUME) @@ -1911,30 +1854,30 @@ zfs_create(const char *path, zfs_type_t type, * zero. 
*/ if (size == 0) { - zfs_error(dgettext(TEXT_DOMAIN, - "bad volume size '%s': cannot be zero"), sizestr); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot be zero")); + return (zfs_error(hdl, EZFS_BADPROP, + dgettext(TEXT_DOMAIN, "bad volume size '%s'"), + sizestr)); } if (blocksize < SPA_MINBLOCKSIZE || blocksize > SPA_MAXBLOCKSIZE || !ISP2(blocksize)) { - zfs_error(dgettext(TEXT_DOMAIN, - "bad volume block size '%s': " + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "must be power of 2 from %u to %uk"), - blocksizestr, (uint_t)SPA_MINBLOCKSIZE, (uint_t)SPA_MAXBLOCKSIZE >> 10); - return (-1); + return (zfs_error(hdl, EZFS_BADPROP, + dgettext(TEXT_DOMAIN, + "bad volume block size '%s'"), blocksizestr)); } if (size % blocksize != 0) { - char buf[64]; - zfs_nicenum(blocksize, buf, sizeof (buf)); - zfs_error(dgettext(TEXT_DOMAIN, - "bad volume size '%s': " - "must be multiple of volume block size (%s)"), - sizestr, buf); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "must be a multiple of volume block size")); + return (zfs_error(hdl, EZFS_BADPROP, + dgettext(TEXT_DOMAIN, "bad volume size '%s'"), + sizestr)); } zc.zc_volsize = size; @@ -1942,10 +1885,10 @@ zfs_create(const char *path, zfs_type_t type, } /* create the dataset */ - ret = zfs_ioctl(ZFS_IOC_CREATE, &zc); + ret = ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE, &zc); if (ret == 0 && type == ZFS_TYPE_VOLUME) - ret = zvol_create_link(path); + ret = zvol_create_link(hdl, path); /* check for failure */ if (ret != 0) { @@ -1954,81 +1897,38 @@ zfs_create(const char *path, zfs_type_t type, switch (errno) { case ENOENT: - /* - * The parent dataset has been deleted since our - * previous check. - */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': no such parent '%s'"), - path, parent); - break; - - case EPERM: - /* - * The user doesn't have permission to create a new - * dataset here. 
- */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': permission denied"), path); - break; - - case EDQUOT: - case ENOSPC: - /* - * The parent dataset does not have enough free space - * to create a new dataset. - */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': not enough space in '%s'"), - path, parent); - break; - - case EEXIST: - /* - * The target dataset already exists. We should have - * caught this above, but there may be some unexplained - * race condition. - */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': dataset exists"), path); - break; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "no such parent '%s'"), parent); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); case EINVAL: - /* - * The target dataset does not support children. - */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': children unsupported in '%s'"), - path, parent); - break; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "parent '%s' is not a filesysem"), parent); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); case EDOM: - zfs_error(dgettext(TEXT_DOMAIN, "bad %s value '%s': " + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "must be power of 2 from %u to %uk"), - zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), - blocksizestr ? blocksizestr : "<unknown>", (uint_t)SPA_MINBLOCKSIZE, (uint_t)SPA_MAXBLOCKSIZE >> 10); - break; + + return (zfs_error(hdl, EZFS_BADPROP, + dgettext(TEXT_DOMAIN, "bad block size '%s'"), + blocksizestr ? blocksizestr : "<unknown>")); + #ifdef _ILP32 case EOVERFLOW: /* * This platform can't address a volume this big. 
*/ - if (type == ZFS_TYPE_VOLUME) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': " - "max volume size is 1TB on 32-bit systems"), - path); - break; - } + if (type == ZFS_TYPE_VOLUME) + return (zfs_error(hdl, EZFS_VOLTOOBIG, + errbuf)); #endif - + /* FALLTHROUGH */ default: - zfs_baderror(errno); + return (zfs_standard_error(hdl, errno, errbuf)); } - - return (-1); } return (0); @@ -2043,6 +1943,7 @@ zfs_destroy(zfs_handle_t *zhp) { zfs_cmd_t zc = { 0 }; int ret; + char errbuf[1024]; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); @@ -2051,7 +1952,7 @@ zfs_destroy(zfs_handle_t *zhp) * so that we do the right thing for snapshots of volumes. */ if (zhp->zfs_volblocksize != 0) { - if (zvol_remove_link(zhp->zfs_name) != 0) + if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0) return (-1); zc.zc_objset_type = DMU_OST_ZVOL; @@ -2059,63 +1960,15 @@ zfs_destroy(zfs_handle_t *zhp) zc.zc_objset_type = DMU_OST_ZFS; } - ret = zfs_ioctl(ZFS_IOC_DESTROY, &zc); + ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc); - if (ret != 0) { - switch (errno) { - - case EPERM: - /* - * We don't have permission to destroy this dataset. - */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot destroy '%s': permission denied"), - zhp->zfs_name); - break; + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot destroy '%s'"), zhp->zfs_name); - case EIO: - /* - * I/O error. - */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot destroy '%s': I/O error"), - zhp->zfs_name); - break; - - case ENOENT: - /* - * We've hit a race condition where the dataset has been - * destroyed since we opened it. 
- */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot destroy '%s': no such %s"), - zhp->zfs_name, zfs_type_to_name(zhp->zfs_type)); - break; - - case EBUSY: - /* - * Even if we destroy all children, there is a chance we - * can hit this case if: - * - * - A child dataset has since been created - * - A filesystem is mounted - * - * This error message is awful, but hopefully we've - * already caught the common cases (and aborted more - * appropriately) before calling this function. There's - * nothing else we can do at this point. - */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot destroy '%s': %s is busy"), - zhp->zfs_name, zfs_type_to_name(zhp->zfs_type)); - break; - - default: - zfs_baderror(errno); - } - - return (-1); - } + if (ret != 0) + return (zfs_standard_error(zhp->zfs_hdl, errno, + dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), + zhp->zfs_name)); remove_mountpoint(zhp); @@ -2128,24 +1981,23 @@ zfs_destroy(zfs_handle_t *zhp) int zfs_clone(zfs_handle_t *zhp, const char *target) { - char reason[64]; zfs_cmd_t zc = { 0 }; char parent[ZFS_MAXNAMELEN]; int ret; + char errbuf[1024]; + libzfs_handle_t *hdl = zhp->zfs_hdl; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot create '%s'"), target); + /* validate the target name */ - if (!zfs_validate_name(target, ZFS_TYPE_FILESYSTEM, reason, - sizeof (reason))) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': %s in filesystem name"), target, - reason, zfs_type_to_name(ZFS_TYPE_FILESYSTEM)); - return (-1); - } + if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM)) + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* validate parents exist */ - if (check_parents(target, zhp->zfs_type) != 0) + if (check_parents(zhp->zfs_hdl, target) != 0) return (-1); (void) parent_name(target, parent, sizeof (parent)); @@ -2158,18 +2010,10 @@ zfs_clone(zfs_handle_t *zhp, const char *target) (void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name)); (void) 
strlcpy(zc.zc_filename, zhp->zfs_name, sizeof (zc.zc_filename)); - ret = zfs_ioctl(ZFS_IOC_CREATE, &zc); + ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_CREATE, &zc); if (ret != 0) { switch (errno) { - case EPERM: - /* - * The user doesn't have permission to create the clone. - */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': permission denied"), - target); - break; case ENOENT: /* @@ -2181,42 +2025,147 @@ zfs_clone(zfs_handle_t *zhp, const char *target) * that doesn't exist anymore, or whether the target * dataset doesn't exist. */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': no such parent '%s'"), - target, parent); - break; + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "no such parent '%s'"), parent); + return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); - case EDQUOT: - case ENOSPC: - /* - * There is not enough space in the target dataset - */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': not enough space in '%s'"), - target, parent); - break; + case EXDEV: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "source and target pools differ")); + return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET, + errbuf)); - case EEXIST: - /* - * The target already exists. 
- */ - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': dataset exists"), target); - break; + default: + return (zfs_standard_error(zhp->zfs_hdl, errno, + errbuf)); + } + } else if (zhp->zfs_volblocksize != 0) { + ret = zvol_create_link(zhp->zfs_hdl, target); + } - case EXDEV: + return (ret); +} + +typedef struct promote_data { + char cb_mountpoint[MAXPATHLEN]; + const char *cb_target; + const char *cb_errbuf; + uint64_t cb_pivot_txg; +} promote_data_t; + +static int +promote_snap_cb(zfs_handle_t *zhp, void *data) +{ + promote_data_t *pd = data; + zfs_handle_t *szhp; + int err; + char snapname[MAXPATHLEN]; + char *cp; + + /* We don't care about snapshots after the pivot point */ + if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg) + return (0); + + /* + * Unmount it. We actually need to open it to provoke it to be + * mounted first, because if it is not mounted, umount2 will + * mount it! + */ + (void) strcpy(snapname, pd->cb_mountpoint); + (void) strcat(snapname, "/.zfs/snapshot/"); + cp = strchr(zhp->zfs_name, '@'); + (void) strcat(snapname, cp+1); + err = open(snapname, O_RDONLY); + if (err != -1) + (void) close(err); + (void) umount2(snapname, MS_FORCE); + + /* Check for conflicting names */ + (void) strcpy(snapname, pd->cb_target); + (void) strcat(snapname, cp); + szhp = make_dataset_handle(zhp->zfs_hdl, snapname); + if (szhp != NULL) { + zfs_close(szhp); + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "snapshot name '%s' from origin \n" + "conflicts with '%s' from target"), + zhp->zfs_name, snapname); + return (zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf)); + } + return (0); +} + +/* + * Promotes the given clone fs to be the clone parent. 
+ */ +int +zfs_promote(zfs_handle_t *zhp) +{ + libzfs_handle_t *hdl = zhp->zfs_hdl; + zfs_cmd_t zc = { 0 }; + char parent[MAXPATHLEN]; + char *cp; + int ret; + zfs_handle_t *pzhp; + promote_data_t pd; + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot promote '%s'"), zhp->zfs_name); + + if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "snapshots can not be promoted")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + } + + (void) strcpy(parent, zhp->zfs_dmustats.dds_clone_of); + if (parent[0] == '\0') { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "not a cloned filesystem")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + } + cp = strchr(parent, '@'); + *cp = '\0'; + + /* Walk the snapshots we will be moving */ + pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_clone_of, ZFS_TYPE_SNAPSHOT); + if (pzhp == NULL) + return (-1); + pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG); + zfs_close(pzhp); + pd.cb_target = zhp->zfs_name; + pd.cb_errbuf = errbuf; + pzhp = zfs_open(hdl, parent, ZFS_TYPE_ANY); + if (pzhp == NULL) + return (-1); + (void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint, + sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE); + ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd); + if (ret != 0) + return (-1); + + /* issue the ioctl */ + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + ret = ioctl(hdl->libzfs_fd, ZFS_IOC_PROMOTE, &zc); + + if (ret != 0) { + switch (errno) { + + case EEXIST: /* - * The source and target pools differ. + * There is a conflicting snapshot name. We + * should have caught this above, but they could + * have renamed something in the mean time. 
*/ - zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': " - "source and target pools differ"), target); - break; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "conflicting snapshot name from parent '%s'"), + parent); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); default: - zfs_baderror(errno); + return (zfs_standard_error(hdl, errno, errbuf)); } - } else if (zhp->zfs_volblocksize != 0) { - ret = zvol_create_link(target); } return (ret); @@ -2226,40 +2175,36 @@ zfs_clone(zfs_handle_t *zhp, const char *target) * Takes a snapshot of the given dataset */ int -zfs_snapshot(const char *path) +zfs_snapshot(libzfs_handle_t *hdl, const char *path) { - char reason[64]; const char *delim; char *parent; zfs_handle_t *zhp; zfs_cmd_t zc = { 0 }; int ret; + char errbuf[1024]; - /* validate the snapshot name */ - if (!zfs_validate_name(path, ZFS_TYPE_SNAPSHOT, reason, - sizeof (reason))) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot snapshot '%s': %s in snapshot name"), path, - reason); - return (-1); - } + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot snapshot '%s'"), path); + + /* validate the target name */ + if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT)) + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* make sure we have a snapshot */ if ((delim = strchr(path, '@')) == NULL) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot snapshot '%s': missing '@' delim in snapshot " - "name"), path); - zfs_error(dgettext(TEXT_DOMAIN, - "use 'zfs create' to create a filesystem")); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "missing '@' delimeter in snapshot name")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } /* make sure the parent exists and is of the appropriate type */ - parent = zfs_malloc(delim - path + 1); + if ((parent = zfs_alloc(hdl, delim - path + 1)) == NULL) + return (-1); (void) strncpy(parent, path, delim - path); parent[delim - path] = '\0'; - if ((zhp = zfs_open(parent, ZFS_TYPE_FILESYSTEM | + if ((zhp = 
zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { free(parent); return (-1); @@ -2272,56 +2217,17 @@ zfs_snapshot(const char *path) else zc.zc_objset_type = DMU_OST_ZFS; - ret = zfs_ioctl(ZFS_IOC_CREATE, &zc); + ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_CREATE, &zc); if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) { - ret = zvol_create_link(path); + ret = zvol_create_link(zhp->zfs_hdl, path); if (ret != 0) - (void) zfs_ioctl(ZFS_IOC_DESTROY, &zc); + (void) ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DESTROY, + &zc); } - if (ret != 0) { - switch (errno) { - case EPERM: - /* - * User doesn't have permission to create a snapshot - */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': " - "permission denied"), path); - break; - - case EDQUOT: - case ENOSPC: - /* - * Out of space in parent. - */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': " - "not enough space in '%s'"), path, parent); - break; - - case EEXIST: - /* - * Snapshot already exists. - */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': " - "snapshot exists"), path); - break; - - case ENOENT: - /* - * Shouldn't happen because we verified the parent - * above. But there may be a race condition where it - * has since been removed. 
- */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': " - "no such %s"), parent, - zfs_type_to_name(zhp->zfs_type)); - break; - - default: - zfs_baderror(errno); - } - } + if (ret != 0) + (void) zfs_standard_error(hdl, errno, errbuf); free(parent); zfs_close(zhp); @@ -2337,6 +2243,11 @@ zfs_send(zfs_handle_t *zhp_to, zfs_handle_t *zhp_from) { zfs_cmd_t zc = { 0 }; int ret; + char errbuf[1024]; + libzfs_handle_t *hdl = zhp_to->zfs_hdl; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot send '%s'"), zhp_to->zfs_name); /* do the ioctl() */ (void) strlcpy(zc.zc_name, zhp_to->zfs_name, sizeof (zc.zc_name)); @@ -2348,34 +2259,14 @@ zfs_send(zfs_handle_t *zhp_to, zfs_handle_t *zhp_from) } zc.zc_cookie = STDOUT_FILENO; - ret = zfs_ioctl(ZFS_IOC_SENDBACKUP, &zc); + ret = ioctl(zhp_to->zfs_hdl->libzfs_fd, ZFS_IOC_SENDBACKUP, &zc); if (ret != 0) { switch (errno) { - case EPERM: - /* - * User doesn't have permission to do a send - */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot send '%s': " - "permission denied"), zhp_to->zfs_name); - break; case EXDEV: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot send incremental from %s:\n" - "it is not an earlier snapshot from the " - "same fs as %s"), - zhp_from->zfs_name, zhp_to->zfs_name); - break; - - case ENOENT: - /* - * Shouldn't happen because we verified the parent - * above. But there may be a race condition where it - * has since been removed. 
- */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot open: " - "no such snapshot")); - break; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "not an ealier snapshot from the same fs")); + return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); case EDQUOT: case EFBIG: @@ -2388,18 +2279,11 @@ zfs_send(zfs_handle_t *zhp_to, zfs_handle_t *zhp_from) case ERANGE: case EFAULT: case EROFS: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot write stream: %s"), - strerror(errno)); - break; - - case EINTR: - zfs_error(dgettext(TEXT_DOMAIN, - "send failed: signal received")); - break; + zfs_error_aux(hdl, strerror(errno)); + return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); default: - zfs_baderror(errno); + return (zfs_standard_error(hdl, errno, errbuf)); } } @@ -2410,7 +2294,8 @@ zfs_send(zfs_handle_t *zhp_to, zfs_handle_t *zhp_from) * Restores a backup of tosnap from stdin. */ int -zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun) +zfs_receive(libzfs_handle_t *hdl, const char *tosnap, int isprefix, + int verbose, int dryrun) { zfs_cmd_t zc = { 0 }; time_t begin_time; @@ -2418,9 +2303,13 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun) char *cp; dmu_replay_record_t drr; struct drr_begin *drrb = &zc.zc_begin_record; + char errbuf[1024]; begin_time = time(NULL); + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot receive")); + /* trim off snapname, if any */ (void) strcpy(zc.zc_name, tosnap); cp = strchr(zc.zc_name, '@'); @@ -2437,31 +2326,26 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun) } while (size > 0); if (size < 0 || bytes != sizeof (drr)) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: invalid stream " - "(couldn't read first record)")); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " + "stream (failed to read first record)")); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } zc.zc_begin_record = drr.drr_u.drr_begin; if (drrb->drr_magic != DMU_BACKUP_MAGIC && 
drrb->drr_magic != BSWAP_64(DMU_BACKUP_MAGIC)) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: invalid stream " - "(invalid magic number)")); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " + "stream (bad magic number)")); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } if (drrb->drr_version != DMU_BACKUP_VERSION && drrb->drr_version != BSWAP_64(DMU_BACKUP_VERSION)) { - if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) - drrb->drr_version = BSWAP_64(drrb->drr_version); - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: only stream version 0x%llx is supported, " - "stream is version %llx."), + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only version " + "0x%llx is supported (stream is version 0x%llx)"), DMU_BACKUP_VERSION, drrb->drr_version); - return (-1); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } /* @@ -2470,10 +2354,9 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun) (void) strcpy(zc.zc_filename, tosnap); if (isprefix) { if (strchr(tosnap, '@') != NULL) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: " - "argument to -d must be a filesystem")); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination must be a filesystem")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } cp = strchr(drr.drr_u.drr_begin.drr_toname, '/'); @@ -2490,11 +2373,8 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun) * snapname from the backup. 
*/ cp = strchr(drr.drr_u.drr_begin.drr_toname, '@'); - if (cp == NULL || strlen(tosnap) + strlen(cp) >= MAXNAMELEN) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: invalid snapshot name")); - return (-1); - } + if (cp == NULL || strlen(tosnap) + strlen(cp) >= MAXNAMELEN) + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); (void) strcat(zc.zc_filename, cp); } @@ -2508,20 +2388,16 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun) *cp = '\0'; /* make sure destination fs exists */ - h = zfs_open(zc.zc_name, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - if (h == NULL) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive incrememtal stream: destination\n" - "filesystem %s does not exist"), - zc.zc_name); + h = zfs_open(hdl, zc.zc_name, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (h == NULL) return (-1); - } if (!dryrun) { /* unmount destination fs or remove device link. */ if (h->zfs_type == ZFS_TYPE_FILESYSTEM) { (void) zfs_unmount(h, NULL, 0); } else { - (void) zvol_remove_link(h->zfs_name); + (void) zvol_remove_link(hdl, h->zfs_name); } } zfs_close(h); @@ -2535,24 +2411,18 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun) cp = strchr(zc.zc_name, '@'); if (cp) *cp = '\0'; - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: destination fs %s already exists"), - zc.zc_name); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination '%s' already exists"), zc.zc_name); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (isprefix) { zfs_handle_t *h; /* make sure prefix exists */ - h = zfs_open(tosnap, ZFS_TYPE_FILESYSTEM); - if (h == NULL) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: " - "%s is an invalid destination"), - tosnap); + h = zfs_open(hdl, tosnap, ZFS_TYPE_FILESYSTEM); + if (h == NULL) return (-1); - } zfs_close(h); /* create any necessary ancestors up to prefix */ @@ -2569,24 +2439,25 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun) const char *opname; 
*cp = '\0'; - opname = "create"; - if (zfs_create(zc.zc_name, ZFS_TYPE_FILESYSTEM, - NULL, NULL) != 0) { + opname = dgettext(TEXT_DOMAIN, "create"); + if (zfs_create(hdl, zc.zc_name, + ZFS_TYPE_FILESYSTEM, NULL, NULL) != 0) { if (errno == EEXIST) continue; goto ancestorerr; } - opname = "open"; - h = zfs_open(zc.zc_name, ZFS_TYPE_FILESYSTEM); + opname = dgettext(TEXT_DOMAIN, "open"); + h = zfs_open(hdl, zc.zc_name, + ZFS_TYPE_FILESYSTEM); if (h == NULL) goto ancestorerr; - opname = "mount"; + opname = dgettext(TEXT_DOMAIN, "mount"); if (zfs_mount(h, NULL, 0) != 0) goto ancestorerr; - opname = "share"; + opname = dgettext(TEXT_DOMAIN, "share"); if (zfs_share(h) != 0) goto ancestorerr; @@ -2594,22 +2465,21 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun) continue; ancestorerr: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: couldn't %s ancestor %s"), - opname, zc.zc_name); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "failed to %s ancestor '%s'"), opname, + zc.zc_name); + return (zfs_error(hdl, EZFS_BADRESTORE, + errbuf)); } } /* Make sure destination fs does not exist */ cp = strchr(zc.zc_name, '@'); *cp = '\0'; - if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) == 0) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive full stream: " - "destination filesystem %s already exists"), - zc.zc_name); - return (-1); + if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination '%s' exists"), zc.zc_name); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } /* Do the recvbackup ioctl to the fs's parent. 
*/ @@ -2630,21 +2500,20 @@ ancestorerr: } if (dryrun) return (0); - err = ioctl_err = zfs_ioctl(ZFS_IOC_RECVBACKUP, &zc); + err = ioctl_err = ioctl(hdl->libzfs_fd, ZFS_IOC_RECVBACKUP, &zc); if (ioctl_err != 0) { switch (errno) { case ENODEV: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: " - "most recent snapshot does not " - "match incremental source")); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "most recent snapshot does not match incremental " + "source")); + (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); break; case ETXTBSY: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: " - "destination has been modified since " - "most recent snapshot --\n" - "use 'zfs rollback' to discard changes")); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination has been modified since most recent " + "snapshot")); + (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); break; case EEXIST: if (drrb->drr_fromguid == 0) { @@ -2652,45 +2521,21 @@ ancestorerr: cp = strchr(zc.zc_filename, '@'); *cp = '\0'; } - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive to %s: destination already exists"), - zc.zc_filename); - break; - case ENOENT: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: destination does not exist")); - break; - case EBUSY: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: destination is in use")); - break; - case ENOSPC: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: out of space")); - break; - case EDQUOT: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: quota exceeded")); - break; - case EINTR: - zfs_error(dgettext(TEXT_DOMAIN, - "receive failed: signal received")); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination already exists")); + (void) zfs_error(hdl, EZFS_EXISTS, dgettext(TEXT_DOMAIN, + "cannot restore to %s"), zc.zc_filename); break; case EINVAL: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: invalid stream")); + (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ECKSUM: - zfs_error(dgettext(TEXT_DOMAIN, - 
"cannot receive: invalid stream " - "(checksum mismatch)")); - break; - case EPERM: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot receive: permission denied")); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid stream (checksum mismatch)")); + (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; default: - zfs_baderror(errno); + (void) zfs_standard_error(hdl, errno, errbuf); } } @@ -2705,16 +2550,17 @@ ancestorerr: zfs_handle_t *h; *cp = '\0'; - h = zfs_open(zc.zc_filename, + h = zfs_open(hdl, zc.zc_filename, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); *cp = '@'; if (h) { if (h->zfs_type == ZFS_TYPE_FILESYSTEM) { err = zfs_mount(h, NULL, 0); } else { - err = zvol_create_link(h->zfs_name); + err = zvol_create_link(hdl, h->zfs_name); if (err == 0 && ioctl_err == 0) - err = zvol_create_link(zc.zc_filename); + err = zvol_create_link(hdl, + zc.zc_filename); } zfs_close(h); } @@ -2750,7 +2596,7 @@ typedef struct rollback_data { uint64_t cb_create; /* creation time reference */ prop_changelist_t *cb_clp; /* changelist pointer */ int cb_error; - int cb_dependent; + boolean_t cb_dependent; } rollback_data_t; static int @@ -2764,9 +2610,9 @@ rollback_destroy(zfs_handle_t *zhp, void *data) zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { - cbp->cb_dependent = TRUE; + cbp->cb_dependent = B_TRUE; (void) zfs_iter_dependents(zhp, rollback_destroy, cbp); - cbp->cb_dependent = FALSE; + cbp->cb_dependent = B_FALSE; if (zfs_destroy(zhp) != 0) cbp->cb_error = 1; @@ -2797,7 +2643,7 @@ do_rollback(zfs_handle_t *zhp) zhp->zfs_type == ZFS_TYPE_VOLUME); if (zhp->zfs_type == ZFS_TYPE_VOLUME && - zvol_remove_link(zhp->zfs_name) != 0) + zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0) return (-1); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); @@ -2814,58 +2660,13 @@ do_rollback(zfs_handle_t *zhp) * condition where the user has taken a snapshot since we verified that * this was the most recent. 
*/ - if ((ret = zfs_ioctl(ZFS_IOC_ROLLBACK, &zc)) != 0) { - switch (errno) { - case EPERM: - /* - * The user doesn't have permission to rollback the - * given dataset. - */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot rollback '%s': " - "permission denied"), zhp->zfs_name); - break; - - case EDQUOT: - case ENOSPC: - /* - * The parent dataset doesn't have enough space to - * rollback to the last snapshot. - */ - { - char parent[ZFS_MAXNAMELEN]; - (void) parent_name(zhp->zfs_name, parent, - sizeof (parent)); - zfs_error(dgettext(TEXT_DOMAIN, "cannot " - "rollback '%s': out of space"), parent); - } - break; - - case ENOENT: - /* - * The dataset doesn't exist. This shouldn't happen - * except in race conditions. - */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot rollback '%s': " - "no such %s"), zhp->zfs_name, - zfs_type_to_name(zhp->zfs_type)); - break; - - case EBUSY: - /* - * The filesystem is busy. This should have been caught - * by the caller before getting here, but there may be - * an unexpected problem. 
- */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot rollback '%s': " - "%s is busy"), zhp->zfs_name, - zfs_type_to_name(zhp->zfs_type)); - break; - - default: - zfs_baderror(errno); - } + if ((ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_ROLLBACK, + &zc)) != 0) { + (void) zfs_standard_error(zhp->zfs_hdl, errno, + dgettext(TEXT_DOMAIN, "cannot rollback '%s'"), + zhp->zfs_name); } else if (zhp->zfs_type == ZFS_TYPE_VOLUME) { - ret = zvol_create_link(zhp->zfs_name); + ret = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name); } return (ret); @@ -2946,9 +2747,10 @@ zfs_iter_dependents(zfs_handle_t *zhp, zfs_iter_f func, void *data) zfs_handle_t *child; int ret = 0; - dependents = get_dependents(zhp->zfs_name, &count); + dependents = get_dependents(zhp->zfs_hdl, zhp->zfs_name, &count); for (i = 0; i < count; i++) { - if ((child = make_dataset_handle(dependents[i])) == NULL) + if ((child = make_dataset_handle(zhp->zfs_hdl, + dependents[i])) == NULL) continue; if ((ret = func(child, data)) != 0) @@ -2970,10 +2772,11 @@ zfs_rename(zfs_handle_t *zhp, const char *target) { int ret; zfs_cmd_t zc = { 0 }; - char reason[64]; char *delim; prop_changelist_t *cl; char parent[ZFS_MAXNAMELEN]; + libzfs_handle_t *hdl = zhp->zfs_hdl; + char errbuf[1024]; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_prop_value, target, sizeof (zc.zc_prop_value)); @@ -2982,22 +2785,21 @@ zfs_rename(zfs_handle_t *zhp, const char *target) if (strcmp(zhp->zfs_name, target) == 0) return (0); + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot rename to '%s'"), target); + /* * Make sure the target name is valid */ - if (!zfs_validate_name(target, zhp->zfs_type, reason, - sizeof (reason))) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create '%s': %s in %s name"), target, reason, - zfs_type_to_name(zhp->zfs_type)); - return (-1); - } + if (!zfs_validate_name(hdl, target, zhp->zfs_type)) + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); if (zhp->zfs_type 
== ZFS_TYPE_SNAPSHOT) { + if ((delim = strchr(target, '@')) == NULL) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot rename to '%s': not a snapshot"), target); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "not a snapshot")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } /* @@ -3005,17 +2807,16 @@ zfs_rename(zfs_handle_t *zhp, const char *target) */ if (strncmp(zhp->zfs_name, target, delim - target) != 0 || zhp->zfs_name[delim - target] != '@') { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot rename to '%s': snapshots must be part " - "of same dataset"), target); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "snapshots must be part of same dataset")); + return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); } (void) strncpy(parent, target, delim - target); parent[delim - target] = '\0'; } else { /* validate parents */ - if (check_parents(target, zhp->zfs_type) != 0) + if (check_parents(hdl, target) != 0) return (-1); (void) parent_name(target, parent, sizeof (parent)); @@ -3024,28 +2825,30 @@ zfs_rename(zfs_handle_t *zhp, const char *target) verify((delim = strchr(target, '/')) != NULL); if (strncmp(zhp->zfs_name, target, delim - target) != 0 || zhp->zfs_name[delim - target] != '/') { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot rename to '%s': " - "datasets must be within same pool"), target); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "datasets must be within same pool")); + return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); } } + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name); + if (getzoneid() == GLOBAL_ZONEID && zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { - zfs_error(dgettext(TEXT_DOMAIN, "cannot rename %s, " - "dataset is used in a non-global zone"), zhp->zfs_name); - return (-1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset is used in a non-global zone")); + return (zfs_error(hdl, EZFS_ZONED, errbuf)); } if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0)) == 
NULL) - return (1); + return (-1); if (changelist_haszonedchild(cl)) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot rename '%s': child dataset with inherited " - "mountpoint is used in a non-global zone"), zhp->zfs_name); - ret = -1; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "child dataset with inherited mountpoint is used " + "in a non-global zone")); + ret = zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } @@ -3057,59 +2860,8 @@ zfs_rename(zfs_handle_t *zhp, const char *target) else zc.zc_objset_type = DMU_OST_ZFS; - if ((ret = zfs_ioctl(ZFS_IOC_RENAME, &zc)) != 0) { - switch (errno) { - case EPERM: - /* - * The user doesn't have permission to rename the - * given dataset. - */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot rename '%s': " - "permission denied"), zhp->zfs_name); - break; - - case EDQUOT: - case ENOSPC: - /* - * Not enough space in the parent dataset. - */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot " - "rename '%s': not enough space in '%s'"), - zhp->zfs_name, parent); - break; - - case ENOENT: - /* - * The destination doesn't exist. - */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot rename '%s' " - "to '%s': destination doesn't exist"), - zhp->zfs_name, target); - break; - - case EEXIST: - /* - * The destination already exists. - */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot rename '%s' " - "to '%s': destination already exists"), - zhp->zfs_name, target); - break; - - case EBUSY: - /* - * The filesystem is busy. This should have been caught - * by the caller before getting here, but there may be - * an unexpected problem. 
- */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot rename '%s': " - "%s is busy"), zhp->zfs_name, - zfs_type_to_name(zhp->zfs_type)); - break; - - default: - zfs_baderror(errno); - } + if ((ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_RENAME, &zc)) != 0) { + (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf); /* * On failure, we still want to remount any filesystems that @@ -3132,24 +2884,18 @@ error: * poke devfsadm to create the /dev link, and then wait for the link to appear. */ int -zvol_create_link(const char *dataset) +zvol_create_link(libzfs_handle_t *hdl, const char *dataset) { zfs_cmd_t zc = { 0 }; - di_devlink_handle_t hdl; + di_devlink_handle_t dhdl; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); /* * Issue the appropriate ioctl. */ - if (zfs_ioctl(ZFS_IOC_CREATE_MINOR, &zc) != 0) { + if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) { switch (errno) { - case EPERM: - zfs_error(dgettext(TEXT_DOMAIN, "cannot create " - "device links for '%s': permission denied"), - dataset); - break; - case EEXIST: /* * Silently ignore the case where the link already @@ -3159,22 +2905,24 @@ zvol_create_link(const char *dataset) return (0); default: - zfs_baderror(errno); + return (zfs_standard_error(hdl, errno, + dgettext(TEXT_DOMAIN, "cannot create device links " + "for '%s'"), dataset)); } - - return (-1); } /* * Call devfsadm and wait for the links to magically appear. 
*/ - if ((hdl = di_devlink_init(ZFS_DRIVER, DI_MAKE_LINK)) == NULL) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot create device links for '%s'"), dataset); - (void) zfs_ioctl(ZFS_IOC_REMOVE_MINOR, &zc); + if ((dhdl = di_devlink_init(ZFS_DRIVER, DI_MAKE_LINK)) == NULL) { + zfs_error_aux(hdl, strerror(errno)); + (void) zfs_error(hdl, EZFS_DEVLINKS, + dgettext(TEXT_DOMAIN, "cannot create device links " + "for '%s'"), dataset); + (void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc); return (-1); } else { - (void) di_devlink_fini(&hdl); + (void) di_devlink_fini(&dhdl); } return (0); @@ -3184,26 +2932,14 @@ zvol_create_link(const char *dataset) * Remove a minor node for the given zvol and the associated /dev links. */ int -zvol_remove_link(const char *dataset) +zvol_remove_link(libzfs_handle_t *hdl, const char *dataset) { zfs_cmd_t zc = { 0 }; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - if (zfs_ioctl(ZFS_IOC_REMOVE_MINOR, &zc) != 0) { + if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) != 0) { switch (errno) { - case EPERM: - zfs_error(dgettext(TEXT_DOMAIN, "cannot remove " - "device links for '%s': permission denied"), - dataset); - break; - - case EBUSY: - zfs_error(dgettext(TEXT_DOMAIN, "cannot remove " - "device links for '%s': volume is in use"), - dataset); - break; - case ENXIO: /* * Silently ignore the case where the link no longer @@ -3213,10 +2949,10 @@ zvol_remove_link(const char *dataset) return (0); default: - zfs_baderror(errno); + return (zfs_standard_error(hdl, errno, + dgettext(TEXT_DOMAIN, "cannot remove device " + "links for '%s'"), dataset)); } - - return (-1); } return (0); diff --git a/usr/src/lib/libzfs/common/libzfs_graph.c b/usr/src/lib/libzfs/common/libzfs_graph.c index 4c7bb547ee..e86a6c9377 100644 --- a/usr/src/lib/libzfs/common/libzfs_graph.c +++ b/usr/src/lib/libzfs/common/libzfs_graph.c @@ -121,9 +121,12 @@ typedef struct zfs_graph { * Allocate a new edge pointing to the target vertex. 
*/ static zfs_edge_t * -zfs_edge_create(zfs_vertex_t *dest) +zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest) { - zfs_edge_t *zep = zfs_malloc(sizeof (zfs_edge_t)); + zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t)); + + if (zep == NULL) + return (NULL); zep->ze_dest = dest; @@ -143,15 +146,23 @@ zfs_edge_destroy(zfs_edge_t *zep) * Allocate a new vertex with the given name. */ static zfs_vertex_t * -zfs_vertex_create(const char *dataset) +zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset) { - zfs_vertex_t *zvp = zfs_malloc(sizeof (zfs_vertex_t)); + zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t)); + + if (zvp == NULL) + return (NULL); assert(strlen(dataset) < ZFS_MAXNAMELEN); (void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset)); - zvp->zv_edges = zfs_malloc(MIN_EDGECOUNT * sizeof (void *)); + if ((zvp->zv_edges = zfs_alloc(hdl, + MIN_EDGECOUNT * sizeof (void *))) == NULL) { + free(zvp); + return (NULL); + } + zvp->zv_edgealloc = MIN_EDGECOUNT; return (zvp); @@ -175,15 +186,22 @@ zfs_vertex_destroy(zfs_vertex_t *zvp) /* * Given a vertex, add an edge to the destination vertex. */ -static void -zfs_vertex_add_edge(zfs_vertex_t *zvp, zfs_vertex_t *dest) +static int +zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp, + zfs_vertex_t *dest) { - zfs_edge_t *zep = zfs_edge_create(dest); + zfs_edge_t *zep = zfs_edge_create(hdl, dest); + + if (zep == NULL) + return (-1); if (zvp->zv_edgecount == zvp->zv_edgealloc) { - zfs_edge_t **newedges = zfs_malloc(zvp->zv_edgealloc * 2 * + zfs_edge_t **newedges = zfs_alloc(hdl, zvp->zv_edgealloc * 2 * sizeof (void *)); + if (newedges == NULL) + return (-1); + bcopy(zvp->zv_edges, newedges, zvp->zv_edgealloc * sizeof (void *)); @@ -193,6 +211,8 @@ zfs_vertex_add_edge(zfs_vertex_t *zvp, zfs_vertex_t *dest) } zvp->zv_edges[zvp->zv_edgecount++] = zep; + + return (0); } static int @@ -227,12 +247,19 @@ zfs_vertex_sort_edges(zfs_vertex_t *zvp) * datasets in the pool. 
*/ static zfs_graph_t * -zfs_graph_create(size_t size) +zfs_graph_create(libzfs_handle_t *hdl, size_t size) { - zfs_graph_t *zgp = zfs_malloc(sizeof (zfs_graph_t)); + zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t)); + + if (zgp == NULL) + return (NULL); zgp->zg_size = size; - zgp->zg_hash = zfs_malloc(size * sizeof (zfs_vertex_t *)); + if ((zgp->zg_hash = zfs_alloc(hdl, + size * sizeof (zfs_vertex_t *))) == NULL) { + free(zgp); + return (NULL); + } return (zgp); } @@ -280,7 +307,8 @@ zfs_graph_hash(zfs_graph_t *zgp, const char *str) * Given a dataset name, finds the associated vertex, creating it if necessary. */ static zfs_vertex_t * -zfs_graph_lookup(zfs_graph_t *zgp, const char *dataset, uint64_t txg) +zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset, + uint64_t txg) { size_t idx = zfs_graph_hash(zgp, dataset); zfs_vertex_t *zvp; @@ -293,7 +321,9 @@ zfs_graph_lookup(zfs_graph_t *zgp, const char *dataset, uint64_t txg) } } - zvp = zfs_vertex_create(dataset); + if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL) + return (NULL); + zvp->zv_next = zgp->zg_hash[idx]; zvp->zv_txg = txg; zgp->zg_hash[idx] = zvp; @@ -308,43 +338,52 @@ zfs_graph_lookup(zfs_graph_t *zgp, const char *dataset, uint64_t txg) * created it as a destination of another edge. If 'dest' is NULL, then this * is an individual vertex (i.e. the starting vertex), so don't add an edge. 
*/ -static void -zfs_graph_add(zfs_graph_t *zgp, const char *source, const char *dest, - uint64_t txg) +static int +zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source, + const char *dest, uint64_t txg) { zfs_vertex_t *svp, *dvp; - svp = zfs_graph_lookup(zgp, source, 0); + if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL) + return (-1); svp->zv_visited = 1; if (dest != NULL) { - dvp = zfs_graph_lookup(zgp, dest, txg); - zfs_vertex_add_edge(svp, dvp); + dvp = zfs_graph_lookup(hdl, zgp, dest, txg); + if (dvp == NULL) + return (-1); + if (zfs_vertex_add_edge(hdl, svp, dvp) != 0) + return (-1); } + + return (0); } /* * Iterate over all children of the given dataset, adding any vertices as - * necessary. Returns 0 if no cloned snapshots were seen, 1 otherwise. This is + * necessary. Returns 0 if no cloned snapshots were seen, -1 if there was an + * error, or 1 otherwise. This is * a simple recursive algorithm - the ZFS namespace typically is very flat. We * manually invoke the necessary ioctl() calls to avoid the overhead and * additional semantics of zfs_open(). */ static int -iterate_children(zfs_graph_t *zgp, const char *dataset) +iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) { zfs_cmd_t zc = { 0 }; - int ret = 0; + int ret = 0, err; zfs_vertex_t *zvp; /* * Look up the source vertex, and avoid it if we've seen it before. */ - zvp = zfs_graph_lookup(zgp, dataset, 0); + zvp = zfs_graph_lookup(hdl, zgp, dataset, 0); + if (zvp == NULL) + return (-1); if (zvp->zv_visited) return (0); for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - zfs_ioctl(ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0; + ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) { /* @@ -358,32 +397,38 @@ iterate_children(zfs_graph_t *zgp, const char *dataset) * dataset and clone statistics. 
If this fails, the dataset has * since been removed, and we're pretty much screwed anyway. */ - if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0) + if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) continue; /* * Add an edge between the parent and the child. */ - zfs_graph_add(zgp, dataset, zc.zc_name, - zc.zc_objset_stats.dds_creation_txg); + if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name, + zc.zc_objset_stats.dds_creation_txg) != 0) + return (-1); /* * If this dataset has a clone parent, add an appropriate edge. */ - if (zc.zc_objset_stats.dds_clone_of[0] != '\0') - zfs_graph_add(zgp, zc.zc_objset_stats.dds_clone_of, - zc.zc_name, zc.zc_objset_stats.dds_creation_txg); + if (zc.zc_objset_stats.dds_clone_of[0] != '\0' && + zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_clone_of, + zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0) + return (-1); /* * Iterate over all children */ - ret |= iterate_children(zgp, zc.zc_name); + err = iterate_children(hdl, zgp, zc.zc_name); + if (err == -1) + return (-1); + else if (err == 1) + ret = 1; /* * Indicate if we found a dataset with a non-zero clone count. */ if (zc.zc_objset_stats.dds_num_clones != 0) - ret |= 1; + ret = 1; } /* @@ -392,7 +437,7 @@ iterate_children(zfs_graph_t *zgp, const char *dataset) bzero(&zc, sizeof (zc)); for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - zfs_ioctl(ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0; + ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) { /* @@ -400,20 +445,21 @@ iterate_children(zfs_graph_t *zgp, const char *dataset) * dataset and clone statistics. If this fails, the dataset has * since been removed, and we're pretty much screwed anyway. */ - if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0) + if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) continue; /* * Add an edge between the parent and the child. 
*/ - zfs_graph_add(zgp, dataset, zc.zc_name, - zc.zc_objset_stats.dds_creation_txg); + if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name, + zc.zc_objset_stats.dds_creation_txg) != 0) + return (-1); /* * Indicate if we found a dataset with a non-zero clone count. */ if (zc.zc_objset_stats.dds_num_clones != 0) - ret |= 1; + ret = 1; } zvp->zv_visited = 1; @@ -428,20 +474,24 @@ iterate_children(zfs_graph_t *zgp, const char *dataset) * over all datasets. */ static zfs_graph_t * -construct_graph(const char *dataset) +construct_graph(libzfs_handle_t *hdl, const char *dataset) { - zfs_graph_t *zgp = zfs_graph_create(ZFS_GRAPH_SIZE); + zfs_graph_t *zgp = zfs_graph_create(hdl, ZFS_GRAPH_SIZE); zfs_cmd_t zc = { 0 }; + int ret = 0; + + if (zgp == NULL) + return (zgp); /* * We need to explicitly check whether this dataset has clones or not, * since iterate_children() only checks the children. */ (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - (void) zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc); + (void) ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc); if (zc.zc_objset_stats.dds_num_clones != 0 || - iterate_children(zgp, dataset) != 0) { + (ret = iterate_children(hdl, zgp, dataset)) != 0) { /* * Determine pool name and try again. 
*/ @@ -449,17 +499,29 @@ construct_graph(const char *dataset) if ((slash = strchr(dataset, '/')) != NULL || (slash = strchr(dataset, '@')) != NULL) { - pool = zfs_malloc(slash - dataset + 1); + pool = zfs_alloc(hdl, slash - dataset + 1); + if (pool == NULL) { + zfs_graph_destroy(zgp); + return (NULL); + } (void) strncpy(pool, dataset, slash - dataset); pool[slash - dataset] = '\0'; - (void) iterate_children(zgp, pool); - zfs_graph_add(zgp, pool, NULL, 0); + if (iterate_children(hdl, zgp, pool) == -1 || + zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) { + free(pool); + zfs_graph_destroy(zgp); + return (NULL); + } free(pool); } } - zfs_graph_add(zgp, dataset, NULL, 0); + + if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) { + zfs_graph_destroy(zgp); + return (NULL); + } return (zgp); } @@ -469,27 +531,33 @@ construct_graph(const char *dataset) * really just a depth first search, so that the deepest nodes appear first. * hijack the 'zv_visited' marker to avoid visiting the same vertex twice. 
*/ -static void -topo_sort(char **result, size_t *idx, zfs_vertex_t *zgv) +static int +topo_sort(libzfs_handle_t *hdl, char **result, size_t *idx, zfs_vertex_t *zgv) { int i; /* avoid doing a search if we don't have to */ if (zgv->zv_visited == 2) - return; + return (0); zfs_vertex_sort_edges(zgv); - for (i = 0; i < zgv->zv_edgecount; i++) - topo_sort(result, idx, zgv->zv_edges[i]->ze_dest); + for (i = 0; i < zgv->zv_edgecount; i++) { + if (topo_sort(hdl, result, idx, zgv->zv_edges[i]->ze_dest) != 0) + return (-1); + } /* we may have visited this in the course of the above */ if (zgv->zv_visited == 2) - return; + return (0); + + if ((result[*idx] = zfs_alloc(hdl, + strlen(zgv->zv_dataset) + 1)) == NULL) + return (-1); - result[*idx] = zfs_malloc(strlen(zgv->zv_dataset) + 1); (void) strcpy(result[*idx], zgv->zv_dataset); *idx += 1; zgv->zv_visited = 2; + return (0); } /* @@ -498,19 +566,33 @@ topo_sort(char **result, size_t *idx, zfs_vertex_t *zgv) * sort, and then return the array of strings to the caller. 
*/ char ** -get_dependents(const char *dataset, size_t *count) +get_dependents(libzfs_handle_t *hdl, const char *dataset, size_t *count) { char **result; zfs_graph_t *zgp; zfs_vertex_t *zvp; - zgp = construct_graph(dataset); - result = zfs_malloc(zgp->zg_nvertex * sizeof (char *)); + if ((zgp = construct_graph(hdl, dataset)) == NULL) + return (NULL); - zvp = zfs_graph_lookup(zgp, dataset, 0); + if ((result = zfs_alloc(hdl, + zgp->zg_nvertex * sizeof (char *))) == NULL) { + zfs_graph_destroy(zgp); + return (NULL); + } + + if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) { + free(result); + zfs_graph_destroy(zgp); + return (NULL); + } *count = 0; - topo_sort(result, count, zvp); + if (topo_sort(hdl, result, count, zvp) != 0) { + free(result); + zfs_graph_destroy(zgp); + return (NULL); + } /* * Get rid of the last entry, which is our starting vertex and not diff --git a/usr/src/lib/libzfs/common/libzfs_impl.h b/usr/src/lib/libzfs/common/libzfs_impl.h index 76bca21242..2c5e890767 100644 --- a/usr/src/lib/libzfs/common/libzfs_impl.h +++ b/usr/src/lib/libzfs/common/libzfs_impl.h @@ -34,13 +34,29 @@ #include <sys/zfs_acl.h> #include <sys/nvpair.h> +#include <libuutil.h> #include <libzfs.h> #ifdef __cplusplus extern "C" { #endif +struct libzfs_handle { + int libzfs_error; + int libzfs_fd; + FILE *libzfs_mnttab; + FILE *libzfs_sharetab; + uu_avl_pool_t *libzfs_ns_avlpool; + uu_avl_t *libzfs_ns_avl; + uint64_t libzfs_ns_gen; + int libzfs_desc_active; + char libzfs_action[1024]; + char libzfs_desc[1024]; + int libzfs_printerr; +}; + struct zfs_handle { + libzfs_handle_t *zfs_hdl; char zfs_name[ZFS_MAXNAMELEN]; zfs_type_t zfs_type; dmu_objset_stats_t zfs_dmustats; @@ -52,6 +68,7 @@ struct zfs_handle { }; struct zpool_handle { + libzfs_handle_t *zpool_hdl; char zpool_name[ZPOOL_MAXNAMELEN]; int zpool_state; size_t zpool_config_size; @@ -61,18 +78,16 @@ struct zpool_handle { size_t zpool_error_count; }; -void zfs_error(const char *, ...); -void zfs_fatal(const char *, 
...); -void *zfs_malloc(size_t); -char *zfs_strdup(const char *); -void no_memory(void); +int zfs_error(libzfs_handle_t *, int, const char *, ...); +void zfs_error_aux(libzfs_handle_t *, const char *, ...); +void *zfs_alloc(libzfs_handle_t *, size_t); +char *zfs_strdup(libzfs_handle_t *, const char *); +int no_memory(libzfs_handle_t *); -#define zfs_baderror(err) \ - (zfs_fatal(dgettext(TEXT_DOMAIN, \ - "internal error: unexpected error %d at line %d of %s"), \ - (err), (__LINE__), (__FILE__))) +int zfs_standard_error(libzfs_handle_t *, int, const char *, ...); +int zpool_standard_error(libzfs_handle_t *, int, const char *, ...); -char **get_dependents(const char *, size_t *); +char **get_dependents(libzfs_handle_t *, const char *, size_t *); typedef struct prop_changelist prop_changelist_t; @@ -87,17 +102,15 @@ int changelist_haszonedchild(prop_changelist_t *); void remove_mountpoint(zfs_handle_t *); -zfs_handle_t *make_dataset_handle(const char *); -void set_pool_health(nvlist_t *); +zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *); +int set_pool_health(nvlist_t *); -zpool_handle_t *zpool_open_silent(const char *); +zpool_handle_t *zpool_open_silent(libzfs_handle_t *, const char *); -int zvol_create_link(const char *); -int zvol_remove_link(const char *); +int zvol_create_link(libzfs_handle_t *, const char *); +int zvol_remove_link(libzfs_handle_t *, const char *); -int zfs_ioctl(int, zfs_cmd_t *); -FILE *zfs_mnttab(void); -FILE *zfs_sharetab(void); +void namespace_clear(libzfs_handle_t *); #ifdef __cplusplus } diff --git a/usr/src/lib/libzfs/common/libzfs_import.c b/usr/src/lib/libzfs/common/libzfs_import.c index 98519c3aae..ef34419146 100644 --- a/usr/src/lib/libzfs/common/libzfs_import.c +++ b/usr/src/lib/libzfs/common/libzfs_import.c @@ -78,7 +78,7 @@ typedef struct pool_entry { } pool_entry_t; typedef struct name_entry { - const char *ne_name; + char *ne_name; uint64_t ne_guid; struct name_entry *ne_next; } name_entry_t; @@ -117,7 +117,7 @@ 
get_devid(const char *path) * Go through and fix up any path and/or devid information for the given vdev * configuration. */ -static void +static int fix_paths(nvlist_t *nv, name_entry_t *names) { nvlist_t **child; @@ -130,8 +130,9 @@ fix_paths(nvlist_t *nv, name_entry_t *names) if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; c++) - fix_paths(child[c], names); - return; + if (fix_paths(child[c], names) != 0) + return (-1); + return (0); } /* @@ -182,31 +183,56 @@ fix_paths(nvlist_t *nv, name_entry_t *names) } if (best == NULL) - return; + return (0); - verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) == 0); + if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0) + return (-1); if ((devid = get_devid(best->ne_name)) == NULL) { (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); } else { - verify(nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) == 0); + if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) + return (-1); devid_str_free(devid); } + + return (0); } /* * Add the given configuration to the list of known devices. */ -static void -add_config(pool_list_t *pl, const char *path, nvlist_t *config) +static int +add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path, + nvlist_t *config) { - uint64_t pool_guid, vdev_guid, top_guid, txg; + uint64_t pool_guid, vdev_guid, top_guid, txg, state; pool_entry_t *pe; vdev_entry_t *ve; config_entry_t *ce; name_entry_t *ne; /* + * If this is a hot spare not currently in use, add it to the list of + * names to translate, but don't do anything else. 
+ */ + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, + &state) == 0 && state == POOL_STATE_SPARE && + nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) { + if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) + return (-1); + + if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) { + free(ne); + return (-1); + } + ne->ne_guid = vdev_guid; + ne->ne_next = pl->names; + pl->names = ne; + return (0); + } + + /* * If we have a valid config but cannot read any of these fields, then * it means we have a half-initialized label. In vdev_label_init() * we write a label with txg == 0 so that we can identify the device @@ -223,7 +249,7 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) != 0 || txg == 0) { nvlist_free(config); - return; + return (0); } /* @@ -236,7 +262,10 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config) } if (pe == NULL) { - pe = zfs_malloc(sizeof (pool_entry_t)); + if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) { + nvlist_free(config); + return (-1); + } pe->pe_guid = pool_guid; pe->pe_next = pl->pools; pl->pools = pe; @@ -252,7 +281,10 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config) } if (ve == NULL) { - ve = zfs_malloc(sizeof (vdev_entry_t)); + if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) { + nvlist_free(config); + return (-1); + } ve->ve_guid = top_guid; ve->ve_next = pe->pe_vdevs; pe->pe_vdevs = ve; @@ -269,7 +301,10 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config) } if (ce == NULL) { - ce = zfs_malloc(sizeof (config_entry_t)); + if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) { + nvlist_free(config); + return (-1); + } ce->ce_txg = txg; ce->ce_config = config; ce->ce_next = ve->ve_configs; @@ -284,24 +319,31 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config) * mappings so that we can fix up the configuration as necessary before * doing the 
import. */ - ne = zfs_malloc(sizeof (name_entry_t)); + if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) + return (-1); + + if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) { + free(ne); + return (-1); + } - ne->ne_name = zfs_strdup(path); ne->ne_guid = vdev_guid; ne->ne_next = pl->names; pl->names = ne; + + return (0); } /* * Returns true if the named pool matches the given GUID. */ -boolean_t -pool_active(const char *name, uint64_t guid) +static boolean_t +pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid) { zpool_handle_t *zhp; uint64_t theguid; - if ((zhp = zpool_open_silent(name)) == NULL) + if ((zhp = zpool_open_silent(hdl, name)) == NULL) return (B_FALSE); verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID, @@ -320,41 +362,42 @@ pool_active(const char *name, uint64_t guid) * return to the user. */ static nvlist_t * -get_configs(pool_list_t *pl) +get_configs(libzfs_handle_t *hdl, pool_list_t *pl) { - pool_entry_t *pe, *penext; - vdev_entry_t *ve, *venext; - config_entry_t *ce, *cenext; - nvlist_t *ret, *config, *tmp, *nvtop, *nvroot; - int config_seen; + pool_entry_t *pe; + vdev_entry_t *ve; + config_entry_t *ce; + nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot; + nvlist_t **spares; + uint_t i, nspares; + boolean_t config_seen; uint64_t best_txg; char *name; zfs_cmd_t zc = { 0 }; - uint64_t guid; + uint64_t version, guid; char *packed; size_t len; int err; + uint_t children = 0; + nvlist_t **child = NULL; + uint_t c; - verify(nvlist_alloc(&ret, 0, 0) == 0); + if (nvlist_alloc(&ret, 0, 0) != 0) + goto nomem; - for (pe = pl->pools; pe != NULL; pe = penext) { - uint_t c; - uint_t children = 0; + for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { uint64_t id; - nvlist_t **child = NULL; - penext = pe->pe_next; - - verify(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0); - config_seen = FALSE; + if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0) + goto nomem; + config_seen = B_FALSE; /* * Iterate over all 
toplevel vdevs. Grab the pool configuration * from the first one we find, and then go through the rest and * add them as necessary to the 'vdevs' member of the config. */ - for (ve = pe->pe_vdevs; ve != NULL; ve = venext) { - venext = ve->ve_next; + for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { /* * Determine the best configuration for this vdev by @@ -365,8 +408,10 @@ get_configs(pool_list_t *pl) for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) { - if (ce->ce_txg > best_txg) + if (ce->ce_txg > best_txg) { tmp = ce->ce_config; + best_txg = ce->ce_txg; + } } if (!config_seen) { @@ -374,6 +419,7 @@ get_configs(pool_list_t *pl) * Copy the relevant pieces of data to the pool * configuration: * + * version * pool guid * name * pool state @@ -381,19 +427,27 @@ get_configs(pool_list_t *pl) uint64_t state; verify(nvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_VERSION, &version) == 0); + if (nvlist_add_uint64(config, + ZPOOL_CONFIG_VERSION, version) != 0) + goto nomem; + verify(nvlist_lookup_uint64(tmp, ZPOOL_CONFIG_POOL_GUID, &guid) == 0); - verify(nvlist_add_uint64(config, - ZPOOL_CONFIG_POOL_GUID, guid) == 0); + if (nvlist_add_uint64(config, + ZPOOL_CONFIG_POOL_GUID, guid) != 0) + goto nomem; verify(nvlist_lookup_string(tmp, ZPOOL_CONFIG_POOL_NAME, &name) == 0); - verify(nvlist_add_string(config, - ZPOOL_CONFIG_POOL_NAME, name) == 0); + if (nvlist_add_string(config, + ZPOOL_CONFIG_POOL_NAME, name) != 0) + goto nomem; verify(nvlist_lookup_uint64(tmp, ZPOOL_CONFIG_POOL_STATE, &state) == 0); - verify(nvlist_add_uint64(config, - ZPOOL_CONFIG_POOL_STATE, state) == 0); + if (nvlist_add_uint64(config, + ZPOOL_CONFIG_POOL_STATE, state) != 0) + goto nomem; - config_seen = TRUE; + config_seen = B_TRUE; } /* @@ -406,8 +460,10 @@ get_configs(pool_list_t *pl) if (id >= children) { nvlist_t **newchild; - newchild = zfs_malloc((id + 1) * + newchild = zfs_alloc(hdl, (id + 1) * sizeof (nvlist_t *)); + if (newchild == NULL) + goto nomem; for (c = 0; c < children; c++) 
newchild[c] = child[c]; @@ -416,23 +472,9 @@ get_configs(pool_list_t *pl) child = newchild; children = id + 1; } - verify(nvlist_dup(nvtop, &child[id], 0) == 0); + if (nvlist_dup(nvtop, &child[id], 0) != 0) + goto nomem; - /* - * Go through and free all config information. - */ - for (ce = ve->ve_configs; ce != NULL; ce = cenext) { - cenext = ce->ce_next; - - nvlist_free(ce->ce_config); - free(ce); - } - - /* - * Free this vdev entry, since it has now been merged - * into the main config. - */ - free(ve); } verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, @@ -448,51 +490,63 @@ get_configs(pool_list_t *pl) for (c = 0; c < children; c++) if (child[c] == NULL) { nvlist_t *missing; - verify(nvlist_alloc(&missing, NV_UNIQUE_NAME, - 0) == 0); - verify(nvlist_add_string(missing, - ZPOOL_CONFIG_TYPE, VDEV_TYPE_MISSING) == 0); - verify(nvlist_add_uint64(missing, - ZPOOL_CONFIG_ID, c) == 0); - verify(nvlist_add_uint64(missing, - ZPOOL_CONFIG_GUID, 0ULL) == 0); + if (nvlist_alloc(&missing, NV_UNIQUE_NAME, + 0) != 0) + goto nomem; + if (nvlist_add_string(missing, + ZPOOL_CONFIG_TYPE, + VDEV_TYPE_MISSING) != 0 || + nvlist_add_uint64(missing, + ZPOOL_CONFIG_ID, c) != 0 || + nvlist_add_uint64(missing, + ZPOOL_CONFIG_GUID, 0ULL) != 0) { + nvlist_free(missing); + goto nomem; + } child[c] = missing; } /* * Put all of this pool's top-level vdevs into a root vdev. 
*/ - verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0); - verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, - VDEV_TYPE_ROOT) == 0); - verify(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); - verify(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) == 0); - verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - child, children) == 0); + if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0) + goto nomem; + if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, + VDEV_TYPE_ROOT) != 0 || + nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 || + nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 || + nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + child, children) != 0) { + nvlist_free(nvroot); + goto nomem; + } for (c = 0; c < children; c++) nvlist_free(child[c]); free(child); + children = 0; + child = NULL; /* * Go through and fix up any paths and/or devids based on our * known list of vdev GUID -> path mappings. */ - fix_paths(nvroot, pl->names); + if (fix_paths(nvroot, pl->names) != 0) { + nvlist_free(nvroot); + goto nomem; + } /* * Add the root vdev to this pool's configuration. */ - verify(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - nvroot) == 0); + if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + nvroot) != 0) { + nvlist_free(nvroot); + goto nomem; + } nvlist_free(nvroot); /* - * Free this pool entry. - */ - free(pe); - - /* * Determine if this pool is currently active, in which case we * can't actually import it. */ @@ -501,8 +555,9 @@ get_configs(pool_list_t *pl) verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) == 0); - if (pool_active(name, guid)) { + if (pool_active(hdl, name, guid)) { nvlist_free(config); + config = NULL; continue; } @@ -510,13 +565,14 @@ get_configs(pool_list_t *pl) * Try to do the import in order to get vdev state. 
*/ if ((err = nvlist_size(config, &len, NV_ENCODE_NATIVE)) != 0) - zfs_baderror(err); + goto nomem; - packed = zfs_malloc(len); + if ((packed = zfs_alloc(hdl, len)) == NULL) + goto nomem; if ((err = nvlist_pack(config, &packed, &len, NV_ENCODE_NATIVE, 0)) != 0) - zfs_baderror(err); + goto nomem; nvlist_free(config); config = NULL; @@ -525,37 +581,76 @@ get_configs(pool_list_t *pl) zc.zc_config_src = (uint64_t)(uintptr_t)packed; zc.zc_config_dst_size = 2 * len; - zc.zc_config_dst = (uint64_t)(uintptr_t) - zfs_malloc(zc.zc_config_dst_size); + if ((zc.zc_config_dst = (uint64_t)(uintptr_t) + zfs_alloc(hdl, zc.zc_config_dst_size)) == NULL) + goto nomem; - while ((err = zfs_ioctl(ZFS_IOC_POOL_TRYIMPORT, + while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT, &zc)) != 0 && errno == ENOMEM) { free((void *)(uintptr_t)zc.zc_config_dst); - zc.zc_config_dst = (uint64_t)(uintptr_t) - zfs_malloc(zc.zc_config_dst_size); + if ((zc.zc_config_dst = (uint64_t)(uintptr_t) + zfs_alloc(hdl, zc.zc_config_dst_size)) == NULL) + goto nomem; } free(packed); - if (err) - zfs_baderror(errno); + if (err) { + (void) zpool_standard_error(hdl, errno, + dgettext(TEXT_DOMAIN, "cannot discover pools")); + free((void *)(uintptr_t)zc.zc_config_dst); + goto error; + } - verify(nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst, - zc.zc_config_dst_size, &config, 0) == 0); + if (nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst, + zc.zc_config_dst_size, &config, 0) != 0) { + free((void *)(uintptr_t)zc.zc_config_dst); + goto nomem; + } + free((void *)(uintptr_t)zc.zc_config_dst); - set_pool_health(config); + /* + * Go through and update the paths for spares, now that we have + * them. 
+ */ + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + for (i = 0; i < nspares; i++) { + if (fix_paths(spares[i], pl->names) != 0) + goto nomem; + } + } + + if (set_pool_health(config) != 0) + goto nomem; /* * Add this pool to the list of configs. */ - verify(nvlist_add_nvlist(ret, name, config) == 0); + if (nvlist_add_nvlist(ret, name, config) != 0) + goto nomem; nvlist_free(config); - - free((void *)(uintptr_t)zc.zc_config_dst); + config = NULL; } return (ret); + +nomem: + (void) no_memory(hdl); +error: + if (config) + nvlist_free(config); + if (ret) + nvlist_free(ret); + for (c = 0; c < children; c++) + nvlist_free(child[c]); + if (child) + free(child); + + return (NULL); } /* @@ -572,19 +667,21 @@ label_offset(size_t size, int l) * Given a file descriptor, read the label information and return an nvlist * describing the configuration, if there is one. */ -nvlist_t * -zpool_read_label(int fd) +int +zpool_read_label(int fd, nvlist_t **config) { struct stat64 statbuf; int l; vdev_label_t *label; - nvlist_t *config; uint64_t state, txg; + *config = NULL; + if (fstat64(fd, &statbuf) == -1) - return (NULL); + return (0); - label = zfs_malloc(sizeof (vdev_label_t)); + if ((label = malloc(sizeof (vdev_label_t))) == NULL) + return (-1); for (l = 0; l < VDEV_LABELS; l++) { if (pread(fd, label, sizeof (vdev_label_t), @@ -592,27 +689,29 @@ zpool_read_label(int fd) continue; if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, - sizeof (label->vl_vdev_phys.vp_nvlist), &config, 0) != 0) + sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) continue; - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, - &state) != 0 || state > POOL_STATE_DESTROYED) { - nvlist_free(config); + if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, + &state) != 0 || state > POOL_STATE_SPARE) { + nvlist_free(*config); continue; } - if 
(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, - &txg) != 0 || txg == 0) { - nvlist_free(config); + if (state != POOL_STATE_SPARE && + (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, + &txg) != 0 || txg == 0)) { + nvlist_free(*config); continue; } free(label); - return (config); + return (0); } free(label); - return (NULL); + *config = NULL; + return (0); } /* @@ -621,17 +720,22 @@ zpool_read_label(int fd) * given (argc is 0), then the default directory (/dev/dsk) is searched. */ nvlist_t * -zpool_find_import(int argc, char **argv) +zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv) { int i; DIR *dirp; struct dirent64 *dp; char path[MAXPATHLEN]; struct stat64 statbuf; - nvlist_t *ret, *config; + nvlist_t *ret = NULL, *config; static char *default_dir = "/dev/dsk"; int fd; pool_list_t pools = { 0 }; + pool_entry_t *pe, *penext; + vdev_entry_t *ve, *venext; + config_entry_t *ce, *cenext; + name_entry_t *ne, *nenext; + if (argc == 0) { argc = 1; @@ -645,17 +749,18 @@ zpool_find_import(int argc, char **argv) */ for (i = 0; i < argc; i++) { if (argv[i][0] != '/') { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot open '%s': must be an absolute path"), + (void) zfs_error(hdl, EZFS_BADPATH, + dgettext(TEXT_DOMAIN, "cannot open '%s'"), argv[i]); - return (NULL); + goto error; } if ((dirp = opendir(argv[i])) == NULL) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot open '%s': %s"), argv[i], - strerror(errno)); - return (NULL); + zfs_error_aux(hdl, strerror(errno)); + (void) zfs_error(hdl, EZFS_BADPATH, + dgettext(TEXT_DOMAIN, "cannot open '%s'"), + argv[i]); + goto error; } /* @@ -678,21 +783,49 @@ zpool_find_import(int argc, char **argv) if ((fd = open64(path, O_RDONLY)) < 0) continue; - config = zpool_read_label(fd); + if ((zpool_read_label(fd, &config)) != 0) { + (void) no_memory(hdl); + goto error; + } (void) close(fd); if (config != NULL) - add_config(&pools, path, config); + if (add_config(hdl, &pools, path, config) != 0) + goto error; } } - ret = 
get_configs(&pools); + ret = get_configs(hdl, &pools); + +error: + for (pe = pools.pools; pe != NULL; pe = penext) { + penext = pe->pe_next; + for (ve = pe->pe_vdevs; ve != NULL; ve = venext) { + venext = ve->ve_next; + for (ce = ve->ve_configs; ce != NULL; ce = cenext) { + cenext = ce->ce_next; + if (ce->ce_config) + nvlist_free(ce->ce_config); + free(ce); + } + free(ve); + } + free(pe); + } + + for (ne = pools.names; ne != NULL; ne = nenext) { + nenext = ne->ne_next; + if (ne->ne_name) + free(ne->ne_name); + free(ne); + } + return (ret); } -int +boolean_t find_guid(nvlist_t *nv, uint64_t guid) { uint64_t tmp; @@ -701,49 +834,94 @@ find_guid(nvlist_t *nv, uint64_t guid) verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0); if (tmp == guid) - return (TRUE); + return (B_TRUE); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; c++) if (find_guid(child[c], guid)) - return (TRUE); + return (B_TRUE); + } + + return (B_FALSE); +} + +typedef struct spare_cbdata { + uint64_t cb_guid; + zpool_handle_t *cb_zhp; +} spare_cbdata_t; + +static int +find_spare(zpool_handle_t *zhp, void *data) +{ + spare_cbdata_t *cbp = data; + nvlist_t **spares; + uint_t i, nspares; + uint64_t guid; + nvlist_t *nvroot; + + verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + for (i = 0; i < nspares; i++) { + verify(nvlist_lookup_uint64(spares[i], + ZPOOL_CONFIG_GUID, &guid) == 0); + if (guid == cbp->cb_guid) { + cbp->cb_zhp = zhp; + return (1); + } + } } - return (FALSE); + zpool_close(zhp); + return (0); } /* - * Determines if the pool is in use. If so, it returns TRUE and the state of + * Determines if the pool is in use. If so, it returns true and the state of * the pool as well as the name of the pool. Both strings are allocated and * must be freed by the caller. 
*/ int -zpool_in_use(int fd, pool_state_t *state, char **namestr) +zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr, + boolean_t *inuse) { nvlist_t *config; char *name; - int ret; + boolean_t ret; uint64_t guid, vdev_guid; zpool_handle_t *zhp; nvlist_t *pool_config; uint64_t stateval; + spare_cbdata_t cb = { 0 }; + + *inuse = B_FALSE; - if ((config = zpool_read_label(fd)) == NULL) - return (FALSE); + if (zpool_read_label(fd, &config) != 0) { + (void) no_memory(hdl); + return (-1); + } + + if (config == NULL) + return (0); - verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, - &name) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &stateval) == 0); - verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, - &guid) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0); + if (stateval != POOL_STATE_SPARE) { + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &name) == 0); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &guid) == 0); + } + switch (stateval) { case POOL_STATE_EXPORTED: - ret = TRUE; + ret = B_TRUE; break; case POOL_STATE_ACTIVE: @@ -754,14 +932,14 @@ zpool_in_use(int fd, pool_state_t *state, char **namestr) * active pool that was disconnected without being explicitly * exported. */ - if (pool_active(name, guid)) { + if (pool_active(hdl, name, guid)) { /* * Because the device may have been removed while * offlined, we only report it as active if the vdev is * still present in the config. Otherwise, pretend like * it's not in use. 
*/ - if ((zhp = zpool_open_canfail(name)) != NULL && + if ((zhp = zpool_open_canfail(hdl, name)) != NULL && (pool_config = zpool_get_config(zhp, NULL)) != NULL) { nvlist_t *nvroot; @@ -770,24 +948,57 @@ zpool_in_use(int fd, pool_state_t *state, char **namestr) ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); ret = find_guid(nvroot, vdev_guid); } else { - ret = FALSE; + ret = B_FALSE; } + + if (zhp != NULL) + zpool_close(zhp); } else { stateval = POOL_STATE_POTENTIALLY_ACTIVE; + ret = B_TRUE; + } + break; + + case POOL_STATE_SPARE: + /* + * For a hot spare, it can be either definitively in use, or + * potentially active. To determine if it's in use, we iterate + * over all pools in the system and search for one with a spare + * with a matching guid. + * + * Due to the shared nature of spares, we don't actually report + * the potentially active case as in use. This means the user + * can freely create pools on the hot spares of exported pools, + * but to do otherwise makes the resulting code complicated, and + * we end up having to deal with this case anyway. + */ + cb.cb_zhp = NULL; + cb.cb_guid = vdev_guid; + if (zpool_iter(hdl, find_spare, &cb) == 1) { + name = (char *)zpool_get_name(cb.cb_zhp); ret = TRUE; + } else { + ret = FALSE; } break; default: - ret = FALSE; + ret = B_FALSE; } if (ret) { - *namestr = zfs_strdup(name); + if ((*namestr = zfs_strdup(hdl, name)) == NULL) { + nvlist_free(config); + return (-1); + } *state = (pool_state_t)stateval; } + if (cb.cb_zhp) + zpool_close(cb.cb_zhp); + nvlist_free(config); - return (ret); + *inuse = ret; + return (0); } diff --git a/usr/src/lib/libzfs/common/libzfs_mount.c b/usr/src/lib/libzfs/common/libzfs_mount.c index ae4a9937a8..894bcc0d03 100644 --- a/usr/src/lib/libzfs/common/libzfs_mount.c +++ b/usr/src/lib/libzfs/common/libzfs_mount.c @@ -63,44 +63,44 @@ #include "libzfs_impl.h" /* - * Search the sharetab for the given mountpoint, returning TRUE if it is found. 
+ * Search the sharetab for the given mountpoint, returning true if it is found. */ -static int -is_shared(const char *mountpoint) +static boolean_t +is_shared(libzfs_handle_t *hdl, const char *mountpoint) { char buf[MAXPATHLEN], *tab; - if (zfs_sharetab() == NULL) + if (hdl->libzfs_sharetab == NULL) return (0); - (void) fseek(zfs_sharetab(), 0, SEEK_SET); + (void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET); - while (fgets(buf, sizeof (buf), zfs_sharetab()) != NULL) { + while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) { /* the mountpoint is the first entry on each line */ if ((tab = strchr(buf, '\t')) != NULL) { *tab = '\0'; if (strcmp(buf, mountpoint) == 0) - return (1); + return (B_TRUE); } } - return (0); + return (B_FALSE); } /* - * Returns TRUE if the specified directory is empty. If we can't open the - * directory at all, return TRUE so that the mount can fail with a more + * Returns true if the specified directory is empty. If we can't open the + * directory at all, return true so that the mount can fail with a more * informative error message. */ -static int +static boolean_t dir_is_empty(const char *dirname) { DIR *dirp; struct dirent64 *dp; if ((dirp = opendir(dirname)) == NULL) - return (TRUE); + return (B_TRUE); while ((dp = readdir64(dirp)) != NULL) { @@ -109,11 +109,11 @@ dir_is_empty(const char *dirname) continue; (void) closedir(dirp); - return (FALSE); + return (B_FALSE); } (void) closedir(dirp); - return (TRUE); + return (B_TRUE); } /* @@ -121,7 +121,7 @@ dir_is_empty(const char *dirname) * in 'where' with the current mountpoint, and return 1. Otherwise, we return * 0. 
*/ -int +boolean_t zfs_is_mounted(zfs_handle_t *zhp, char **where) { struct mnttab search = { 0 }, entry; @@ -134,14 +134,14 @@ zfs_is_mounted(zfs_handle_t *zhp, char **where) search.mnt_special = (char *)zfs_get_name(zhp); search.mnt_fstype = MNTTYPE_ZFS; - rewind(zfs_mnttab()); - if (getmntany(zfs_mnttab(), &entry, &search) != 0) - return (FALSE); + rewind(zhp->zfs_hdl->libzfs_mnttab); + if (getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) != 0) + return (B_FALSE); if (where != NULL) - *where = zfs_strdup(entry.mnt_mountp); + *where = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp); - return (TRUE); + return (B_TRUE); } /* @@ -153,6 +153,7 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) struct stat buf; char mountpoint[ZFS_MAXPROPLEN]; char mntopts[MNT_LINE_MAX]; + libzfs_handle_t *hdl = zhp->zfs_hdl; if (options == NULL) mntopts[0] = '\0'; @@ -161,7 +162,7 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) /* ignore non-filesystems */ if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, - sizeof (mountpoint), NULL, NULL, 0, FALSE) != 0) + sizeof (mountpoint), NULL, NULL, 0, B_FALSE) != 0) return (0); /* return success if there is no mountpoint set */ @@ -173,25 +174,18 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) * If the 'zoned' property is set, and we're in the global zone, simply * return success. 
*/ - if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { - char zonename[ZONENAME_MAX]; - if (getzonenamebyid(getzoneid(), zonename, - sizeof (zonename)) < 0) { - zfs_error(dgettext(TEXT_DOMAIN, "internal error: " - "cannot determine current zone")); - return (1); - } - - if (strcmp(zonename, "global") == 0) - return (0); - } + if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) && + getzoneid() == GLOBAL_ZONEID) + return (0); /* Create the directory if it doesn't already exist */ if (lstat(mountpoint, &buf) != 0) { if (mkdirp(mountpoint, 0755) != 0) { - zfs_error(dgettext(TEXT_DOMAIN, "cannot mount '%s': " - "unable to create mountpoint"), mountpoint); - return (1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "failed to create mountpoint")); + return (zfs_error(hdl, EZFS_MOUNTFAILED, + dgettext(TEXT_DOMAIN, "cannot mount '%s'"), + mountpoint)); } } @@ -204,11 +198,10 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) if ((flags & MS_OVERLAY) == 0 && strstr(mntopts, MNTOPT_REMOUNT) == NULL && !dir_is_empty(mountpoint)) { - zfs_error(dgettext(TEXT_DOMAIN, "cannot mount '%s': " - "directory is not empty"), mountpoint); - zfs_error(dgettext(TEXT_DOMAIN, "use legacy mountpoint to " - "allow this behavior, or use the -O flag")); - return (1); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "directory is not empty")); + return (zfs_error(hdl, EZFS_MOUNTFAILED, + dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint)); } /* perform the mount */ @@ -219,24 +212,15 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) * from mount(), and they're well-understood. We pick a few * common ones to improve upon. 
*/ - switch (errno) { - case EBUSY: - zfs_error(dgettext(TEXT_DOMAIN, "cannot mount '%s': " - "mountpoint or dataset is busy"), zhp->zfs_name); - break; - case EPERM: - case EACCES: - zfs_error(dgettext(TEXT_DOMAIN, "cannot mount '%s': " - "permission denied"), zhp->zfs_name, - mountpoint); - break; - default: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot mount '%s': %s"), - mountpoint, strerror(errno)); - break; - } - return (1); + if (errno == EBUSY) + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "mountpoint or dataset is busy")); + else + zfs_error_aux(hdl, strerror(errno)); + + return (zfs_error(hdl, EZFS_MOUNTFAILED, + dgettext(TEXT_DOMAIN, "cannot mount '%s'"), + zhp->zfs_name)); } return (0); @@ -253,9 +237,9 @@ zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags) /* check to see if need to unmount the filesystem */ search.mnt_special = (char *)zfs_get_name(zhp); search.mnt_fstype = MNTTYPE_ZFS; - rewind(zfs_mnttab()); + rewind(zhp->zfs_hdl->libzfs_mnttab); if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) && - getmntany(zfs_mnttab(), &entry, &search) == 0)) { + getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0)) { if (mountpoint == NULL) mountpoint = entry.mnt_mountp; @@ -277,10 +261,10 @@ zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags) * semantics from the kernel. */ if (umount2(mountpoint, flags) != 0) { - zfs_error(dgettext(TEXT_DOMAIN, - "cannot unmount '%s': %s"), - mountpoint, strerror(errno)); - return (-1); + zfs_error_aux(zhp->zfs_hdl, strerror(errno)); + return (zfs_error(zhp->zfs_hdl, EZFS_UMOUNTFAILED, + dgettext(TEXT_DOMAIN, "cannot unmount '%s'"), + mountpoint)); } /* @@ -315,23 +299,23 @@ zfs_unmountall(zfs_handle_t *zhp, int flags) /* * Check to see if the filesystem is currently shared. 
*/ -int +boolean_t zfs_is_shared(zfs_handle_t *zhp, char **where) { char *mountpoint; if (!zfs_is_mounted(zhp, &mountpoint)) - return (FALSE); + return (B_FALSE); - if (is_shared(mountpoint)) { + if (is_shared(zhp->zfs_hdl, mountpoint)) { if (where != NULL) *where = mountpoint; else free(mountpoint); - return (TRUE); + return (B_TRUE); } else { free(mountpoint); - return (FALSE); + return (B_FALSE); } } @@ -346,6 +330,7 @@ zfs_share(zfs_handle_t *zhp) char shareopts[ZFS_MAXPROPLEN]; char buf[MAXPATHLEN]; FILE *fp; + libzfs_handle_t *hdl = zhp->zfs_hdl; /* ignore non-filesystems */ if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) @@ -353,14 +338,14 @@ zfs_share(zfs_handle_t *zhp) /* return success if there is no mountpoint set */ if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, - mountpoint, sizeof (mountpoint), NULL, NULL, 0, FALSE) != 0 || + mountpoint, sizeof (mountpoint), NULL, NULL, 0, B_FALSE) != 0 || strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) == 0 || strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) == 0) return (0); /* return success if there are no share options */ if (zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts, sizeof (shareopts), - NULL, NULL, 0, FALSE) != 0 || + NULL, NULL, 0, B_FALSE) != 0 || strcmp(shareopts, "off") == 0) return (0); @@ -386,11 +371,10 @@ zfs_share(zfs_handle_t *zhp) "-F nfs -o \"%s\" \"%s\" 2>&1", shareopts, mountpoint); - if ((fp = popen(buf, "r")) == NULL) { - zfs_error(dgettext(TEXT_DOMAIN, "cannot share '%s': " - "share(1M) failed"), zfs_get_name(zhp)); - return (-1); - } + if ((fp = popen(buf, "r")) == NULL) + return (zfs_error(hdl, EZFS_SHAREFAILED, + dgettext(TEXT_DOMAIN, "cannot share '%s'"), + zfs_get_name(zhp))); /* * share(1M) should only produce output if there is some kind @@ -403,14 +387,11 @@ zfs_share(zfs_handle_t *zhp) while (buf[strlen(buf) - 1] == '\n') buf[strlen(buf) - 1] = '\0'; - if (colon == NULL) - zfs_error(dgettext(TEXT_DOMAIN, "cannot share " - "'%s': share(1M) failed"), - zfs_get_name(zhp)); - else - 
zfs_error(dgettext(TEXT_DOMAIN, "cannot share " - "'%s': %s"), zfs_get_name(zhp), - colon + 2); + if (colon != NULL) + zfs_error_aux(hdl, colon + 2); + + (void) zfs_error(hdl, EZFS_SHAREFAILED, + dgettext(TEXT_DOMAIN, "cannot share '%s'")); verify(pclose(fp) != 0); return (-1); @@ -429,30 +410,29 @@ zfs_unshare(zfs_handle_t *zhp, const char *mountpoint) { char buf[MAXPATHLEN]; struct mnttab search = { 0 }, entry; + libzfs_handle_t *hdl = zhp->zfs_hdl; /* check to see if need to unmount the filesystem */ search.mnt_special = (char *)zfs_get_name(zhp); search.mnt_fstype = MNTTYPE_ZFS; - rewind(zfs_mnttab()); + rewind(zhp->zfs_hdl->libzfs_mnttab); if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) && - getmntany(zfs_mnttab(), &entry, &search) == 0)) { + getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0)) { if (mountpoint == NULL) mountpoint = entry.mnt_mountp; - if (is_shared(mountpoint)) { + if (is_shared(zhp->zfs_hdl, mountpoint)) { FILE *fp; (void) snprintf(buf, sizeof (buf), "/usr/sbin/unshare \"%s\" 2>&1", mountpoint); - if ((fp = popen(buf, "r")) == NULL) { - zfs_error(dgettext(TEXT_DOMAIN, "cannot " - "unshare '%s': unshare(1M) failed"), - zfs_get_name(zhp)); - return (-1); - } + if ((fp = popen(buf, "r")) == NULL) + return (zfs_error(hdl, EZFS_UNSHAREFAILED, + dgettext(TEXT_DOMAIN, + "cannot unshare '%s'"), zfs_get_name(zhp))); /* * unshare(1M) should only produce output if there is @@ -465,17 +445,14 @@ zfs_unshare(zfs_handle_t *zhp, const char *mountpoint) while (buf[strlen(buf) - 1] == '\n') buf[strlen(buf) - 1] = '\0'; - if (colon == NULL) - zfs_error(dgettext(TEXT_DOMAIN, - "cannot unshare '%s': unshare(1M) " - "failed"), zfs_get_name(zhp)); - else - zfs_error(dgettext(TEXT_DOMAIN, - "cannot unshare '%s': %s"), - zfs_get_name(zhp), colon + 2); + if (colon != NULL) + zfs_error_aux(hdl, colon + 2); verify(pclose(fp) != 0); - return (-1); + + return (zfs_error(hdl, EZFS_UNSHAREFAILED, + dgettext(TEXT_DOMAIN, + "cannot unshare 
'%s'"), zfs_get_name(zhp))); } verify(pclose(fp) == 0); @@ -521,24 +498,20 @@ remove_mountpoint(zfs_handle_t *zhp) char mountpoint[ZFS_MAXPROPLEN]; char source[ZFS_MAXNAMELEN]; zfs_source_t sourcetype; - char zonename[ZONENAME_MAX]; + int zoneid = getzoneid(); /* ignore non-filesystems */ if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, sizeof (mountpoint), &sourcetype, source, sizeof (source), - FALSE) != 0) + B_FALSE) != 0) return; - if (getzonenamebyid(getzoneid(), zonename, sizeof (zonename)) < 0) - zfs_fatal(dgettext(TEXT_DOMAIN, "internal error: " - "cannot determine current zone")); - if (strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0 && strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 && (sourcetype == ZFS_SRC_DEFAULT || sourcetype == ZFS_SRC_INHERITED) && (!zfs_prop_get_int(zhp, ZFS_PROP_ZONED) || - strcmp(zonename, "global") != 0)) { + zoneid != GLOBAL_ZONEID)) { /* * Try to remove the directory, silently ignoring any errors. diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c index 1fe6fa2d27..37c82015b9 100644 --- a/usr/src/lib/libzfs/common/libzfs_pool.c +++ b/usr/src/lib/libzfs/common/libzfs_pool.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -45,8 +46,8 @@ * Validate the given pool name, optionally putting an extended error message in * 'buf'. 
*/ -static int -zpool_name_valid(const char *pool, boolean_t isopen, char *buf, size_t buflen) +static boolean_t +zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool) { namecheck_err_t why; char what; @@ -64,53 +65,52 @@ zpool_name_valid(const char *pool, boolean_t isopen, char *buf, size_t buflen) (strncmp(pool, "mirror", 6) == 0 || strncmp(pool, "raidz", 5) == 0 || strncmp(pool, "spare", 5) == 0)) { - ret = -1; - why = NAME_ERR_RESERVED; + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "name is reserved")); + return (B_FALSE); } if (ret != 0) { - if (buf != NULL) { + if (hdl != NULL) { switch (why) { case NAME_ERR_TOOLONG: - (void) snprintf(buf, buflen, + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is too long")); break; case NAME_ERR_INVALCHAR: - (void) snprintf(buf, buflen, + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid character " "'%c' in pool name"), what); break; case NAME_ERR_NOLETTER: - (void) strlcpy(buf, dgettext(TEXT_DOMAIN, - "name must begin with a letter"), buflen); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "name must begin with a letter")); break; case NAME_ERR_RESERVED: - (void) strlcpy(buf, dgettext(TEXT_DOMAIN, - "name is reserved\n" - "pool name may have been omitted"), buflen); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "name is reserved")); break; case NAME_ERR_DISKLIKE: - (void) strlcpy(buf, dgettext(TEXT_DOMAIN, - "pool name is reserved\n" - "pool name may have been omitted"), buflen); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool name is reserved")); break; } } - return (FALSE); + return (B_FALSE); } - return (TRUE); + return (B_TRUE); } /* * Set the pool-wide health based on the vdev state of the root vdev. 
*/ -void +int set_pool_health(nvlist_t *config) { nvlist_t *nvroot; @@ -140,11 +140,10 @@ set_pool_health(nvlist_t *config) break; default: - zfs_baderror(vs->vs_state); + abort(); } - verify(nvlist_add_string(config, ZPOOL_CONFIG_POOL_HEALTH, - health) == 0); + return (nvlist_add_string(config, ZPOOL_CONFIG_POOL_HEALTH, health)); } /* @@ -152,28 +151,33 @@ set_pool_health(nvlist_t *config) * state. */ zpool_handle_t * -zpool_open_canfail(const char *pool) +zpool_open_canfail(libzfs_handle_t *hdl, const char *pool) { zpool_handle_t *zhp; - int error; /* * Make sure the pool name is valid. */ - if (!zpool_name_valid(pool, B_TRUE, NULL, 0)) { - zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': invalid " - "pool name"), pool); + if (!zpool_name_valid(hdl, B_TRUE, pool)) { + (void) zfs_error(hdl, EZFS_INVALIDNAME, + dgettext(TEXT_DOMAIN, "cannot open '%s'"), + pool); return (NULL); } - zhp = zfs_malloc(sizeof (zpool_handle_t)); + if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL) + return (NULL); + zhp->zpool_hdl = hdl; (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name)); - if ((error = zpool_refresh_stats(zhp)) != 0) { - if (error == ENOENT || error == EINVAL) { - zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': no " - "such pool"), pool); + if (zpool_refresh_stats(zhp) != 0) { + if (errno == ENOENT || errno == EINVAL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "no such pool")); + (void) zfs_error(hdl, EZFS_NOENT, + dgettext(TEXT_DOMAIN, "cannot open '%s'"), + pool); free(zhp); return (NULL); } else { @@ -191,17 +195,18 @@ zpool_open_canfail(const char *pool) * the configuration cache may be out of date). 
*/ zpool_handle_t * -zpool_open_silent(const char *pool) +zpool_open_silent(libzfs_handle_t *hdl, const char *pool) { zpool_handle_t *zhp; - int error; - zhp = zfs_malloc(sizeof (zpool_handle_t)); + if ((zhp = calloc(sizeof (zpool_handle_t), 1)) == NULL) + return (NULL); + zhp->zpool_hdl = hdl; (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name)); - if ((error = zpool_refresh_stats(zhp)) != 0) { - if (error == ENOENT || error == EINVAL) { + if (zpool_refresh_stats(zhp) != 0) { + if (errno == ENOENT || errno == EINVAL) { free(zhp); return (NULL); } else { @@ -219,18 +224,16 @@ zpool_open_silent(const char *pool) * state. */ zpool_handle_t * -zpool_open(const char *pool) +zpool_open(libzfs_handle_t *hdl, const char *pool) { zpool_handle_t *zhp; - if ((zhp = zpool_open_canfail(pool)) == NULL) + if ((zhp = zpool_open_canfail(hdl, pool)) == NULL) return (NULL); if (zhp->zpool_state == POOL_STATE_UNAVAIL) { - zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': pool is " - "currently unavailable"), zhp->zpool_name); - zfs_error(dgettext(TEXT_DOMAIN, "run 'zpool status %s' for " - "detailed information"), zhp->zpool_name); + (void) zfs_error(hdl, EZFS_POOLUNAVAIL, + dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name); zpool_close(zhp); return (NULL); } @@ -251,7 +254,7 @@ zpool_close(zpool_handle_t *zhp) if (zhp->zpool_error_log) { int i; for (i = 0; i < zhp->zpool_error_count; i++) - free(zhp->zpool_error_log[i]); + nvlist_free(zhp->zpool_error_log[i]); free(zhp->zpool_error_log); } free(zhp); @@ -280,6 +283,20 @@ zpool_get_guid(zpool_handle_t *zhp) } /* + * Return the version of the pool. + */ +uint64_t +zpool_get_version(zpool_handle_t *zhp) +{ + uint64_t version; + + verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_VERSION, + &version) == 0); + + return (version); +} + +/* * Return the amount of space currently consumed by the pool. 
*/ uint64_t @@ -324,7 +341,7 @@ zpool_get_root(zpool_handle_t *zhp, char *buf, size_t buflen) zfs_cmd_t zc = { 0 }; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0 || + if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 || zc.zc_root[0] == '\0') return (-1); @@ -348,34 +365,35 @@ zpool_get_state(zpool_handle_t *zhp) * don't have to worry about error semantics. */ int -zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot) +zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, + const char *altroot) { zfs_cmd_t zc = { 0 }; char *packed; size_t len; - int err; - char reason[64]; + char msg[1024]; - if (!zpool_name_valid(pool, B_FALSE, reason, sizeof (reason))) { - zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': %s"), - pool, reason); - return (-1); - } + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot create '%s'"), pool); - if (altroot != NULL && altroot[0] != '/') { - zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': alternate " - "root '%s' must be a complete path"), pool, altroot); - return (-1); - } + if (!zpool_name_valid(hdl, B_FALSE, pool)) + return (zfs_error(hdl, EZFS_INVALIDNAME, msg)); - if ((err = nvlist_size(nvroot, &len, NV_ENCODE_NATIVE)) != 0) - zfs_baderror(err); + if (altroot != NULL && altroot[0] != '/') + return (zfs_error(hdl, EZFS_BADPATH, + dgettext(TEXT_DOMAIN, "bad alternate root '%s'"), altroot)); - packed = zfs_malloc(len); + if (nvlist_size(nvroot, &len, NV_ENCODE_NATIVE) != 0) + return (no_memory(hdl)); - if ((err = nvlist_pack(nvroot, &packed, &len, - NV_ENCODE_NATIVE, 0)) != 0) - zfs_baderror(err); + if ((packed = zfs_alloc(hdl, len)) == NULL) + return (-1); + + if (nvlist_pack(nvroot, &packed, &len, + NV_ENCODE_NATIVE, 0) != 0) { + free(packed); + return (no_memory(hdl)); + } (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name)); zc.zc_config_src = (uint64_t)(uintptr_t)packed; @@ -384,18 
+402,10 @@ zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot) if (altroot != NULL) (void) strlcpy(zc.zc_root, altroot, sizeof (zc.zc_root)); - if (zfs_ioctl(ZFS_IOC_POOL_CREATE, &zc) != 0) { - switch (errno) { - case EEXIST: - zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': " - "pool exists"), pool); - break; - - case EPERM: - zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': " - "permission denied"), pool); - break; + if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CREATE, &zc) != 0) { + free(packed); + switch (errno) { case EBUSY: /* * This can happen if the user has specified the same @@ -403,14 +413,13 @@ zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot) * until we try to add it and see we already have a * label. */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': " - "one or more vdevs refer to the same device"), - pool); - break; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more vdevs refer to the same device")); + return (zfs_error(hdl, EZFS_BADDEV, msg)); case EOVERFLOW: /* - * This occurrs when one of the devices is below + * This occurs when one of the devices is below * SPA_MINDEVSIZE. Unfortunately, we can't detect which * device was the problem device since there's no * reliable way to determine device size from userland. @@ -420,53 +429,20 @@ zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot) zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf)); - zfs_error(dgettext(TEXT_DOMAIN, "cannot " - "create '%s': one or more devices is less " - "than the minimum size (%s)"), pool, - buf); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more devices is less than the " + "minimum size (%s)"), buf); } - break; - - case ENAMETOOLONG: - /* - * One of the vdevs has exceeded VDEV_SPEC_MAX length in - * its plaintext representation. 
- */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': " - "too many devices in a single vdev"), pool); - break; - - case EIO: - zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': " - "I/O error on one or more devices"), pool); - break; - - case ENXIO: - /* - * This is unlikely to happen since we've verified that - * all the devices can be opened from userland, but it's - * still possible in some circumstances. - */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': " - "one or more devices is unavailable"), pool); - break; + return (zfs_error(hdl, EZFS_BADDEV, msg)); case ENOSPC: - /* - * This can occur if we were incapable of writing to a - * file vdev because the underlying filesystem is out of - * space. This is very similar to EOVERFLOW, but we'll - * produce a slightly different message. - */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': " - "one or more devices is out of space"), pool); - break; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more devices is out of space")); + return (zfs_error(hdl, EZFS_BADDEV, msg)); default: - zfs_baderror(errno); + return (zpool_standard_error(hdl, errno, msg)); } - - return (-1); } free(packed); @@ -478,7 +454,7 @@ zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot) if (altroot != NULL) { zfs_handle_t *zhp; - verify((zhp = zfs_open(pool, ZFS_TYPE_ANY)) != NULL); + verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_ANY)) != NULL); verify(zfs_prop_set(zhp, ZFS_PROP_MOUNTPOINT, "/") == 0); zfs_close(zhp); @@ -496,9 +472,12 @@ zpool_destroy(zpool_handle_t *zhp) { zfs_cmd_t zc = { 0 }; zfs_handle_t *zfp = NULL; + libzfs_handle_t *hdl = zhp->zpool_hdl; + char msg[1024]; if (zhp->zpool_state == POOL_STATE_ACTIVE && - (zfp = zfs_open(zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL) + (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name, + ZFS_TYPE_FILESYSTEM)) == NULL) return (-1); if (zpool_remove_zvol_links(zhp) != NULL) @@ -506,35 +485,16 @@ zpool_destroy(zpool_handle_t *zhp) (void) 
strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if (zfs_ioctl(ZFS_IOC_POOL_DESTROY, &zc) != 0) { - switch (errno) { - case EPERM: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot destroy '%s': permission denied"), - zhp->zpool_name); - break; - - case EBUSY: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot destroy '%s': pool busy"), - zhp->zpool_name); - break; - - case ENOENT: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot destroy '%s': no such pool"), - zhp->zpool_name); - break; - - case EROFS: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot destroy '%s': one or more devices is " - "read only, or '/' is mounted read only"), - zhp->zpool_name); - break; + if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_DESTROY, &zc) != 0) { + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot destroy '%s'"), zhp->zpool_name); - default: - zfs_baderror(errno); + if (errno == EROFS) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more devices is read only")); + (void) zfs_error(hdl, EZFS_BADDEV, msg); + } else { + (void) zpool_standard_error(hdl, errno, msg); } if (zfp) @@ -560,10 +520,27 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) char *packed; size_t len; zfs_cmd_t zc; + int ret; + libzfs_handle_t *hdl = zhp->zpool_hdl; + char msg[1024]; + nvlist_t **spares; + uint_t nspares; + + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot add to '%s'"), zhp->zpool_name); + + if (zpool_get_version(zhp) < ZFS_VERSION_SPARES && + nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be " + "upgraded to add hot spares")); + return (zfs_error(hdl, EZFS_BADVERSION, msg)); + } verify(nvlist_size(nvroot, &len, NV_ENCODE_NATIVE) == 0); - packed = zfs_malloc(len); + if ((packed = zfs_alloc(zhp->zpool_hdl, len)) == NULL) + return (-1); verify(nvlist_pack(nvroot, &packed, &len, NV_ENCODE_NATIVE, 0) == 0); @@ -571,13 +548,8 @@ zpool_add(zpool_handle_t *zhp, nvlist_t 
*nvroot) zc.zc_config_src = (uint64_t)(uintptr_t)packed; zc.zc_config_src_size = len; - if (zfs_ioctl(ZFS_IOC_VDEV_ADD, &zc) != 0) { + if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ADD, &zc) != 0) { switch (errno) { - case EPERM: - zfs_error(dgettext(TEXT_DOMAIN, "cannot add to '%s': " - "permission denied"), zhp->zpool_name); - break; - case EBUSY: /* * This can happen if the user has specified the same @@ -585,30 +557,9 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) * until we try to add it and see we already have a * label. */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot add to '%s': " - "one or more vdevs refer to the same device"), - zhp->zpool_name); - break; - - case ENAMETOOLONG: - /* - * One of the vdevs has exceeded VDEV_SPEC_MAX length in - * its plaintext representation. - */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot add to '%s': " - "too many devices in a single vdev"), - zhp->zpool_name); - break; - - case ENXIO: - /* - * This is unlikely to happen since we've verified that - * all the devices can be opened from userland, but it's - * still possible in some circumstances. 
- */ - zfs_error(dgettext(TEXT_DOMAIN, "cannot add to '%s': " - "one or more devices is unavailable"), - zhp->zpool_name); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more vdevs refer to the same device")); + (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case EOVERFLOW: @@ -623,23 +574,31 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf)); - zfs_error(dgettext(TEXT_DOMAIN, "cannot " - "add to '%s': one or more devices is less " - "than the minimum size (%s)"), - zhp->zpool_name, buf); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "device is less than the minimum " + "size (%s)"), buf); } + (void) zfs_error(hdl, EZFS_BADDEV, msg); + break; + + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded to add raidz2 vdevs")); + (void) zfs_error(hdl, EZFS_BADVERSION, msg); break; default: - zfs_baderror(errno); + (void) zpool_standard_error(hdl, errno, msg); } - return (-1); + ret = -1; + } else { + ret = 0; } free(packed); - return (0); + return (ret); } /* @@ -656,32 +615,10 @@ zpool_export(zpool_handle_t *zhp) (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if (zfs_ioctl(ZFS_IOC_POOL_EXPORT, &zc) != 0) { - switch (errno) { - case EPERM: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot export '%s': permission denied"), - zhp->zpool_name); - break; - - case EBUSY: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot export '%s': pool is in use"), - zhp->zpool_name); - break; - - case ENOENT: - zfs_error(dgettext(TEXT_DOMAIN, - "cannot export '%s': no such pool"), - zhp->zpool_name); - break; - - default: - zfs_baderror(errno); - } - - return (-1); - } + if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_EXPORT, &zc) != 0) + return (zpool_standard_error(zhp->zpool_hdl, errno, + dgettext(TEXT_DOMAIN, "cannot export '%s'"), + zhp->zpool_name)); return (0); } @@ -693,7 +630,8 @@ zpool_export(zpool_handle_t *zhp) * an alternate root, respectively. 
*/ int -zpool_import(nvlist_t *config, const char *newname, const char *altroot) +zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, + const char *altroot) { zfs_cmd_t zc; char *packed; @@ -706,22 +644,19 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot) &origname) == 0); if (newname != NULL) { - if (!zpool_name_valid(newname, B_FALSE, NULL, 0)) { - zfs_error(dgettext(TEXT_DOMAIN, "cannot import '%s': " - "invalid pool name"), newname); - return (-1); - } + if (!zpool_name_valid(hdl, B_FALSE, newname)) + return (zfs_error(hdl, EZFS_INVALIDNAME, + dgettext(TEXT_DOMAIN, "cannot import '%s'"), + newname)); thename = (char *)newname; } else { thename = origname; } - if (altroot != NULL && altroot[0] != '/') { - zfs_error(dgettext(TEXT_DOMAIN, "cannot import '%s': alternate " - "root '%s' must be a complete path"), thename, - altroot); - return (-1); - } + if (altroot != NULL && altroot[0] != '/') + return (zfs_error(hdl, EZFS_BADPATH, + dgettext(TEXT_DOMAIN, "bad alternate root '%s'"), + altroot)); (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name)); @@ -735,7 +670,8 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot) verify(nvlist_size(config, &len, NV_ENCODE_NATIVE) == 0); - packed = zfs_malloc(len); + if ((packed = zfs_alloc(hdl, len)) == NULL) + return (-1); verify(nvlist_pack(config, &packed, &len, NV_ENCODE_NATIVE, 0) == 0); @@ -743,7 +679,7 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot) zc.zc_config_src_size = len; ret = 0; - if (zfs_ioctl(ZFS_IOC_POOL_IMPORT, &zc) != 0) { + if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_IMPORT, &zc) != 0) { char desc[1024]; if (newname == NULL) (void) snprintf(desc, sizeof (desc), @@ -755,42 +691,15 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot) origname, thename); switch (errno) { - case EEXIST: - /* - * A pool with that name already exists. 
- */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: pool exists"), - desc); - break; - - case EPERM: - /* - * The user doesn't have permission to create pools. - */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: permission " - "denied"), desc); - break; - - case ENXIO: - case EDOM: - /* - * Device is unavailable, or vdev sum didn't match. - */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: one or more " - "devices is unavailable"), - desc); - break; - case ENOTSUP: /* * Unsupported version. */ - zfs_error(dgettext(TEXT_DOMAIN, - "%s: unsupported version"), desc); + (void) zfs_error(hdl, EZFS_BADVERSION, desc); break; default: - zfs_baderror(errno); + (void) zpool_standard_error(hdl, errno, desc); } ret = -1; @@ -799,7 +708,7 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot) /* * This should never fail, but play it safe anyway. */ - if ((zhp = zpool_open_silent(thename)) != NULL) { + if ((zhp = zpool_open_silent(hdl, thename)) != NULL) { ret = zpool_create_zvol_links(zhp); zpool_close(zhp); } @@ -817,48 +726,35 @@ zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type) { zfs_cmd_t zc = { 0 }; char msg[1024]; + libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_cookie = type; - if (zfs_ioctl(ZFS_IOC_POOL_SCRUB, &zc) == 0) + if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_SCRUB, &zc) == 0) return (0); (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name); - switch (errno) { - case EPERM: - /* - * No permission to scrub this pool. - */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg); - break; - - case EBUSY: - /* - * Resilver in progress. 
- */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: currently resilvering"), - msg); - break; - - default: - zfs_baderror(errno); - } - return (-1); + if (errno == EBUSY) + return (zfs_error(hdl, EZFS_RESILVERING, msg)); + else + return (zpool_standard_error(hdl, errno, msg)); } -static uint64_t -vdev_to_guid(nvlist_t *nv, const char *search, uint64_t guid) +static nvlist_t * +vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid, + boolean_t *isspare) { uint_t c, children; nvlist_t **child; - uint64_t ret, present; + uint64_t theguid, present; char *path; uint64_t wholedisk = 0; + nvlist_t *ret; - verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &ret) == 0); + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0); if (search == NULL && nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) { @@ -866,8 +762,8 @@ vdev_to_guid(nvlist_t *nv, const char *search, uint64_t guid) * If the device has never been present since import, the only * reliable way to match the vdev is by GUID. 
*/ - if (ret == guid) - return (ret); + if (theguid == guid) + return (nv); } else if (search != NULL && nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) { (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, @@ -879,28 +775,37 @@ vdev_to_guid(nvlist_t *nv, const char *search, uint64_t guid) */ if (strlen(search) == strlen(path) - 2 && strncmp(search, path, strlen(search)) == 0) - return (ret); + return (nv); } else if (strcmp(search, path) == 0) { - return (ret); + return (nv); } } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) - return (0); + return (NULL); for (c = 0; c < children; c++) - if ((ret = vdev_to_guid(child[c], search, guid)) != 0) + if ((ret = vdev_to_nvlist_iter(child[c], search, guid, + isspare)) != NULL) return (ret); - return (0); + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, + &child, &children) == 0) { + for (c = 0; c < children; c++) { + if ((ret = vdev_to_nvlist_iter(child[c], search, guid, + isspare)) != NULL) { + *isspare = B_TRUE; + return (ret); + } + } + } + + return (NULL); } -/* - * Given a string describing a vdev, returns the matching GUID, or 0 if none. 
- */ -uint64_t -zpool_vdev_to_guid(zpool_handle_t *zhp, const char *path) +nvlist_t * +zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *isspare) { char buf[MAXPATHLEN]; const char *search; @@ -921,7 +826,8 @@ zpool_vdev_to_guid(zpool_handle_t *zhp, const char *path) verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - return (vdev_to_guid(nvroot, search, guid)); + *isspare = B_FALSE; + return (vdev_to_nvlist_iter(nvroot, search, guid, isspare)); } /* @@ -932,39 +838,26 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path) { zfs_cmd_t zc = { 0 }; char msg[1024]; + nvlist_t *tgt; + boolean_t isspare; + libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot online %s"), path); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if ((zc.zc_guid = zpool_vdev_to_guid(zhp, path)) == 0) { - zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"), - msg); - return (-1); - } + if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == NULL) + return (zfs_error(hdl, EZFS_NODEVICE, msg)); - if (zfs_ioctl(ZFS_IOC_VDEV_ONLINE, &zc) == 0) - return (0); + if (isspare) + return (zfs_error(hdl, EZFS_ISSPARE, msg)); - switch (errno) { - case ENODEV: - /* - * Device doesn't exist - */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: device not in pool"), msg); - break; + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); - case EPERM: - /* - * No permission to bring this vdev online. 
- */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg); - break; + if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ONLINE, &zc) == 0) + return (0); - default: - zfs_baderror(errno); - } - return (-1); + return (zpool_standard_error(hdl, errno, msg)); } /* @@ -975,48 +868,66 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, int istmp) { zfs_cmd_t zc = { 0 }; char msg[1024]; + nvlist_t *tgt; + boolean_t isspare; + libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot offline %s"), path); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if ((zc.zc_guid = zpool_vdev_to_guid(zhp, path)) == 0) { - zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"), - msg); - return (-1); - } + if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == NULL) + return (zfs_error(hdl, EZFS_NODEVICE, msg)); + + if (isspare) + return (zfs_error(hdl, EZFS_ISSPARE, msg)); + + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); zc.zc_cookie = istmp; - if (zfs_ioctl(ZFS_IOC_VDEV_OFFLINE, &zc) == 0) + if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_OFFLINE, &zc) == 0) return (0); switch (errno) { - case ENODEV: - /* - * Device doesn't exist - */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: device not in pool"), msg); - break; - - case EPERM: - /* - * No permission to take this vdev offline. - */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg); - break; + case EBUSY: - case EBUSY: /* * There are no other replicas of this device. */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: no valid replicas"), msg); - break; + return (zfs_error(hdl, EZFS_NOREPLICAS, msg)); - default: - zfs_baderror(errno); + default: + return (zpool_standard_error(hdl, errno, msg)); } - return (-1); +} + +/* + * Returns TRUE if the given nvlist is a vdev that was originally swapped in as + * a hot spare. 
+ */ +static boolean_t +is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which) +{ + nvlist_t **child; + uint_t c, children; + char *type; + + if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child, + &children) == 0) { + verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE, + &type) == 0); + + if (strcmp(type, VDEV_TYPE_SPARE) == 0 && + children == 2 && child[which] == tgt) + return (B_TRUE); + + for (c = 0; c < children; c++) + if (is_replacing_spare(child[c], tgt, which)) + return (B_TRUE); + } + + return (B_FALSE); } /* @@ -1032,6 +943,14 @@ zpool_vdev_attach(zpool_handle_t *zhp, char *packed; int ret; size_t len; + nvlist_t *tgt; + boolean_t isspare; + uint64_t val; + char *path; + nvlist_t **child; + uint_t children; + nvlist_t *config_root; + libzfs_handle_t *hdl = zhp->zpool_hdl; if (replacing) (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, @@ -1041,23 +960,63 @@ zpool_vdev_attach(zpool_handle_t *zhp, "cannot attach %s to %s"), new_disk, old_disk); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if ((zc.zc_guid = zpool_vdev_to_guid(zhp, old_disk)) == 0) { - zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"), - msg); - return (-1); - } + if ((tgt = zpool_find_vdev(zhp, old_disk, &isspare)) == 0) + return (zfs_error(hdl, EZFS_NODEVICE, msg)); + + if (isspare) + return (zfs_error(hdl, EZFS_ISSPARE, msg)); + + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); zc.zc_cookie = replacing; + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0 || children != 1) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "new device must be a single disk")); + return (zfs_error(hdl, EZFS_INVALCONFIG, msg)); + } + + verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), + ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0); + + /* + * If the target is a hot spare that has been swapped in, we can only + * replace it with another hot spare. 
+ */ + if (replacing && + nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 && + nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 && + (zpool_find_vdev(zhp, path, &isspare) == NULL || !isspare) && + is_replacing_spare(config_root, tgt, 1)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "can only be replaced by another hot spare")); + return (zfs_error(hdl, EZFS_BADTARGET, msg)); + } + + /* + * If we are attempting to replace a spare, it canot be applied to an + * already spared device. + */ + if (replacing && + nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 && + zpool_find_vdev(zhp, path, &isspare) != NULL && isspare && + is_replacing_spare(config_root, tgt, 0)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "device has already been replaced with a spare")); + return (zfs_error(hdl, EZFS_BADTARGET, msg)); + } + verify(nvlist_size(nvroot, &len, NV_ENCODE_NATIVE) == 0); - packed = zfs_malloc(len); + if ((packed = zfs_alloc(zhp->zpool_hdl, len)) == NULL) + return (-1); verify(nvlist_pack(nvroot, &packed, &len, NV_ENCODE_NATIVE, 0) == 0); zc.zc_config_src = (uint64_t)(uintptr_t)packed; zc.zc_config_src_size = len; - ret = zfs_ioctl(ZFS_IOC_VDEV_ATTACH, &zc); + ret = ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ATTACH, &zc); free(packed); @@ -1065,87 +1024,65 @@ zpool_vdev_attach(zpool_handle_t *zhp, return (0); switch (errno) { - case EPERM: - /* - * No permission to mess with the config. - */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg); - break; - - case ENODEV: - /* - * Device doesn't exist. - */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: %s not in pool"), - msg, old_disk); - break; - case ENOTSUP: /* * Can't attach to or replace this type of vdev. 
*/ if (replacing) - zfs_error(dgettext(TEXT_DOMAIN, - "%s: cannot replace a replacing device"), msg); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot replace a replacing device")); else - zfs_error(dgettext(TEXT_DOMAIN, - "%s: attach is only applicable to mirrors"), msg); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "can only attach to mirrors and top-level " + "disks")); + (void) zfs_error(hdl, EZFS_BADTARGET, msg); break; case EINVAL: /* * The new device must be a single disk. */ - zfs_error(dgettext(TEXT_DOMAIN, - "%s: <new_device> must be a single disk"), msg); - break; - - case ENXIO: - /* - * This is unlikely to happen since we've verified that - * all the devices can be opened from userland, but it's - * still possible in some circumstances. - */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: %s is unavailable"), - msg, new_disk); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "new device must be a single disk")); + (void) zfs_error(hdl, EZFS_INVALCONFIG, msg); break; case EBUSY: - /* - * The new device is is use. - */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: %s busy"), msg, new_disk); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"), + new_disk); + (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case EOVERFLOW: /* * The new device is too small. */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: %s is too small"), - msg, new_disk); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "device is too small")); + (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case EDOM: /* * The new device has a different alignment requirement. */ - zfs_error(dgettext(TEXT_DOMAIN, - "%s: devices have different sector alignment"), msg); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "devices have different sector alignment")); + (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case ENAMETOOLONG: /* * The resulting top-level vdev spec won't fit in the label. 
*/ - zfs_error(dgettext(TEXT_DOMAIN, - "%s: too many devices in a single vdev"), msg); + (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg); break; default: - zfs_baderror(errno); + (void) zpool_standard_error(hdl, errno, msg); } - return (1); + return (-1); } /* @@ -1156,55 +1093,81 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path) { zfs_cmd_t zc = { 0 }; char msg[1024]; + nvlist_t *tgt; + boolean_t isspare; + libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot detach %s"), path); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if ((zc.zc_guid = zpool_vdev_to_guid(zhp, path)) == 0) { - zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"), - msg); - return (-1); - } + if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == 0) + return (zfs_error(hdl, EZFS_NODEVICE, msg)); - if (zfs_ioctl(ZFS_IOC_VDEV_DETACH, &zc) == 0) + if (isspare) + return (zfs_error(hdl, EZFS_ISSPARE, msg)); + + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); + + if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_DETACH, &zc) == 0) return (0); switch (errno) { - case EPERM: - /* - * No permission to mess with the config. - */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg); - break; - - case ENODEV: - /* - * Device doesn't exist. - */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: device not in pool"), msg); - break; case ENOTSUP: /* * Can't detach from this type of vdev. */ - zfs_error(dgettext(TEXT_DOMAIN, - "%s: only applicable to mirror and replacing vdevs"), msg); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only " + "applicable to mirror and replacing vdevs")); + (void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg); break; case EBUSY: /* * There are no other replicas of this device. 
*/ - zfs_error(dgettext(TEXT_DOMAIN, "%s: no valid replicas"), msg); + (void) zfs_error(hdl, EZFS_NOREPLICAS, msg); break; default: - zfs_baderror(errno); + (void) zpool_standard_error(hdl, errno, msg); } - return (1); + return (-1); +} + +/* + * Remove the given device. Currently, this is supported only for hot spares. + */ +int +zpool_vdev_remove(zpool_handle_t *zhp, const char *path) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + nvlist_t *tgt; + boolean_t isspare; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot remove %s"), path); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == 0) + return (zfs_error(hdl, EZFS_NODEVICE, msg)); + + if (!isspare) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "only hot spares can be removed")); + return (zfs_error(hdl, EZFS_NODEVICE, msg)); + } + + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); + + if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_REMOVE, &zc) == 0) + return (0); + + return (zpool_standard_error(hdl, errno, msg)); } /* @@ -1215,6 +1178,9 @@ zpool_clear(zpool_handle_t *zhp, const char *path) { zfs_cmd_t zc = { 0 }; char msg[1024]; + nvlist_t *tgt; + boolean_t isspare; + libzfs_handle_t *hdl = zhp->zpool_hdl; if (path) (void) snprintf(msg, sizeof (msg), @@ -1226,35 +1192,21 @@ zpool_clear(zpool_handle_t *zhp, const char *path) zhp->zpool_name); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if (path && (zc.zc_guid = zpool_vdev_to_guid(zhp, path)) == 0) { - zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"), - msg); - return (-1); - } + if (path) { + if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == 0) + return (zfs_error(hdl, EZFS_NODEVICE, msg)); - if (zfs_ioctl(ZFS_IOC_CLEAR, &zc) == 0) - return (0); + if (isspare) + return (zfs_error(hdl, EZFS_ISSPARE, msg)); - switch (errno) { - case EPERM: - /* - * No permission to mess with 
the config. - */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg); - break; - - case ENODEV: - /* - * Device doesn't exist. - */ - zfs_error(dgettext(TEXT_DOMAIN, "%s: device not in pool"), msg); - break; - - default: - zfs_baderror(errno); + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, + &zc.zc_guid) == 0); } - return (1); + if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0) + return (0); + + return (zpool_standard_error(hdl, errno, msg)); } static int @@ -1269,9 +1221,9 @@ do_zvol(zfs_handle_t *zhp, void *data) */ if (zhp->zfs_volblocksize != 0) { if (linktype) - ret = zvol_create_link(zhp->zfs_name); + ret = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name); else - ret = zvol_remove_link(zhp->zfs_name); + ret = zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name); } ret = zfs_iter_children(zhp, do_zvol, data); @@ -1292,10 +1244,11 @@ zpool_create_zvol_links(zpool_handle_t *zhp) /* * If the pool is unavailable, just return success. */ - if ((zfp = make_dataset_handle(zhp->zpool_name)) == NULL) + if ((zfp = make_dataset_handle(zhp->zpool_hdl, + zhp->zpool_name)) == NULL) return (0); - ret = zfs_iter_children(zfp, do_zvol, (void *)TRUE); + ret = zfs_iter_children(zfp, do_zvol, (void *)B_TRUE); zfs_close(zfp); return (ret); @@ -1313,10 +1266,11 @@ zpool_remove_zvol_links(zpool_handle_t *zhp) /* * If the pool is unavailable, just return success. 
*/ - if ((zfp = make_dataset_handle(zhp->zpool_name)) == NULL) + if ((zfp = make_dataset_handle(zhp->zpool_hdl, + zhp->zpool_name)) == NULL) return (0); - ret = zfs_iter_children(zfp, do_zvol, (void *)FALSE); + ret = zfs_iter_children(zfp, do_zvol, (void *)B_FALSE); zfs_close(zfp); return (ret); @@ -1345,7 +1299,9 @@ devid_to_path(char *devid_str) if (ret != 0) return (NULL); - path = zfs_strdup(list[0].devname); + if ((path = strdup(list[0].devname)) == NULL) + return (NULL); + devid_free_nmlist(list); return (path); @@ -1393,7 +1349,7 @@ set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path) verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); - (void) zfs_ioctl(ZFS_IOC_VDEV_SETPATH, &zc); + (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc); } /* @@ -1412,7 +1368,7 @@ set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path) * of these checks. */ char * -zpool_vdev_name(zpool_handle_t *zhp, nvlist_t *nv) +zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv) { char *path, *devid; uint64_t value; @@ -1442,17 +1398,17 @@ zpool_vdev_name(zpool_handle_t *zhp, nvlist_t *nv) * Update the path appropriately. 
*/ set_path(zhp, nv, newpath); - verify(nvlist_add_string(nv, - ZPOOL_CONFIG_PATH, newpath) == 0); + if (nvlist_add_string(nv, + ZPOOL_CONFIG_PATH, newpath) == 0) + verify(nvlist_lookup_string(nv, + ZPOOL_CONFIG_PATH, + &path) == 0); free(newpath); - verify(nvlist_lookup_string(nv, - ZPOOL_CONFIG_PATH, &path) == 0); } - - if (newdevid) - devid_str_free(newdevid); } + if (newdevid) + devid_str_free(newdevid); } if (strncmp(path, "/dev/dsk/", 9) == 0) @@ -1460,15 +1416,28 @@ zpool_vdev_name(zpool_handle_t *zhp, nvlist_t *nv) if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value) == 0 && value) { - char *tmp = zfs_strdup(path); + char *tmp = zfs_strdup(hdl, path); + if (tmp == NULL) + return (NULL); tmp[strlen(path) - 2] = '\0'; return (tmp); } } else { verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0); + + /* + * If it's a raidz device, we need to stick in the parity level. + */ + if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) { + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, + &value) == 0); + (void) snprintf(buf, sizeof (buf), "%s%llu", path, + value); + path = buf; + } } - return (zfs_strdup(path)); + return (zfs_strdup(hdl, path)); } static int @@ -1502,15 +1471,20 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem) */ verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT, &count) == 0); - zc.zc_config_dst = (uintptr_t)zfs_malloc(count * sizeof (zbookmark_t)); + if ((zc.zc_config_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl, + count * sizeof (zbookmark_t))) == NULL) + return (-1); zc.zc_config_dst_size = count; (void) strcpy(zc.zc_name, zhp->zpool_name); for (;;) { - if (zfs_ioctl(ZFS_IOC_ERROR_LOG, &zc) != 0) { + if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG, + &zc) != 0) { + free((void *)(uintptr_t)zc.zc_config_dst); if (errno == ENOMEM) { - free((void *)(uintptr_t)zc.zc_config_dst); - zc.zc_config_dst = (uintptr_t) - zfs_malloc(zc.zc_config_dst_size); + if ((zc.zc_config_dst = (uintptr_t) + 
zfs_alloc(zhp->zpool_hdl, + zc.zc_config_dst_size)) == NULL) + return (-1); } else { return (-1); } @@ -1549,6 +1523,7 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem) */ if (list == NULL) { *nelem = j; + free((void *)(uintptr_t)zc.zc_config_dst); return (0); } @@ -1557,7 +1532,11 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem) /* * Allocate an array of nvlists to hold the results */ - zhp->zpool_error_log = zfs_malloc(j * sizeof (nvlist_t *)); + if ((zhp->zpool_error_log = zfs_alloc(zhp->zpool_hdl, + j * sizeof (nvlist_t *))) == NULL) { + free((void *)(uintptr_t)zc.zc_config_dst); + return (-1); + } /* * Fill in the results with names from the kernel. @@ -1571,31 +1550,37 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem) sizeof (zbookmark_t)) == 0) continue; - verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, - 0) == 0); + if (nvlist_alloc(&nv, NV_UNIQUE_NAME, + 0) != 0) + goto nomem; zhp->zpool_error_log[j] = nv; zc.zc_bookmark = zb[i]; - if (zfs_ioctl(ZFS_IOC_BOOKMARK_NAME, &zc) == 0) { - verify(nvlist_add_string(nv, ZPOOL_ERR_DATASET, - zc.zc_prop_name) == 0); - verify(nvlist_add_string(nv, ZPOOL_ERR_OBJECT, - zc.zc_prop_value) == 0); - verify(nvlist_add_string(nv, ZPOOL_ERR_RANGE, - zc.zc_filename) == 0); + if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_BOOKMARK_NAME, + &zc) == 0) { + if (nvlist_add_string(nv, ZPOOL_ERR_DATASET, + zc.zc_prop_name) != 0 || + nvlist_add_string(nv, ZPOOL_ERR_OBJECT, + zc.zc_prop_value) != 0 || + nvlist_add_string(nv, ZPOOL_ERR_RANGE, + zc.zc_filename) != 0) + goto nomem; } else { (void) snprintf(buf, sizeof (buf), "%llx", zb[i].zb_objset); - verify(nvlist_add_string(nv, - ZPOOL_ERR_DATASET, buf) == 0); + if (nvlist_add_string(nv, + ZPOOL_ERR_DATASET, buf) != 0) + goto nomem; (void) snprintf(buf, sizeof (buf), "%llx", zb[i].zb_object); - verify(nvlist_add_string(nv, ZPOOL_ERR_OBJECT, - buf) == 0); + if (nvlist_add_string(nv, ZPOOL_ERR_OBJECT, + buf) != 0) + goto 
nomem; (void) snprintf(buf, sizeof (buf), "lvl=%u blkid=%llu", (int)zb[i].zb_level, (long long)zb[i].zb_blkid); - verify(nvlist_add_string(nv, ZPOOL_ERR_RANGE, - buf) == 0); + if (nvlist_add_string(nv, ZPOOL_ERR_RANGE, + buf) != 0) + goto nomem; } j++; @@ -1607,6 +1592,16 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem) free((void *)(uintptr_t)zc.zc_config_dst); return (0); + +nomem: + free((void *)(uintptr_t)zc.zc_config_dst); + for (i = 0; i < zhp->zpool_error_count; i++) { + if (zhp->zpool_error_log[i]) + free(zhp->zpool_error_log[i]); + } + free(zhp->zpool_error_log); + zhp->zpool_error_log = NULL; + return (no_memory(zhp->zpool_hdl)); } /* @@ -1616,20 +1611,13 @@ int zpool_upgrade(zpool_handle_t *zhp) { zfs_cmd_t zc = { 0 }; + libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strcpy(zc.zc_name, zhp->zpool_name); - if (zfs_ioctl(ZFS_IOC_POOL_UPGRADE, &zc) != 0) { - switch (errno) { - case EPERM: - zfs_error(dgettext(TEXT_DOMAIN, "cannot upgrade '%s': " - "permission denied"), zhp->zpool_name); - break; - default: - zfs_baderror(errno); - } - - return (-1); - } + if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_UPGRADE, &zc) != 0) + return (zpool_standard_error(hdl, errno, + dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"), + zhp->zpool_name)); return (0); } diff --git a/usr/src/lib/libzfs/common/libzfs_status.c b/usr/src/lib/libzfs/common/libzfs_status.c index 258b2e2f7d..2a4164964d 100644 --- a/usr/src/lib/libzfs/common/libzfs_status.c +++ b/usr/src/lib/libzfs/common/libzfs_status.c @@ -116,7 +116,7 @@ vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) /* * Detect if any leaf devices that have seen errors or could not be opened. 
*/ -static int +static boolean_t find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) { nvlist_t **child; @@ -132,13 +132,13 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) */ verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); if (strcmp(type, VDEV_TYPE_REPLACING) == 0) - return (FALSE); + return (B_FALSE); if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; c++) if (find_vdev_problem(child[c], func)) - return (TRUE); + return (B_TRUE); } else { verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &c) == 0); @@ -147,10 +147,10 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) vs->vs_read_errors + vs->vs_write_errors + vs->vs_checksum_errors)) - return (TRUE); + return (B_TRUE); } - return (FALSE); + return (B_FALSE); } /* @@ -171,7 +171,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) * only picks the most damaging of all the current errors to report. 
*/ static zpool_status_t -check_status(nvlist_t *config, int isimport) +check_status(nvlist_t *config, boolean_t isimport) { nvlist_t *nvroot; vdev_stat_t *vs; @@ -265,7 +265,7 @@ check_status(nvlist_t *config, int isimport) zpool_status_t zpool_get_status(zpool_handle_t *zhp, char **msgid) { - zpool_status_t ret = check_status(zhp->zpool_config, FALSE); + zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); if (ret >= NMSGID) *msgid = NULL; @@ -278,7 +278,7 @@ zpool_get_status(zpool_handle_t *zhp, char **msgid) zpool_status_t zpool_import_status(nvlist_t *config, char **msgid) { - zpool_status_t ret = check_status(config, TRUE); + zpool_status_t ret = check_status(config, B_TRUE); if (ret >= NMSGID) *msgid = NULL; diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c index c7f7528491..29e99dc5b1 100644 --- a/usr/src/lib/libzfs/common/libzfs_util.c +++ b/usr/src/lib/libzfs/common/libzfs_util.c @@ -43,90 +43,320 @@ #include "libzfs_impl.h" -static int zfs_fd = -1; -static FILE *mnttab_file; -static FILE *sharetab_file; -static int sharetab_opened; +int +libzfs_errno(libzfs_handle_t *hdl) +{ + return (hdl->libzfs_error); +} -void (*error_func)(const char *, va_list); +const char * +libzfs_error_action(libzfs_handle_t *hdl) +{ + return (hdl->libzfs_action); +} -/* - * All error handling is kept within libzfs where we have the most information - * immediately available. While this may not be suitable for a general purpose - * library, it greatly simplifies our commands. This command name is used to - * prefix all error messages appropriately. 
- */ +const char * +libzfs_error_description(libzfs_handle_t *hdl) +{ + if (hdl->libzfs_desc[0] != '\0') + return (hdl->libzfs_desc); + + switch (hdl->libzfs_error) { + case EZFS_NOMEM: + return (dgettext(TEXT_DOMAIN, "out of memory")); + case EZFS_BADPROP: + return (dgettext(TEXT_DOMAIN, "invalid property value")); + case EZFS_PROPREADONLY: + return (dgettext(TEXT_DOMAIN, "read only property")); + case EZFS_PROPTYPE: + return (dgettext(TEXT_DOMAIN, "property doesn't apply to " + "datasets of this type")); + case EZFS_PROPNONINHERIT: + return (dgettext(TEXT_DOMAIN, "property cannot be inherited")); + case EZFS_PROPSPACE: + return (dgettext(TEXT_DOMAIN, "invalid quota or reservation")); + case EZFS_BADTYPE: + return (dgettext(TEXT_DOMAIN, "operation not applicable to " + "datasets of this type")); + case EZFS_BUSY: + return (dgettext(TEXT_DOMAIN, "pool or dataset is busy")); + case EZFS_EXISTS: + return (dgettext(TEXT_DOMAIN, "pool or dataset exists")); + case EZFS_NOENT: + return (dgettext(TEXT_DOMAIN, "no such pool or dataset")); + case EZFS_BADSTREAM: + return (dgettext(TEXT_DOMAIN, "invalid backup stream")); + case EZFS_DSREADONLY: + return (dgettext(TEXT_DOMAIN, "dataset is read only")); + case EZFS_VOLTOOBIG: + return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for " + "this system")); + case EZFS_VOLHASDATA: + return (dgettext(TEXT_DOMAIN, "volume has data")); + case EZFS_INVALIDNAME: + return (dgettext(TEXT_DOMAIN, "invalid name")); + case EZFS_BADRESTORE: + return (dgettext(TEXT_DOMAIN, "unable to restore to " + "destination")); + case EZFS_BADBACKUP: + return (dgettext(TEXT_DOMAIN, "backup failed")); + case EZFS_BADTARGET: + return (dgettext(TEXT_DOMAIN, "invalid target vdev")); + case EZFS_NODEVICE: + return (dgettext(TEXT_DOMAIN, "no such device in pool")); + case EZFS_BADDEV: + return (dgettext(TEXT_DOMAIN, "invalid device")); + case EZFS_NOREPLICAS: + return (dgettext(TEXT_DOMAIN, "no valid replicas")); + case EZFS_RESILVERING: + return 
(dgettext(TEXT_DOMAIN, "currently resilvering")); + case EZFS_BADVERSION: + return (dgettext(TEXT_DOMAIN, "unsupported version")); + case EZFS_POOLUNAVAIL: + return (dgettext(TEXT_DOMAIN, "pool is unavailable")); + case EZFS_DEVOVERFLOW: + return (dgettext(TEXT_DOMAIN, "too many devices in one vdev")); + case EZFS_BADPATH: + return (dgettext(TEXT_DOMAIN, "must be an absolute path")); + case EZFS_CROSSTARGET: + return (dgettext(TEXT_DOMAIN, "operation crosses datasets or " + "pools")); + case EZFS_ZONED: + return (dgettext(TEXT_DOMAIN, "dataset in use by local zone")); + case EZFS_MOUNTFAILED: + return (dgettext(TEXT_DOMAIN, "mount failed")); + case EZFS_UMOUNTFAILED: + return (dgettext(TEXT_DOMAIN, "umount failed")); + case EZFS_UNSHAREFAILED: + return (dgettext(TEXT_DOMAIN, "unshare(1M) failed")); + case EZFS_SHAREFAILED: + return (dgettext(TEXT_DOMAIN, "share(1M) failed")); + case EZFS_DEVLINKS: + return (dgettext(TEXT_DOMAIN, "failed to create /dev links")); + case EZFS_PERM: + return (dgettext(TEXT_DOMAIN, "permission denied")); + case EZFS_NOSPC: + return (dgettext(TEXT_DOMAIN, "out of space")); + case EZFS_IO: + return (dgettext(TEXT_DOMAIN, "I/O error")); + case EZFS_INTR: + return (dgettext(TEXT_DOMAIN, "signal received")); + case EZFS_ISSPARE: + return (dgettext(TEXT_DOMAIN, "device is reserved as a hot " + "spare")); + case EZFS_INVALCONFIG: + return (dgettext(TEXT_DOMAIN, "invalid vdev configuration")); + case EZFS_UNKNOWN: + return (dgettext(TEXT_DOMAIN, "unknown error")); + default: + abort(); + } + + /* NOTREACHED */ +} + +/*PRINTFLIKE2*/ void -zfs_error(const char *fmt, ...) +zfs_error_aux(libzfs_handle_t *hdl, const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); - if (error_func != NULL) { - error_func(fmt, ap); - } else { - (void) vfprintf(stderr, fmt, ap); - (void) fprintf(stderr, "\n"); + (void) vsnprintf(hdl->libzfs_desc, sizeof (hdl->libzfs_desc), + fmt, ap); + hdl->libzfs_desc_active = 1; + + va_end(ap); +} + +static void +zfs_verror(libzfs_handle_t *hdl, int error, const char *fmt, va_list ap) +{ + (void) vsnprintf(hdl->libzfs_action, sizeof (hdl->libzfs_action), + fmt, ap); + hdl->libzfs_error = error; + + if (hdl->libzfs_desc_active) + hdl->libzfs_desc_active = 0; + else + hdl->libzfs_desc[0] = '\0'; + + if (hdl->libzfs_printerr) { + if (error == EZFS_UNKNOWN) { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "internal " + "error: %s\n"), libzfs_error_description(hdl)); + abort(); + } + + (void) fprintf(stderr, "%s: %s\n", hdl->libzfs_action, + libzfs_error_description(hdl)); + if (error == EZFS_NOMEM) + exit(1); } +} + +/*PRINTFLIKE3*/ +int +zfs_error(libzfs_handle_t *hdl, int error, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + + zfs_verror(hdl, error, fmt, ap); va_end(ap); + + return (-1); } -/* - * An internal error is something that we cannot recover from, and should never - * happen (such as running out of memory). It should only be used in - * exceptional circumstances. - */ -void -zfs_fatal(const char *fmt, ...) +static int +zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt, + va_list ap) +{ + switch (error) { + case EPERM: + case EACCES: + zfs_verror(hdl, EZFS_PERM, fmt, ap); + return (-1); + + case EIO: + zfs_verror(hdl, EZFS_IO, fmt, ap); + return (-1); + + case EINTR: + zfs_verror(hdl, EZFS_INTR, fmt, ap); + return (-1); + } + + return (0); +} + +/*PRINTFLIKE3*/ +int +zfs_standard_error(libzfs_handle_t *hdl, int error, const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); - if (error_func != NULL) { - error_func(fmt, ap); - } else { - (void) vfprintf(stderr, fmt, ap); - (void) fprintf(stderr, "\n"); + if (zfs_common_error(hdl, error, fmt, ap) != 0) { + va_end(ap); + return (-1); } - va_end(ap); - exit(1); + switch (error) { + case ENXIO: + zfs_verror(hdl, EZFS_IO, fmt, ap); + break; + + case ENOENT: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset does not exist")); + zfs_verror(hdl, EZFS_NOENT, fmt, ap); + break; + + case ENOSPC: + case EDQUOT: + zfs_verror(hdl, EZFS_NOSPC, fmt, ap); + return (-1); + + case EEXIST: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset already exists")); + zfs_verror(hdl, EZFS_EXISTS, fmt, ap); + break; + + case EBUSY: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset is busy")); + zfs_verror(hdl, EZFS_BUSY, fmt, ap); + break; + + default: + zfs_error_aux(hdl, strerror(errno)); + zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); + break; + } + + va_end(ap); + return (-1); } -/* - * Consumers (such as the JNI interface) that need to capture error output can - * override the default error handler using this function. - */ -void -zfs_set_error_handler(void (*func)(const char *, va_list)) +/*PRINTFLIKE3*/ +int +zpool_standard_error(libzfs_handle_t *hdl, int error, const char *fmt, ...) 
{ - error_func = func; + va_list ap; + + va_start(ap, fmt); + + if (zfs_common_error(hdl, error, fmt, ap) != 0) { + va_end(ap); + return (-1); + } + + switch (error) { + case ENODEV: + zfs_verror(hdl, EZFS_NODEVICE, fmt, ap); + break; + + case ENOENT: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool")); + zfs_verror(hdl, EZFS_NOENT, fmt, ap); + break; + + case EEXIST: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool already exists")); + zfs_verror(hdl, EZFS_EXISTS, fmt, ap); + break; + + case EBUSY: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool is busy")); + zfs_verror(hdl, EZFS_EXISTS, fmt, ap); + break; + + case ENXIO: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more devices is currently unavailable")); + zfs_verror(hdl, EZFS_BADDEV, fmt, ap); + break; + + case ENAMETOOLONG: + zfs_verror(hdl, EZFS_DEVOVERFLOW, fmt, ap); + break; + + default: + zfs_error_aux(hdl, strerror(error)); + zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); + } + + va_end(ap); + return (-1); } /* * Display an out of memory error message and abort the current program. */ -void -no_memory(void) +int +no_memory(libzfs_handle_t *hdl) { - assert(errno == ENOMEM); - zfs_fatal(dgettext(TEXT_DOMAIN, "internal error: out of memory\n")); + return (zfs_error(hdl, EZFS_NOMEM, "internal error")); } /* * A safe form of malloc() which will die if the allocation fails. */ void * -zfs_malloc(size_t size) +zfs_alloc(libzfs_handle_t *hdl, size_t size) { void *data; if ((data = calloc(1, size)) == NULL) - no_memory(); + (void) no_memory(hdl); return (data); } @@ -135,69 +365,17 @@ zfs_malloc(size_t size) * A safe form of strdup() which will die if the allocation fails. */ char * -zfs_strdup(const char *str) +zfs_strdup(libzfs_handle_t *hdl, const char *str) { char *ret; if ((ret = strdup(str)) == NULL) - no_memory(); + (void) no_memory(hdl); return (ret); } /* - * Utility functions around common used files - /dev/zfs, /etc/mnttab, and - * /etc/dfs/sharetab. 
- */ -int -zfs_ioctl(int cmd, zfs_cmd_t *zc) -{ - if (zfs_fd == -1 && - (zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) - zfs_fatal(dgettext(TEXT_DOMAIN, "internal error: unable to " - "open ZFS device\n"), MNTTAB); - - return (ioctl(zfs_fd, cmd, zc)); -} - -FILE * -zfs_mnttab(void) -{ - if (mnttab_file == NULL && - (mnttab_file = fopen(MNTTAB, "r")) == NULL) - zfs_fatal(dgettext(TEXT_DOMAIN, "internal error: unable to " - "open %s\n"), MNTTAB); - - return (mnttab_file); -} - -FILE * -zfs_sharetab(void) -{ - if (sharetab_opened) - return (sharetab_file); - - sharetab_opened = TRUE; - return (sharetab_file = fopen("/etc/dfs/sharetab", "r")); -} - -/* - * Cleanup function for library. Close any file descriptors that were - * opened as part of the above functions. - */ -#pragma fini(zfs_fini) -void -zfs_fini(void) -{ - if (zfs_fd != -1) - (void) close(zfs_fd); - if (sharetab_file) - (void) fclose(sharetab_file); - if (mnttab_file) - (void) fclose(mnttab_file); -} - -/* * Convert a number to an appropriately human-readable output. 
*/ void @@ -241,3 +419,58 @@ zfs_nicenum(uint64_t num, char *buf, size_t buflen) } } } + +void +libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr) +{ + hdl->libzfs_printerr = printerr; +} + +libzfs_handle_t * +libzfs_init(void) +{ + libzfs_handle_t *hdl; + + if ((hdl = calloc(sizeof (libzfs_handle_t), 1)) == NULL) { + return (NULL); + } + + if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR)) == NULL) { + free(hdl); + return (NULL); + } + + if ((hdl->libzfs_mnttab = fopen(MNTTAB, "r")) == NULL) { + (void) close(hdl->libzfs_fd); + free(hdl); + return (NULL); + } + + hdl->libzfs_sharetab = fopen("/etc/dfs/sharetab", "r"); + + return (hdl); +} + +void +libzfs_fini(libzfs_handle_t *hdl) +{ + (void) close(hdl->libzfs_fd); + if (hdl->libzfs_mnttab) + (void) fclose(hdl->libzfs_mnttab); + if (hdl->libzfs_sharetab) + (void) fclose(hdl->libzfs_sharetab); + namespace_clear(hdl); + free(hdl); +} + +libzfs_handle_t * +zpool_get_handle(zpool_handle_t *zhp) +{ + return (zhp->zpool_hdl); +} + +libzfs_handle_t * +zfs_get_handle(zfs_handle_t *zhp) +{ + return (zhp->zfs_hdl); +} diff --git a/usr/src/lib/libzfs/spec/libzfs.spec b/usr/src/lib/libzfs/spec/libzfs.spec index 1789122711..6120603e18 100644 --- a/usr/src/lib/libzfs/spec/libzfs.spec +++ b/usr/src/lib/libzfs/spec/libzfs.spec @@ -24,6 +24,30 @@ # #ident "%Z%%M% %I% %E% SMI" +function libzfs_fini +version SUNWprivate_1.1 +end + +function libzfs_init +version SUNWprivate_1.1 +end + +function libzfs_errno +version SUNWprivate_1.1 +end + +function libzfs_error_action +version SUNWprivate_1.1 +end + +function libzfs_error_description +version SUNWprivate_1.1 +end + +function libzfs_print_on_error +version SUNWprivate_1.1 +end + function zfs_clone version SUNWprivate_1.1 end @@ -40,6 +64,10 @@ function zfs_destroy version SUNWprivate_1.1 end +function zfs_get_handle +version SUNWprivate_1.1 +end + function zfs_get_name version SUNWprivate_1.1 end @@ -104,6 +132,10 @@ function zfs_open version SUNWprivate_1.1 end +function 
zfs_promote +version SUNWprivate_1.1 +end + function zfs_prop_column_name version SUNWprivate_1.1 end @@ -188,10 +220,6 @@ function zfs_send version SUNWprivate_1.1 end -function zfs_set_error_handler -version SUNWprivate_1.1 -end - function zfs_share version SUNWprivate_1.1 end @@ -248,6 +276,10 @@ function zpool_export version SUNWprivate_1.1 end +function zpool_find_vdev +version SUNWprivate_1.1 +end + function zpool_find_import version SUNWprivate_1.1 end @@ -264,6 +296,10 @@ function zpool_get_guid version SUNWprivate_1.1 end +function zpool_get_handle +version SUNWprivate_1.1 +end + function zpool_get_name version SUNWprivate_1.1 end @@ -288,6 +324,10 @@ function zpool_get_status version SUNWprivate_1.1 end +function zpool_get_version +version SUNWprivate_1.1 +end + function zpool_import version SUNWprivate_1.1 end @@ -352,6 +392,7 @@ function zpool_vdev_name version SUNWprivate_1.1 end -function zpool_vdev_to_guid -version SUNWprivate_1.1 +function zpool_vdev_remove +version SUNWprivate_1.1 end + diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c b/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c index 64270f2cd7..2daeca32e2 100644 --- a/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c +++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c @@ -574,7 +574,7 @@ is_fs_snapshot(zfs_handle_t *zhp) zjni_get_dataset_from_snapshot( zfs_get_name(zhp), parent, sizeof (parent)); - parent_zhp = zfs_open(parent, ZFS_TYPE_ANY); + parent_zhp = zfs_open(g_zfs, parent, ZFS_TYPE_ANY); if (parent_zhp == NULL) { return (-1); } @@ -606,7 +606,8 @@ zjni_create_add_Pool(zpool_handle_t *zphp, void *data) zjni_Collection_t *list = ((zjni_ArrayCallbackData_t *)data)->list; /* Get root fs for this pool -- may be NULL if pool is faulted */ - zfs_handle_t *zhp = zfs_open(zpool_get_name(zphp), ZFS_TYPE_FILESYSTEM); + zfs_handle_t *zhp = zfs_open(g_zfs, zpool_get_name(zphp), + ZFS_TYPE_FILESYSTEM); jobject bean = create_PoolBean(env, zphp, zhp); @@ -682,7 +683,7 @@ 
zjni_get_Datasets_below(JNIEnv *env, jstring parentUTF, zjni_new_DatasetSet(env, list); /* Retrieve parent dataset */ - zhp = zfs_open(name, parent_typemask); + zhp = zfs_open(g_zfs, name, parent_typemask); if (zhp != NULL) { zjni_DatasetArrayCallbackData_t data = {0}; @@ -703,7 +704,7 @@ zjni_get_Datasets_below(JNIEnv *env, jstring parentUTF, /* Parent is not a dataset -- see if it's a faulted pool */ if ((parent_typemask & ZFS_TYPE_FILESYSTEM) && is_pool_name(name)) { - zpool_handle_t *zphp = zpool_open_canfail(name); + zpool_handle_t *zphp = zpool_open_canfail(g_zfs, name); if (zphp != NULL) { /* A faulted pool has no datasets */ @@ -750,7 +751,7 @@ zjni_get_Datasets_dependents(JNIEnv *env, jobjectArray paths) const char *path = (*env)->GetStringUTFChars(env, pathUTF, NULL); - zfs_handle_t *zhp = zfs_open(path, ZFS_TYPE_ANY); + zfs_handle_t *zhp = zfs_open(g_zfs, path, ZFS_TYPE_ANY); if (zhp != NULL) { /* Add all dependents of this Dataset to list */ (void) zfs_iter_dependents(zhp, @@ -762,7 +763,8 @@ zjni_get_Datasets_dependents(JNIEnv *env, jobjectArray paths) /* Path is not a dataset - see if it's a faulted pool */ if (is_pool_name(path)) { - zpool_handle_t *zphp = zpool_open_canfail(path); + zpool_handle_t *zphp = zpool_open_canfail(g_zfs, + path); if (zphp != NULL) { /* @@ -795,10 +797,10 @@ zjni_get_Dataset(JNIEnv *env, jstring nameUTF, zfs_type_t typemask) { jobject device = NULL; const char *name = (*env)->GetStringUTFChars(env, nameUTF, NULL); - zfs_handle_t *zhp = zfs_open(name, typemask); + zfs_handle_t *zhp = zfs_open(g_zfs, name, typemask); if ((typemask & ZFS_TYPE_FILESYSTEM) && is_pool_name(name)) { - zpool_handle_t *zphp = zpool_open_canfail(name); + zpool_handle_t *zphp = zpool_open_canfail(g_zfs, name); if (zphp != NULL) { device = create_PoolBean(env, zphp, zhp); diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_main.c b/usr/src/lib/libzfs_jni/common/libzfs_jni_main.c index 34500684d3..a699ecd7ce 100644 --- 
a/usr/src/lib/libzfs_jni/common/libzfs_jni_main.c +++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_main.c @@ -35,6 +35,8 @@ #include "libzfs_jni_diskmgt.h" #include "libzfs_jni_disk.h" +libzfs_handle_t *g_zfs; + /* * Function prototypes */ @@ -46,14 +48,14 @@ static void init(); * Static functions */ -char libzfs_err[1024]; +char libdskmgt_err[1024]; static void handle_error(const char *fmt, va_list ap) { /* Save the error message in case it's needed */ - (void) vsnprintf(libzfs_err, sizeof (libzfs_err), fmt, ap); + (void) vsnprintf(libdskmgt_err, sizeof (libdskmgt_err), fmt, ap); #ifdef DEBUG - (void) fprintf(stderr, "caught error: %s\n", libzfs_err); + (void) fprintf(stderr, "caught error: %s\n", libdskmgt_err); #endif } @@ -64,10 +66,8 @@ handle_error(const char *fmt, va_list ap) static void init() { - libzfs_err[0] = '\0'; - - /* libzfs error handler */ - zfs_set_error_handler(handle_error); + if ((g_zfs = libzfs_init()) == NULL) + abort(); /* diskmgt.o error handler */ dmgt_set_error_handler(handle_error); @@ -151,7 +151,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getPools(JNIEnv *env, jobject obj) data.env = env; data.list = (zjni_Collection_t *)list; - result = zpool_iter(zjni_create_add_Pool, &data); + result = zpool_iter(g_zfs, zjni_create_add_Pool, &data); if (result && (*env)->ExceptionOccurred(env) != NULL) { /* Must not call any more Java methods to preserve exception */ return (NULL); @@ -334,7 +334,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getVirtualDevice(JNIEnv *env, if (poolUTF != NULL) { const char *pool = (*env)->GetStringUTFChars(env, poolUTF, NULL); - zpool_handle_t *zhp = zpool_open_canfail(pool); + zpool_handle_t *zhp = zpool_open_canfail(g_zfs, pool); (*env)->ReleaseStringUTFChars(env, poolUTF, pool); if (zhp != NULL) { @@ -371,7 +371,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getVirtualDevices__Ljava_lang_Stri if (poolUTF != NULL) { const char *pool = (*env)->GetStringUTFChars(env, poolUTF, NULL); - zpool_handle_t 
*zhp = zpool_open_canfail(pool); + zpool_handle_t *zhp = zpool_open_canfail(g_zfs, pool); (*env)->ReleaseStringUTFChars(env, poolUTF, pool); /* Is the pool valid? */ @@ -408,7 +408,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getVirtualDevices__Ljava_lang_Stri if (poolUTF != NULL) { const char *pool = (*env)->GetStringUTFChars(env, poolUTF, NULL); - zpool_handle_t *zhp = zpool_open_canfail(pool); + zpool_handle_t *zhp = zpool_open_canfail(g_zfs, pool); (*env)->ReleaseStringUTFChars(env, poolUTF, pool); /* Is the pool valid? */ @@ -446,7 +446,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getAvailableDisks(JNIEnv *env, error = dmgt_avail_disk_iter(zjni_create_add_DiskDevice, &data); if (error) { - zjni_throw_exception(env, "%s", libzfs_err); + zjni_throw_exception(env, "%s", libdskmgt_err); } else { array = zjni_Collection_to_array( env, (zjni_Collection_t *)list, diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c b/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c index d9d09804ec..0e228460dc 100644 --- a/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c +++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c @@ -26,6 +26,7 @@ #pragma ident "%Z%%M% %I% %E% SMI" +#include "libzfs_jni_util.h" #include "libzfs_jni_pool.h" #include <strings.h> @@ -1110,7 +1111,7 @@ zjni_pool_status_to_obj(JNIEnv *env, zpool_status_t status) int zjni_ipool_iter(int argc, char **argv, zjni_ipool_iter_f func, void *data) { - nvlist_t *pools = zpool_find_import(argc, argv); + nvlist_t *pools = zpool_find_import(g_zfs, argc, argv); if (pools != NULL) { nvpair_t *elem = NULL; diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_util.h b/usr/src/lib/libzfs_jni/common/libzfs_jni_util.h index 1b878a4977..b6989239ac 100644 --- a/usr/src/lib/libzfs_jni/common/libzfs_jni_util.h +++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_util.h @@ -32,6 +32,7 @@ #include <jni.h> #include <regex.h> #include <libnvpair.h> +#include <libzfs.h> #ifdef __cplusplus extern "C" { @@ -105,6 +106,8 @@ int 
zjni_count_elements(void **); nvpair_t *zjni_nvlist_walk_nvpair( nvlist_t *, const char *, data_type_t, nvpair_t *); +extern libzfs_handle_t *g_zfs; + #ifdef __cplusplus } #endif diff --git a/usr/src/lib/libzpool/common/util.c b/usr/src/lib/libzpool/common/util.c index 094c8b6c6f..df49adbc7a 100644 --- a/usr/src/lib/libzpool/common/util.c +++ b/usr/src/lib/libzpool/common/util.c @@ -111,11 +111,17 @@ show_vdev_stats(const char *desc, nvlist_t *nv, int indent) for (c = 0; c < children; c++) { nvlist_t *cnv = child[c]; - char *cname; + char *cname, *tname; + uint64_t np; if (nvlist_lookup_string(cnv, ZPOOL_CONFIG_PATH, &cname) && nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &cname)) cname = "<unknown>"; - show_vdev_stats(cname, cnv, indent + 2); + tname = calloc(1, strlen(cname) + 2); + (void) strcpy(tname, cname); + if (nvlist_lookup_uint64(cnv, ZPOOL_CONFIG_NPARITY, &np) == 0) + tname[strlen(tname)] = '0' + np; + show_vdev_stats(tname, cnv, indent + 2); + free(tname); } } diff --git a/usr/src/pkgdefs/SUNWfmd/prototype_com b/usr/src/pkgdefs/SUNWfmd/prototype_com index 01b0e4cea5..c99e4aca59 100644 --- a/usr/src/pkgdefs/SUNWfmd/prototype_com +++ b/usr/src/pkgdefs/SUNWfmd/prototype_com @@ -74,6 +74,8 @@ f none usr/lib/fm/fmd/plugins/syslog-msgs.conf 644 root bin f none usr/lib/fm/fmd/plugins/syslog-msgs.so 555 root bin f none usr/lib/fm/fmd/plugins/zfs-diagnosis.conf 644 root bin f none usr/lib/fm/fmd/plugins/zfs-diagnosis.so 555 root bin +f none usr/lib/fm/fmd/plugins/zfs-retire.conf 644 root bin +f none usr/lib/fm/fmd/plugins/zfs-retire.so 555 root bin d none usr/lib/fm/fmd/schemes 755 root bin f none usr/lib/fm/fmd/schemes/cpu.so 555 root bin f none usr/lib/fm/fmd/schemes/dev.so 555 root bin diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c index ba07ea12d4..f979159f8a 100644 --- a/usr/src/uts/common/fs/zfs/arc.c +++ b/usr/src/uts/common/fs/zfs/arc.c @@ -356,8 +356,6 @@ buf_hash_find(spa_t *spa, dva_t *dva, uint64_t birth, kmutex_t 
**lockp) * will be returned and the new element will not be inserted. * Otherwise returns NULL. */ -static arc_buf_hdr_t *fbufs[4]; /* XXX to find 6341326 */ -static kthread_t *fbufs_lastthread; static arc_buf_hdr_t * buf_hash_insert(arc_buf_hdr_t *buf, kmutex_t **lockp) { @@ -367,13 +365,10 @@ buf_hash_insert(arc_buf_hdr_t *buf, kmutex_t **lockp) uint32_t max, i; ASSERT(!HDR_IN_HASH_TABLE(buf)); - fbufs_lastthread = curthread; *lockp = hash_lock; mutex_enter(hash_lock); for (fbuf = buf_hash_table.ht_table[idx], i = 0; fbuf != NULL; fbuf = fbuf->b_hash_next, i++) { - if (i < sizeof (fbufs) / sizeof (fbufs[0])) - fbufs[i] = fbuf; if (BUF_EQUAL(buf->b_spa, &buf->b_dva, buf->b_birth, fbuf)) return (fbuf); } diff --git a/usr/src/uts/common/fs/zfs/bplist.c b/usr/src/uts/common/fs/zfs/bplist.c index db0d3534d6..4442b1f28a 100644 --- a/usr/src/uts/common/fs/zfs/bplist.c +++ b/usr/src/uts/common/fs/zfs/bplist.c @@ -45,12 +45,13 @@ bplist_hold(bplist_t *bpl) uint64_t bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx) { - uint64_t obj; + int size; - obj = dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize, - DMU_OT_BPLIST_HDR, sizeof (bplist_phys_t), tx); + size = spa_version(dmu_objset_spa(mos)) < ZFS_VERSION_BPLIST_ACCOUNT ? 
+ BPLIST_SIZE_V0 : sizeof (bplist_phys_t); - return (obj); + return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize, + DMU_OT_BPLIST_HDR, size, tx)); } void @@ -76,11 +77,14 @@ bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object) ASSERT(bpl->bpl_cached_dbuf == NULL); ASSERT(bpl->bpl_queue == NULL); ASSERT(object != 0); + ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST); + ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR); bpl->bpl_mos = mos; bpl->bpl_object = object; bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1); bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT; + bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t)); mutex_exit(&bpl->bpl_lock); return (0); @@ -210,7 +214,12 @@ bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx) dmu_buf_will_dirty(bpl->bpl_dbuf, tx); bpl->bpl_phys->bpl_entries++; - bpl->bpl_phys->bpl_bytes += BP_GET_ASIZE(bp); + bpl->bpl_phys->bpl_bytes += + bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp); + if (bpl->bpl_havecomp) { + bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp); + bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp); + } mutex_exit(&bpl->bpl_lock); return (0); @@ -259,5 +268,45 @@ bplist_vacate(bplist_t *bpl, dmu_tx_t *tx) bpl->bpl_object, 0, -1ULL, tx)); bpl->bpl_phys->bpl_entries = 0; bpl->bpl_phys->bpl_bytes = 0; + if (bpl->bpl_havecomp) { + bpl->bpl_phys->bpl_comp = 0; + bpl->bpl_phys->bpl_uncomp = 0; + } + mutex_exit(&bpl->bpl_lock); +} + +int +bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) +{ + uint64_t itor = 0, comp = 0, uncomp = 0; + int err; + blkptr_t bp; + + mutex_enter(&bpl->bpl_lock); + + err = bplist_hold(bpl); + if (err) { + mutex_exit(&bpl->bpl_lock); + return (err); + } + + *usedp = bpl->bpl_phys->bpl_bytes; + if (bpl->bpl_havecomp) { + *compp = bpl->bpl_phys->bpl_comp; + *uncompp = bpl->bpl_phys->bpl_uncomp; + } mutex_exit(&bpl->bpl_lock); + + if (!bpl->bpl_havecomp) { + while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) { + comp += 
BP_GET_PSIZE(&bp); + uncomp += BP_GET_UCSIZE(&bp); + } + if (err == ENOENT) + err = 0; + *compp = comp; + *uncompp = uncomp; + } + + return (err); } diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c index 2135427b7a..e78f49c4f9 100644 --- a/usr/src/uts/common/fs/zfs/dbuf.c +++ b/usr/src/uts/common/fs/zfs/dbuf.c @@ -1029,7 +1029,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * it's OK if we get an odd answer. */ dnode_willuse_space(dn, - -BP_GET_ASIZE(db->db_blkptr), tx); + -bp_get_dasize(os->os_spa, db->db_blkptr), tx); } dnode_willuse_space(dn, db->db.db_size, tx); } @@ -1951,8 +1951,8 @@ dbuf_sync(dmu_buf_impl_t *db, zio_t *zio, dmu_tx_t *tx) arc_buf_t **old = (arc_buf_t **)&db->db_d.db_data_old[txg&TXG_MASK]; blkptr_t **bpp = &db->db_d.db_overridden_by[txg&TXG_MASK]; - int old_size = BP_GET_ASIZE(db->db_blkptr); - int new_size = BP_GET_ASIZE(*bpp); + int old_size = bp_get_dasize(os->os_spa, db->db_blkptr); + int new_size = bp_get_dasize(os->os_spa, *bpp); ASSERT(db->db_blkid != DB_BONUS_BLKID); @@ -2078,8 +2078,8 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) dprintf_dbuf_bp(db, &zio->io_bp_orig, "bp_orig: %s", ""); - old_size = BP_GET_ASIZE(&zio->io_bp_orig); - new_size = BP_GET_ASIZE(zio->io_bp); + old_size = bp_get_dasize(os->os_spa, &zio->io_bp_orig); + new_size = bp_get_dasize(os->os_spa, zio->io_bp); dnode_diduse_space(dn, new_size-old_size); diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c index 52c8413c9a..77886f5e24 100644 --- a/usr/src/uts/common/fs/zfs/dmu.c +++ b/usr/src/uts/common/fs/zfs/dmu.c @@ -789,7 +789,7 @@ replay_incremental_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) /* The point of no (unsuccessful) return. 
*/ dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_inconsistent = TRUE; + ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); return (0); @@ -841,7 +841,7 @@ replay_full_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) ds, drrb->drr_type, tx); dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_inconsistent = TRUE; + ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); return (0); @@ -875,7 +875,7 @@ replay_end_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_creation_time = drrb->drr_creation_time; ds->ds_phys->ds_guid = drrb->drr_toguid; - ds->ds_phys->ds_inconsistent = FALSE; + ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; dsl_dataset_close(ds, DS_MODE_PRIMARY, FTAG); @@ -883,7 +883,7 @@ replay_end_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_STANDARD | DS_MODE_INCONSISTENT, FTAG, &ds)); dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_inconsistent = FALSE; + ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); return (0); @@ -1686,7 +1686,8 @@ dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) doi->doi_indirection = dn->dn_nlevels; doi->doi_checksum = dn->dn_checksum; doi->doi_compress = dn->dn_compress; - doi->doi_physical_blks = dn->dn_phys->dn_secphys; + doi->doi_physical_blks = (DN_USED_BYTES(dn->dn_phys) + + SPA_MINBLOCKSIZE/2) >> SPA_MINBLOCKSHIFT; doi->doi_max_block_offset = dn->dn_phys->dn_maxblkid; doi->doi_type = dn->dn_type; doi->doi_bonus_size = dn->dn_bonuslen; @@ -1735,7 +1736,9 @@ dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512) dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; *blksize = dn->dn_datablksz; - *nblk512 = dn->dn_phys->dn_secphys + 1; /* add 1 for dnode space */ + /* add 1 for dnode space */ + *nblk512 = ((DN_USED_BYTES(dn->dn_phys) + SPA_MINBLOCKSIZE/2) >> 
+ SPA_MINBLOCKSHIFT) + 1; } /* diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c index 248612e3cc..3d5f1f7b5c 100644 --- a/usr/src/uts/common/fs/zfs/dmu_objset.c +++ b/usr/src/uts/common/fs/zfs/dmu_objset.c @@ -164,9 +164,10 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, /* * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the - * default (fletcher2/off). + * default (fletcher2/off). Snapshots don't need to know, and + * registering would complicate clone promotion. */ - if (ds) { + if (ds && ds->ds_phys->ds_num_children == 0) { err = dsl_prop_register(ds, "checksum", checksum_changed_cb, osi); if (err == 0) @@ -177,7 +178,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, kmem_free(osi, sizeof (objset_impl_t)); return (err); } - } else { + } else if (ds == NULL) { /* It's the meta-objset. */ osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; osi->os_compress = ZIO_COMPRESS_LZJB; @@ -329,21 +330,18 @@ dmu_objset_evict(dsl_dataset_t *ds, void *arg) { objset_impl_t *osi = arg; objset_t os; - int err, i; + int i; for (i = 0; i < TXG_SIZE; i++) { ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL); ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL); } - if (ds) { - err = dsl_prop_unregister(ds, "checksum", - checksum_changed_cb, osi); - ASSERT(err == 0); - - err = dsl_prop_unregister(ds, "compression", - compression_changed_cb, osi); - ASSERT(err == 0); + if (ds && ds->ds_phys->ds_num_children == 0) { + VERIFY(0 == dsl_prop_unregister(ds, "checksum", + checksum_changed_cb, osi)); + VERIFY(0 == dsl_prop_unregister(ds, "compression", + compression_changed_cb, osi)); } /* diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c index 91ee5c5062..1b4a0c2bd0 100644 --- a/usr/src/uts/common/fs/zfs/dmu_tx.c +++ b/usr/src/uts/common/fs/zfs/dmu_tx.c @@ -181,8 +181,9 @@ dmu_tx_count_write(dmu_tx_t *tx, 
dnode_t *dn, uint64_t off, uint64_t len) /* * For i/o error checking, read the first and last level-0 - * blocks, and all the level-1 blocks. We needn't do this on - * the meta-dnode, because we've already read it in. + * blocks (if they are not aligned), and all the level-1 blocks. + * We needn't do this on the meta-dnode, because we've already + * read it in. */ if (dn && dn->dn_object != DMU_META_DNODE_OBJECT) { @@ -199,16 +200,20 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len) NULL, NULL, ZIO_FLAG_CANFAIL); /* first level-0 block */ - start = off/dn->dn_datablksz; - err = dmu_tx_check_ioerr(zio, dn, 0, start); - if (err) { - tx->tx_err = err; - return; + start = off >> dn->dn_datablkshift; + if (P2PHASE(off, dn->dn_datablksz) || + len < dn->dn_datablksz) { + err = dmu_tx_check_ioerr(zio, dn, 0, start); + if (err) { + tx->tx_err = err; + return; + } } /* last level-0 block */ - end = (off+len)/dn->dn_datablksz; - if (end != start) { + end = (off+len-1) >> dn->dn_datablkshift; + if (end != start && + P2PHASE(off+len, dn->dn_datablksz)) { err = dmu_tx_check_ioerr(zio, dn, 0, end); if (err) { tx->tx_err = err; @@ -330,6 +335,7 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len) uint64_t blkid, nblks; uint64_t space = 0; dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; + spa_t *spa = tx->tx_pool->dp_spa; int dirty; /* @@ -388,7 +394,7 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len) bp += blkid + i; if (dsl_dataset_block_freeable(ds, bp->blk_birth)) { dprintf_bp(bp, "can free old%s", ""); - space += BP_GET_ASIZE(bp); + space += bp_get_dasize(spa, bp); } } nblks = 0; @@ -423,7 +429,7 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len) bp[i].blk_birth)) { dprintf_bp(&bp[i], "can free old%s", ""); - space += BP_GET_ASIZE(&bp[i]); + space += bp_get_dasize(spa, &bp[i]); } } dbuf_rele(dbuf, FTAG); diff --git a/usr/src/uts/common/fs/zfs/dnode.c 
b/usr/src/uts/common/fs/zfs/dnode.c index 6de40f5081..43f1d4f135 100644 --- a/usr/src/uts/common/fs/zfs/dnode.c +++ b/usr/src/uts/common/fs/zfs/dnode.c @@ -177,17 +177,10 @@ dnode_byteswap(dnode_phys_t *dnp) return; } - dnp->dn_type = BSWAP_8(dnp->dn_type); - dnp->dn_indblkshift = BSWAP_8(dnp->dn_indblkshift); - dnp->dn_nlevels = BSWAP_8(dnp->dn_nlevels); - dnp->dn_nblkptr = BSWAP_8(dnp->dn_nblkptr); - dnp->dn_bonustype = BSWAP_8(dnp->dn_bonustype); - dnp->dn_checksum = BSWAP_8(dnp->dn_checksum); - dnp->dn_compress = BSWAP_8(dnp->dn_compress); dnp->dn_datablkszsec = BSWAP_16(dnp->dn_datablkszsec); dnp->dn_bonuslen = BSWAP_16(dnp->dn_bonuslen); dnp->dn_maxblkid = BSWAP_64(dnp->dn_maxblkid); - dnp->dn_secphys = BSWAP_64(dnp->dn_secphys); + dnp->dn_used = BSWAP_64(dnp->dn_used); /* * dn_nblkptr is only one byte, so it's OK to read it in either @@ -1110,27 +1103,29 @@ dnode_block_freed(dnode_t *dn, uint64_t blkid) /* call from syncing context when we actually write/free space for this dnode */ void -dnode_diduse_space(dnode_t *dn, int64_t space) +dnode_diduse_space(dnode_t *dn, int64_t delta) { - uint64_t sectors; - - dprintf_dnode(dn, "dn=%p dnp=%p secphys=%llu space=%lld\n", + uint64_t space; + dprintf_dnode(dn, "dn=%p dnp=%p used=%llu delta=%lld\n", dn, dn->dn_phys, - (u_longlong_t)dn->dn_phys->dn_secphys, - (longlong_t)space); - - ASSERT(P2PHASE(space, 1<<DEV_BSHIFT) == 0); + (u_longlong_t)dn->dn_phys->dn_used, + (longlong_t)delta); mutex_enter(&dn->dn_mtx); - if (space > 0) { - sectors = space >> DEV_BSHIFT; - ASSERT3U(dn->dn_phys->dn_secphys + sectors, >=, - dn->dn_phys->dn_secphys); - dn->dn_phys->dn_secphys += sectors; + space = DN_USED_BYTES(dn->dn_phys); + if (delta > 0) { + ASSERT3U(space + delta, >=, space); /* no overflow */ + } else { + ASSERT3U(space, >=, -delta); /* no underflow */ + } + space += delta; + if (spa_version(dn->dn_objset->os_spa) < ZFS_VERSION_DNODE_BYTES) { + ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0); + 
ASSERT3U(P2PHASE(space, 1<<DEV_BSHIFT), ==, 0); + dn->dn_phys->dn_used = space >> DEV_BSHIFT; } else { - sectors = -space >> DEV_BSHIFT; - ASSERT3U(dn->dn_phys->dn_secphys, >=, sectors); - dn->dn_phys->dn_secphys -= sectors; + dn->dn_phys->dn_used = space; + dn->dn_phys->dn_flags |= DNODE_FLAG_USED_BYTES; } mutex_exit(&dn->dn_mtx); } diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c index 80ac38c86a..5bb538980e 100644 --- a/usr/src/uts/common/fs/zfs/dnode_sync.c +++ b/usr/src/uts/common/fs/zfs/dnode_sync.c @@ -119,8 +119,8 @@ free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx) if (BP_IS_HOLE(bp)) continue; - bytesfreed += BP_GET_ASIZE(bp); - ASSERT3U(bytesfreed >> DEV_BSHIFT, <=, dn->dn_phys->dn_secphys); + bytesfreed += bp_get_dasize(os->os_spa, bp); + ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys)); dsl_dataset_block_kill(os->os_dsl_dataset, bp, tx); } dnode_diduse_space(dn, -bytesfreed); @@ -457,7 +457,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) /* free up all the blocks in the file. */ dnode_sync_free_range(dn, 0, dn->dn_phys->dn_maxblkid+1, tx); - ASSERT3U(dn->dn_phys->dn_secphys, ==, 0); + ASSERT3U(DN_USED_BYTES(dn->dn_phys), ==, 0); /* ASSERT(blkptrs are zero); */ ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c index 5b1de1b4b8..a199aec8de 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dataset.c +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c @@ -28,6 +28,7 @@ #include <sys/dmu_objset.h> #include <sys/dsl_dataset.h> #include <sys/dsl_dir.h> +#include <sys/dsl_prop.h> #include <sys/dmu_traverse.h> #include <sys/dmu_tx.h> #include <sys/arc.h> @@ -43,10 +44,6 @@ static int dsl_dataset_destroy_begin_sync(dsl_dir_t *dd, #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE -#define BP_GET_UCSIZE(bp) \ - ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? 
\ - BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)); - /* * We use weighted reference counts to express the various forms of exclusion * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open @@ -68,7 +65,7 @@ static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { void dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) { - int used = BP_GET_ASIZE(bp); + int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); int compressed = BP_GET_PSIZE(bp); int uncompressed = BP_GET_UCSIZE(bp); @@ -105,7 +102,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) void dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) { - int used = BP_GET_ASIZE(bp); + int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); int compressed = BP_GET_PSIZE(bp); int uncompressed = BP_GET_UCSIZE(bp); @@ -155,8 +152,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) ds->ds_phys->ds_prev_snap_obj); ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); if (ds->ds_prev->ds_phys->ds_next_snap_obj == - ds->ds_object && - bp->blk_birth > + ds->ds_object && bp->blk_birth > ds->ds_prev->ds_phys->ds_prev_snap_txg) { dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); mutex_enter(&ds->ds_prev->ds_lock); @@ -373,7 +369,8 @@ dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, mutex_enter(&ds->ds_lock); if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && - ds->ds_phys->ds_inconsistent && !DS_MODE_IS_INCONSISTENT(mode)) || + (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) && + !DS_MODE_IS_INCONSISTENT(mode)) || (ds->ds_open_refcount + weight > DOS_REF_MAX)) { mutex_exit(&ds->ds_lock); dsl_dataset_close(ds, DS_MODE_NONE, tag); @@ -842,7 +839,7 @@ kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) * Since this callback is not called concurrently, no lock is * needed on the accounting values. 
*/ - *ka->usedp += BP_GET_ASIZE(bp); + *ka->usedp += bp_get_dasize(spa, bp); *ka->compressedp += BP_GET_PSIZE(bp); *ka->uncompressedp += BP_GET_UCSIZE(bp); /* XXX check for EIO? */ @@ -939,7 +936,7 @@ dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) ds->ds_prev->ds_phys->ds_compressed_bytes; ds->ds_phys->ds_uncompressed_bytes = ds->ds_prev->ds_phys->ds_uncompressed_bytes; - ds->ds_phys->ds_inconsistent = ds->ds_prev->ds_phys->ds_inconsistent; + ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; ds->ds_phys->ds_unique_bytes = 0; dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); @@ -969,7 +966,7 @@ dsl_dataset_destroy_begin_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) /* Mark it as inconsistent on-disk, in case we crash */ dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_inconsistent = TRUE; + ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; return (0); } @@ -1120,10 +1117,10 @@ dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) bp.blk_birth > ds_prev->ds_phys->ds_prev_snap_txg) { ds_prev->ds_phys->ds_unique_bytes += - BP_GET_ASIZE(&bp); + bp_get_dasize(dp->dp_spa, &bp); } } else { - used += BP_GET_ASIZE(&bp); + used += bp_get_dasize(dp->dp_spa, &bp); compressed += BP_GET_PSIZE(&bp); uncompressed += BP_GET_UCSIZE(&bp); /* XXX check return value? 
*/ @@ -1169,7 +1166,7 @@ dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) bp.blk_birth <= ds->ds_phys->ds_creation_txg) { ds_next->ds_phys->ds_unique_bytes += - BP_GET_ASIZE(&bp); + bp_get_dasize(dp->dp_spa, &bp); } } @@ -1347,7 +1344,7 @@ dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; - dsphys->ds_inconsistent = ds->ds_phys->ds_inconsistent; + dsphys->ds_flags = ds->ds_phys->ds_flags; dsphys->ds_bp = ds->ds_phys->ds_bp; dmu_buf_rele(dbuf, FTAG); @@ -1424,7 +1421,7 @@ dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds) dds->dds_num_clones = ds->ds_phys->ds_num_children - 1; } - dds->dds_inconsistent = ds->ds_phys->ds_inconsistent; + dds->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth; dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill; @@ -1581,3 +1578,236 @@ dsl_dataset_rename(const char *osname, const char *newname) dsl_dir_close(dd, FTAG); return (err); } + +/* ARGSUSED */ +static int +dsl_dataset_promote_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) +{ + dsl_dir_t *pdd = NULL; + dsl_dataset_t *ds = NULL; + dsl_dataset_t *hds = NULL; + dsl_dataset_t *phds = NULL; + dsl_dataset_t *pivot_ds = NULL; + dsl_dataset_t *newnext_ds = NULL; + int err; + char *name = NULL; + uint64_t used = 0, comp = 0, uncomp = 0, unique = 0, itor = 0; + blkptr_t bp; + + /* Check that it is a clone */ + if (dd->dd_phys->dd_clone_parent_obj == 0) + return (EINVAL); + + /* Open everyone */ + if (err = dsl_dataset_open_obj(dd->dd_pool, + dd->dd_phys->dd_clone_parent_obj, + NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)) + goto out; + pdd = pivot_ds->ds_dir; + if (err = dsl_dataset_open_obj(dd->dd_pool, + pdd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &phds)) + goto out; + if (err = 
dsl_dataset_open_obj(dd->dd_pool, + dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)) + goto out; + + if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { + err = EXDEV; + goto out; + } + + /* find pivot point's new next ds */ + VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object, + NULL, DS_MODE_NONE, FTAG, &newnext_ds)); + while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) { + dsl_dataset_t *prev; + + if (err = dsl_dataset_open_obj(dd->dd_pool, + newnext_ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_NONE, + FTAG, &prev)) + goto out; + dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); + newnext_ds = prev; + } + + /* compute pivot point's new unique space */ + while ((err = bplist_iterate(&newnext_ds->ds_deadlist, + &itor, &bp)) == 0) { + if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg) + unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); + } + if (err != ENOENT) + goto out; + + /* need the config lock to ensure that the snapshots are not open */ + rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER); + + /* Walk the snapshots that we are moving */ + name = kmem_alloc(MAXPATHLEN, KM_SLEEP); + ds = pivot_ds; + /* CONSTCOND */ + while (TRUE) { + uint64_t val, dlused, dlcomp, dluncomp; + dsl_dataset_t *prev; + + /* Check that the snapshot name does not conflict */ + dsl_dataset_name(ds, name); + err = zap_lookup(dd->dd_pool->dp_meta_objset, + hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, + 8, 1, &val); + if (err != ENOENT) { + if (err == 0) + err = EEXIST; + goto out; + } + + /* + * compute space to transfer. Each snapshot gave birth to: + * (my used) - (prev's used) + (deadlist's used) + */ + used += ds->ds_phys->ds_used_bytes; + comp += ds->ds_phys->ds_compressed_bytes; + uncomp += ds->ds_phys->ds_uncompressed_bytes; + + /* If we reach the first snapshot, we're done. 
*/ + if (ds->ds_phys->ds_prev_snap_obj == 0) + break; + + if (err = bplist_space(&ds->ds_deadlist, + &dlused, &dlcomp, &dluncomp)) + goto out; + if (err = dsl_dataset_open_obj(dd->dd_pool, + ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, + FTAG, &prev)) + goto out; + used += dlused - prev->ds_phys->ds_used_bytes; + comp += dlcomp - prev->ds_phys->ds_compressed_bytes; + uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; + + /* + * We could be a clone of a clone. If we reach our + * parent's branch point, we're done. + */ + if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { + dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); + break; + } + if (ds != pivot_ds) + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); + ds = prev; + } + if (ds != pivot_ds) + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); + ds = NULL; + + /* Check that there is enough space here */ + if (err = dsl_dir_transfer_possible(pdd, dd, used)) + goto out; + + /* The point of no (unsuccessful) return */ + + /* move snapshots to this dir */ + ds = pivot_ds; + /* CONSTCOND */ + while (TRUE) { + dsl_dataset_t *prev; + + /* move snap name entry */ + dsl_dataset_name(ds, name); + VERIFY(0 == zap_remove(dd->dd_pool->dp_meta_objset, + phds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, tx)); + VERIFY(0 == zap_add(dd->dd_pool->dp_meta_objset, + hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, + 8, 1, &ds->ds_object, tx)); + + /* change containing dsl_dir */ + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object); + ds->ds_phys->ds_dir_obj = dd->dd_object; + ASSERT3P(ds->ds_dir, ==, pdd); + dsl_dir_close(ds->ds_dir, ds); + VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, dd->dd_object, + NULL, ds, &ds->ds_dir)); + + ASSERT3U(dsl_prop_numcb(ds), ==, 0); + + if (ds->ds_phys->ds_prev_snap_obj == 0) + break; + + VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, + ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, + FTAG, &prev)); + + if 
(prev->ds_phys->ds_next_snap_obj != ds->ds_object) { + dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); + break; + } + if (ds != pivot_ds) + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); + ds = prev; + } + + /* change pivot point's next snap */ + dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx); + pivot_ds->ds_phys->ds_next_snap_obj = newnext_ds->ds_object; + + /* change clone_parent-age */ + dmu_buf_will_dirty(dd->dd_dbuf, tx); + ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object); + dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj; + dmu_buf_will_dirty(pdd->dd_dbuf, tx); + pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object; + + /* change space accounting */ + dsl_dir_diduse_space(pdd, -used, -comp, -uncomp, tx); + dsl_dir_diduse_space(dd, used, comp, uncomp, tx); + pivot_ds->ds_phys->ds_unique_bytes = unique; + + err = 0; + +out: + if (RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)) + rw_exit(&dd->dd_pool->dp_config_rwlock); + if (hds) + dsl_dataset_close(hds, DS_MODE_NONE, FTAG); + if (phds) + dsl_dataset_close(phds, DS_MODE_NONE, FTAG); + if (ds && ds != pivot_ds) + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); + if (pivot_ds) + dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); + if (newnext_ds) + dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); + if (name) + kmem_free(name, MAXPATHLEN); + return (err); +} + +int +dsl_dataset_promote(const char *name) +{ + dsl_dataset_t *ds; + int err; + dmu_object_info_t doi; + + err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); + if (err) + return (err); + + err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_phys->ds_snapnames_zapobj, &doi); + if (err) { + dsl_dataset_close(ds, DS_MODE_NONE, FTAG); + return (err); + } + + /* + * Add in 128x the snapnames zapobj size, since we will be moving + * a bunch of snapnames to the promoted ds, and dirtying their + * bonus buffers. 
+ */ + err = dsl_dir_sync_task(ds->ds_dir, dsl_dataset_promote_sync, NULL, + (1<<20) + (doi.doi_physical_blks << (SPA_MINBLOCKSHIFT + 7))); + dsl_dataset_close(ds, DS_MODE_NONE, FTAG); + return (err); +} diff --git a/usr/src/uts/common/fs/zfs/dsl_dir.c b/usr/src/uts/common/fs/zfs/dsl_dir.c index 8ffa145477..d7095cb0d3 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dir.c +++ b/usr/src/uts/common/fs/zfs/dsl_dir.c @@ -739,7 +739,7 @@ dsl_dir_space_available(dsl_dir_t *dd, used += delta; if (dd->dd_parent == NULL) { - uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE); + uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE); quota = MIN(quota, poolsize); } @@ -754,23 +754,19 @@ dsl_dir_space_available(dsl_dir_t *dd, if (used > quota) { /* over quota */ myspace = 0; -#ifdef ZFS_DEBUG - { - /* - * While it's OK to be a little over quota, if - * we think we are using more space than there - * is in the pool (which is already 6% more than - * dsl_pool_adjustedsize()), something is very - * wrong. - */ - uint64_t space = spa_get_space(dd->dd_pool->dp_spa); - ASSERT3U(used, <=, space); - } -#endif + + /* + * While it's OK to be a little over quota, if + * we think we are using more space than there + * is in the pool (which is already 1.6% more than + * dsl_pool_adjustedsize()), something is very + * wrong. + */ + ASSERT3U(used, <=, spa_get_space(dd->dd_pool->dp_spa)); } else { /* - * the lesser of parent's space and the space - * left in our quota + * the lesser of the space provided by our parent and + * the space left in our quota */ myspace = MIN(parentspace, quota - used); } @@ -1170,27 +1166,22 @@ dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) } if (newpds != dd->dd_parent) { - dsl_dir_t *ancestor; - int64_t adelta; - uint64_t myspace, avail; - - ancestor = closest_common_ancestor(dd, newpds); + /* is there enough space? 
*/ + uint64_t myspace = + MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved); /* no rename into our descendent */ - if (ancestor == dd) { + if (closest_common_ancestor(dd, newpds) == dd) { dsl_dir_close(newpds, FTAG); rw_exit(&dp->dp_config_rwlock); return (EINVAL); } - myspace = MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved); - adelta = would_change(dd->dd_parent, -myspace, ancestor); - avail = dsl_dir_space_available(newpds, - ancestor, adelta, FALSE); - if (avail < myspace) { + if (err = dsl_dir_transfer_possible(dd->dd_parent, newpds, + myspace)) { dsl_dir_close(newpds, FTAG); rw_exit(&dp->dp_config_rwlock); - return (ENOSPC); + return (err); } /* The point of no (unsuccessful) return */ @@ -1227,3 +1218,19 @@ dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) rw_exit(&dp->dp_config_rwlock); return (0); } + +int +dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space) +{ + dsl_dir_t *ancestor; + int64_t adelta; + uint64_t avail; + + ancestor = closest_common_ancestor(sdd, tdd); + adelta = would_change(sdd, -space, ancestor); + avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE); + if (avail < space) + return (ENOSPC); + + return (0); +} diff --git a/usr/src/uts/common/fs/zfs/dsl_pool.c b/usr/src/uts/common/fs/zfs/dsl_pool.c index 77a1adb3b1..d12e1acfeb 100644 --- a/usr/src/uts/common/fs/zfs/dsl_pool.c +++ b/usr/src/uts/common/fs/zfs/dsl_pool.c @@ -241,7 +241,7 @@ dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree) * cut the reservation in half to allow forward progress * (e.g. make it possible to rm(1) files from a full pool). 
*/ - space = spa_get_space(dp->dp_spa); + space = spa_get_dspace(dp->dp_spa); resv = MAX(space >> 6, SPA_MINDEVSIZE >> 1); if (netfree) resv >>= 1; diff --git a/usr/src/uts/common/fs/zfs/dsl_prop.c b/usr/src/uts/common/fs/zfs/dsl_prop.c index fc33b1c591..0bb55f8b95 100644 --- a/usr/src/uts/common/fs/zfs/dsl_prop.c +++ b/usr/src/uts/common/fs/zfs/dsl_prop.c @@ -62,33 +62,28 @@ dodefault(const char *propname, int intsz, int numint, void *buf) } static int -dsl_prop_get_impl(dsl_pool_t *dp, uint64_t ddobj, const char *propname, +dsl_prop_get_impl(dsl_dir_t *dd, const char *propname, int intsz, int numint, void *buf, char *setpoint) { - int err = 0; - objset_t *mos = dp->dp_meta_objset; + int err = ENOENT; if (setpoint) setpoint[0] = '\0'; - ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); - - while (ddobj != 0) { - dsl_dir_t *dd; - err = dsl_dir_open_obj(dp, ddobj, NULL, FTAG, &dd); - if (err) - break; + /* + * Note: dd may be NULL, therefore we shouldn't dereference it + * outside this loop. + */ + for (; dd != NULL; dd = dd->dd_parent) { + objset_t *mos = dd->dd_pool->dp_meta_objset; + ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock)); err = zap_lookup(mos, dd->dd_phys->dd_props_zapobj, propname, intsz, numint, buf); if (err != ENOENT) { if (setpoint) dsl_dir_name(dd, setpoint); - dsl_dir_close(dd, FTAG); break; } - ASSERT3U(err, ==, ENOENT); - ddobj = dd->dd_phys->dd_parent_obj; - dsl_dir_close(dd, FTAG); } if (err == ENOENT) err = dodefault(propname, intsz, numint, buf); @@ -107,27 +102,21 @@ int dsl_prop_register(dsl_dataset_t *ds, const char *propname, dsl_prop_changed_cb_t *callback, void *cbarg) { - dsl_dir_t *dd; + dsl_dir_t *dd = ds->ds_dir; uint64_t value; dsl_prop_cb_record_t *cbr; int err; - dd = ds->ds_dir; - rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); - err = dsl_prop_get_impl(dd->dd_pool, dd->dd_object, propname, - 8, 1, &value, NULL); - if (err == ENOENT) { - err = 0; - value = DSL_PROP_VALUE_UNDEFINED; - } + err = dsl_prop_get_impl(dd, 
propname, 8, 1, &value, NULL); if (err != 0) { rw_exit(&dd->dd_pool->dp_config_rwlock); return (err); } cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_SLEEP); + cbr->cbr_ds = ds; cbr->cbr_propname = kmem_alloc(strlen(propname)+1, KM_SLEEP); (void) strcpy((char *)cbr->cbr_propname, propname); cbr->cbr_func = callback; @@ -152,8 +141,7 @@ dsl_prop_get_ds(dsl_dir_t *dd, const char *propname, int err; rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); - err = dsl_prop_get_impl(dd->dd_pool, dd->dd_object, - propname, intsz, numints, buf, setpoint); + err = dsl_prop_get_impl(dd, propname, intsz, numints, buf, setpoint); rw_exit(&dd->dd_pool->dp_config_rwlock); return (err); @@ -222,17 +210,16 @@ int dsl_prop_unregister(dsl_dataset_t *ds, const char *propname, dsl_prop_changed_cb_t *callback, void *cbarg) { - dsl_dir_t *dd; + dsl_dir_t *dd = ds->ds_dir; dsl_prop_cb_record_t *cbr; - dd = ds->ds_dir; - mutex_enter(&dd->dd_lock); for (cbr = list_head(&dd->dd_prop_cbs); cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) { - if (strcmp(cbr->cbr_propname, propname) == 0 && + if (cbr->cbr_ds == ds && cbr->cbr_func == callback && - cbr->cbr_arg == cbarg) + cbr->cbr_arg == cbarg && + strcmp(cbr->cbr_propname, propname) == 0) break; } @@ -251,6 +238,27 @@ dsl_prop_unregister(dsl_dataset_t *ds, const char *propname, return (0); } +/* + * Return the number of callbacks that are registered for this dataset. + */ +int +dsl_prop_numcb(dsl_dataset_t *ds) +{ + dsl_dir_t *dd = ds->ds_dir; + dsl_prop_cb_record_t *cbr; + int num = 0; + + mutex_enter(&dd->dd_lock); + for (cbr = list_head(&dd->dd_prop_cbs); + cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) { + if (cbr->cbr_ds == ds) + num++; + } + mutex_exit(&dd->dd_lock); + + return (num); +} + static void dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, const char *propname, uint64_t value, int first) @@ -330,9 +338,8 @@ dsl_prop_set_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) if (err == ENOENT) /* that's fine. 
*/ err = 0; if (err == 0 && isint) { - err = dsl_prop_get_impl(dd->dd_pool, - dd->dd_phys->dd_parent_obj, psa->name, - 8, 1, &intval, NULL); + err = dsl_prop_get_impl(dd->dd_parent, + psa->name, 8, 1, &intval, NULL); } } else { err = zap_update(mos, zapobj, psa->name, @@ -380,7 +387,7 @@ int dsl_prop_get_all(objset_t *os, nvlist_t **nvp) { dsl_dataset_t *ds = os->os->os_dsl_dataset; - dsl_dir_t *dd, *parent; + dsl_dir_t *dd = ds->ds_dir; int err = 0; dsl_pool_t *dp; objset_t *mos; @@ -395,15 +402,13 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp) return (0); } - dd = ds->ds_dir; - VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); dp = dd->dd_pool; mos = dp->dp_meta_objset; rw_enter(&dp->dp_config_rwlock, RW_READER); - while (dd != NULL) { + for (; dd != NULL; dd = dd->dd_parent) { dsl_dir_name(dd, setpoint); for (zap_cursor_init(&zc, mos, dd->dd_phys->dd_props_zapobj); @@ -418,7 +423,6 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp) /* * String property */ - tmp = kmem_alloc(za.za_num_integers, KM_SLEEP); err = zap_lookup(mos, dd->dd_phys->dd_props_zapobj, @@ -448,27 +452,9 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp) } zap_cursor_fini(&zc); - if (err != ENOENT) { - if (dd != ds->ds_dir) - dsl_dir_close(dd, FTAG); + if (err != ENOENT) break; - } else { - err = 0; - } - - /* - * Continue to parent. - */ - if (dd->dd_phys->dd_parent_obj == 0) - parent = NULL; - else - err = dsl_dir_open_obj(dp, - dd->dd_phys->dd_parent_obj, NULL, FTAG, &parent); - if (dd != ds->ds_dir) - dsl_dir_close(dd, FTAG); - if (err) - break; - dd = parent; + err = 0; } rw_exit(&dp->dp_config_rwlock); diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 2fe82c2e80..fca42558ef 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
@@ -185,33 +186,40 @@ spa_deactivate(spa_t *spa) * in the CLOSED state. This will prep the pool before open/creation/import. * All vdev validation is done by the vdev_alloc() routine. */ -static vdev_t * -spa_config_parse(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int atype) +static int +spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, + uint_t id, int atype) { nvlist_t **child; uint_t c, children; - vdev_t *vd; + int error; - if ((vd = vdev_alloc(spa, nv, parent, id, atype)) == NULL) - return (NULL); + if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) + return (error); - if (vd->vdev_ops->vdev_op_leaf) - return (vd); + if ((*vdp)->vdev_ops->vdev_op_leaf) + return (0); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { - vdev_free(vd); - return (NULL); + vdev_free(*vdp); + *vdp = NULL; + return (EINVAL); } for (c = 0; c < children; c++) { - if (spa_config_parse(spa, child[c], vd, c, atype) == NULL) { - vdev_free(vd); - return (NULL); + vdev_t *vd; + if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, + atype)) != 0) { + vdev_free(*vdp); + *vdp = NULL; + return (error); } } - return (vd); + ASSERT(*vdp != NULL); + + return (0); } /* @@ -220,6 +228,8 @@ spa_config_parse(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int atype) static void spa_unload(spa_t *spa) { + int i; + /* * Stop async tasks. */ @@ -254,10 +264,117 @@ spa_unload(spa_t *spa) vdev_free(spa->spa_root_vdev); ASSERT(spa->spa_root_vdev == NULL); + for (i = 0; i < spa->spa_nspares; i++) + vdev_free(spa->spa_spares[i]); + if (spa->spa_spares) { + kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *)); + spa->spa_spares = NULL; + } + if (spa->spa_sparelist) { + nvlist_free(spa->spa_sparelist); + spa->spa_sparelist = NULL; + } + spa->spa_async_suspended = 0; } /* + * Load (or re-load) the current list of vdevs describing the active spares for + * this pool. 
When this is called, we have some form of basic information in + * 'spa_sparelist'. We parse this into vdevs, try to open them, and then + * re-generate a more complete list including status information. + */ +static void +spa_load_spares(spa_t *spa) +{ + nvlist_t **spares; + uint_t nspares; + int i; + + /* + * First, close and free any existing spare vdevs. + */ + for (i = 0; i < spa->spa_nspares; i++) { + vdev_close(spa->spa_spares[i]); + vdev_free(spa->spa_spares[i]); + } + if (spa->spa_spares) + kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *)); + + if (spa->spa_sparelist == NULL) + nspares = 0; + else + VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist, + ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); + + spa->spa_nspares = (int)nspares; + spa->spa_spares = NULL; + + if (nspares == 0) + return; + + /* + * Construct the array of vdevs, opening them to get status in the + * process. + */ + spa->spa_spares = kmem_alloc(nspares * sizeof (void *), KM_SLEEP); + for (i = 0; i < spa->spa_nspares; i++) { + vdev_t *vd; + + VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, + VDEV_ALLOC_SPARE) == 0); + ASSERT(vd != NULL); + + spa->spa_spares[i] = vd; + + if (vdev_open(vd) != 0) + continue; + + vd->vdev_top = vd; + (void) vdev_validate_spare(vd); + } + + /* + * Recompute the stashed list of spares, with status information + * this time. 
+ */ + VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, + DATA_TYPE_NVLIST_ARRAY) == 0); + + spares = kmem_alloc(spa->spa_nspares * sizeof (void *), KM_SLEEP); + for (i = 0; i < spa->spa_nspares; i++) + spares[i] = vdev_config_generate(spa, spa->spa_spares[i], + B_TRUE, B_TRUE); + VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, + spares, spa->spa_nspares) == 0); + for (i = 0; i < spa->spa_nspares; i++) + nvlist_free(spares[i]); + kmem_free(spares, spa->spa_nspares * sizeof (void *)); +} + +static int +load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) +{ + dmu_buf_t *db; + char *packed = NULL; + size_t nvsize = 0; + int error; + *value = NULL; + + VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); + nvsize = *(uint64_t *)db->db_data; + dmu_buf_rele(db, FTAG); + + packed = kmem_alloc(nvsize, KM_SLEEP); + error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed); + if (error == 0) + error = nvlist_unpack(packed, nvsize, value, 0); + kmem_free(packed, nvsize); + + return (error); +} + +/* * Load an existing storage pool, using the pool's builtin spa_config as a * source of configuration information. */ @@ -270,6 +387,7 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) uberblock_t *ub = &spa->spa_uberblock; uint64_t config_cache_txg = spa->spa_config_txg; uint64_t pool_guid; + uint64_t version; zio_t *zio; spa->spa_load_state = state; @@ -280,6 +398,13 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) goto out; } + /* + * Versioning wasn't explicitly added to the label until later, so if + * it's not present treat it as the initial version. 
+ */ + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) + version = ZFS_VERSION_INITIAL; + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &spa->spa_config_txg); @@ -290,16 +415,17 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) } /* - * Parse the configuration into a vdev tree. + * Parse the configuration into a vdev tree. We explicitly set the + * value that will be returned by spa_version() since parsing the + * configuration requires knowing the version number. */ spa_config_enter(spa, RW_WRITER, FTAG); - rvd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_LOAD); + spa->spa_ubsync.ub_version = version; + error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); spa_config_exit(spa, FTAG); - if (rvd == NULL) { - error = EINVAL; + if (error != 0) goto out; - } ASSERT(spa->spa_root_vdev == rvd); ASSERT(spa_guid(spa) == pool_guid); @@ -396,24 +522,9 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) } if (!mosconfig) { - dmu_buf_t *db; - char *packed = NULL; - size_t nvsize = 0; - nvlist_t *newconfig = NULL; - - VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, - spa->spa_config_object, FTAG, &db)); - nvsize = *(uint64_t *)db->db_data; - dmu_buf_rele(db, FTAG); - - packed = kmem_alloc(nvsize, KM_SLEEP); - error = dmu_read(spa->spa_meta_objset, - spa->spa_config_object, 0, nvsize, packed); - if (error == 0) - error = nvlist_unpack(packed, nvsize, &newconfig, 0); - kmem_free(packed, nvsize); + nvlist_t *newconfig; - if (error) { + if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) { vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); error = EIO; @@ -421,7 +532,6 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) } spa_config_set(spa, newconfig); - spa_unload(spa); spa_deactivate(spa); spa_activate(spa); @@ -439,6 +549,21 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int 
mosconfig) } /* + * Load the bit that tells us to use the new accounting function + * (raid-z deflation). If we have an older pool, this will not + * be present. + */ + error = zap_lookup(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, + sizeof (uint64_t), 1, &spa->spa_deflate); + if (error != 0 && error != ENOENT) { + vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + error = EIO; + goto out; + } + + /* * Load the persistent error log. If we have an older pool, this will * not be present. */ @@ -463,6 +588,32 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) } /* + * Load any hot spares for this pool. + */ + error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares_object); + if (error != 0 && error != ENOENT) { + vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + error = EIO; + goto out; + } + if (error == 0) { + ASSERT(spa_version(spa) >= ZFS_VERSION_SPARES); + if (load_nvlist(spa, spa->spa_spares_object, + &spa->spa_sparelist) != 0) { + vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + error = EIO; + goto out; + } + + spa_config_enter(spa, RW_WRITER, FTAG); + spa_load_spares(spa); + spa_config_exit(spa, FTAG); + } + + /* * Load the vdev state for all toplevel vdevs. */ vdev_load(rvd); @@ -527,7 +678,7 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) error = 0; out: - if (error) + if (error && error != EBADF) zfs_ereport_post(FM_EREPORT_ZFS_POOL, spa, NULL, NULL, 0, 0); spa->spa_load_state = SPA_LOAD_NONE; spa->spa_ena = 0; @@ -587,6 +738,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config) * this is the case, the config cache is out of sync and * we should remove the pool from the namespace. 
*/ + zfs_post_ok(spa, NULL); spa_unload(spa); spa_deactivate(spa); spa_remove(spa); @@ -678,6 +830,48 @@ spa_inject_delref(spa_t *spa) mutex_exit(&spa_namespace_lock); } +static void +spa_add_spares(spa_t *spa, nvlist_t *config) +{ + nvlist_t **spares; + uint_t i, nspares; + nvlist_t *nvroot; + uint64_t guid; + vdev_stat_t *vs; + uint_t vsc; + + if (spa->spa_nspares == 0) + return; + + VERIFY(nvlist_lookup_nvlist(config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); + VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist, + ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); + if (nspares != 0) { + VERIFY(nvlist_add_nvlist_array(nvroot, + ZPOOL_CONFIG_SPARES, spares, nspares) == 0); + VERIFY(nvlist_lookup_nvlist_array(nvroot, + ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); + + /* + * Go through and find any spares which have since been + * repurposed as an active spare. If this is the case, update + * their status appropriately. + */ + for (i = 0; i < nspares; i++) { + VERIFY(nvlist_lookup_uint64(spares[i], + ZPOOL_CONFIG_GUID, &guid) == 0); + if (spa_spare_inuse(guid)) { + VERIFY(nvlist_lookup_uint64_array( + spares[i], ZPOOL_CONFIG_STATS, + (uint64_t **)&vs, &vsc) == 0); + vs->vs_state = VDEV_STATE_CANT_OPEN; + vs->vs_aux = VDEV_AUX_SPARED; + } + } + } +} + int spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) { @@ -687,10 +881,13 @@ spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) *config = NULL; error = spa_open_common(name, &spa, FTAG, config); - if (spa && *config != NULL) + if (spa && *config != NULL) { VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT, spa_get_errlog_size(spa)) == 0); + spa_add_spares(spa, *config); + } + /* * We want to get the alternate root even for faulted pools, so we cheat * and call spa_lookup() directly. @@ -717,6 +914,65 @@ spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) } /* + * Validate that the 'spares' array is well formed. 
We must have an array of + * nvlists, each which describes a valid leaf vdev. + */ +static int +spa_validate_spares(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) +{ + nvlist_t **spares; + uint_t i, nspares; + vdev_t *vd; + int error; + + /* + * It's acceptable to have no spares specified. + */ + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) != 0) + return (0); + + if (nspares == 0) + return (EINVAL); + + /* + * Make sure the pool is formatted with a version that supports hot + * spares. + */ + if (spa_version(spa) < ZFS_VERSION_SPARES) + return (ENOTSUP); + + for (i = 0; i < nspares; i++) { + if ((error = spa_config_parse(spa, &vd, spares[i], NULL, 0, + mode)) != 0) + return (error); + + if (!vd->vdev_ops->vdev_op_leaf) { + vdev_free(vd); + return (EINVAL); + } + + if ((error = vdev_open(vd)) != 0) { + vdev_free(vd); + return (error); + } + + vd->vdev_top = vd; + if ((error = vdev_label_spare(vd, crtxg)) != 0) { + vdev_free(vd); + return (error); + } + + VERIFY(nvlist_add_uint64(spares[i], ZPOOL_CONFIG_GUID, + vd->vdev_guid) == 0); + + vdev_free(vd); + } + + return (0); +} + +/* * Pool Creation */ int @@ -726,8 +982,10 @@ spa_create(const char *pool, nvlist_t *nvroot, const char *altroot) vdev_t *rvd; dsl_pool_t *dp; dmu_tx_t *tx; - int c, error; + int c, error = 0; uint64_t txg = TXG_INITIAL; + nvlist_t **spares; + uint_t nspares; /* * If this pool already exists, return failure. 
@@ -753,23 +1011,26 @@ spa_create(const char *pool, nvlist_t *nvroot, const char *altroot) */ spa_config_enter(spa, RW_WRITER, FTAG); - rvd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_ADD); + error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); - ASSERT(spa->spa_root_vdev == rvd); + ASSERT(error != 0 || rvd != NULL); + ASSERT(error != 0 || spa->spa_root_vdev == rvd); - if (rvd == NULL) { + if (error == 0 && rvd->vdev_children == 0) error = EINVAL; - } else { - if ((error = vdev_create(rvd, txg)) == 0) { - for (c = 0; c < rvd->vdev_children; c++) - vdev_init(rvd->vdev_child[c], txg); - vdev_config_dirty(rvd); - } + + if (error == 0 && + (error = vdev_create(rvd, txg, B_FALSE)) == 0 && + (error = spa_validate_spares(spa, nvroot, txg, + VDEV_ALLOC_ADD)) == 0) { + for (c = 0; c < rvd->vdev_children; c++) + vdev_init(rvd->vdev_child[c], txg); + vdev_config_dirty(rvd); } spa_config_exit(spa, FTAG); - if (error) { + if (error != 0) { spa_unload(spa); spa_deactivate(spa); spa_remove(spa); @@ -777,6 +1038,21 @@ spa_create(const char *pool, nvlist_t *nvroot, const char *altroot) return (error); } + /* + * Get the list of spares, if specified. + */ + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + VERIFY(nvlist_alloc(&spa->spa_sparelist, NV_UNIQUE_NAME, + KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, + ZPOOL_CONFIG_SPARES, spares, nspares) == 0); + spa_config_enter(spa, RW_WRITER, FTAG); + spa_load_spares(spa); + spa_config_exit(spa, FTAG); + spa->spa_sync_spares = B_TRUE; + } + spa->spa_dsl_pool = dp = dsl_pool_create(spa, txg); spa->spa_meta_objset = dp->dp_meta_objset; @@ -795,6 +1071,14 @@ spa_create(const char *pool, nvlist_t *nvroot, const char *altroot) cmn_err(CE_PANIC, "failed to add pool config"); } + /* Newly created pools are always deflated. 
*/ + spa->spa_deflate = TRUE; + if (zap_add(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, + sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { + cmn_err(CE_PANIC, "failed to add deflate"); + } + /* * Create the deferred-free bplist object. Turn off compression * because sync-to-convergence takes longer if the blocksize @@ -838,6 +1122,9 @@ spa_import(const char *pool, nvlist_t *config, const char *altroot) { spa_t *spa; int error; + nvlist_t *nvroot; + nvlist_t **spares; + uint_t nspares; if (!(spa_mode & FWRITE)) return (EROFS); @@ -864,7 +1151,25 @@ spa_import(const char *pool, nvlist_t *config, const char *altroot) */ error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE); - if (error) { + spa_config_enter(spa, RW_WRITER, FTAG); + /* + * Toss any existing sparelist, as it doesn't have any validity anymore, + * and conflicts with spa_has_spare(). + */ + if (spa->spa_sparelist) { + nvlist_free(spa->spa_sparelist); + spa->spa_sparelist = NULL; + spa_load_spares(spa); + } + + VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + if (error == 0) + error = spa_validate_spares(spa, nvroot, -1ULL, + VDEV_ALLOC_SPARE); + spa_config_exit(spa, FTAG); + + if (error != 0) { spa_unload(spa); spa_deactivate(spa); spa_remove(spa); @@ -873,6 +1178,26 @@ spa_import(const char *pool, nvlist_t *config, const char *altroot) } /* + * Override any spares as specified by the user, as these may have + * correct device names/devids, etc. 
+ */ + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + if (spa->spa_sparelist) + VERIFY(nvlist_remove(spa->spa_sparelist, + ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); + else + VERIFY(nvlist_alloc(&spa->spa_sparelist, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, + ZPOOL_CONFIG_SPARES, spares, nspares) == 0); + spa_config_enter(spa, RW_WRITER, FTAG); + spa_load_spares(spa); + spa_config_exit(spa, FTAG); + spa->spa_sync_spares = B_TRUE; + } + + /* * Update the config cache to include the newly-imported pool. */ spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); @@ -933,6 +1258,11 @@ spa_tryimport(nvlist_t *tryconfig) poolname) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, state) == 0); + + /* + * Add the list of hot spares. + */ + spa_add_spares(spa, config); } spa_unload(spa); @@ -1083,26 +1413,80 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot) int c, error; vdev_t *rvd = spa->spa_root_vdev; vdev_t *vd, *tvd; + nvlist_t **spares; + uint_t i, nspares; txg = spa_vdev_enter(spa); - vd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_ADD); + if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, + VDEV_ALLOC_ADD)) != 0) + return (spa_vdev_exit(spa, NULL, txg, error)); - if (vd == NULL) + if ((error = spa_validate_spares(spa, nvroot, txg, + VDEV_ALLOC_ADD)) != 0) + return (spa_vdev_exit(spa, vd, txg, error)); + + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) != 0) + nspares = 0; + + if (vd->vdev_children == 0 && nspares == 0) return (spa_vdev_exit(spa, vd, txg, EINVAL)); - if ((error = vdev_create(vd, txg)) != 0) - return (spa_vdev_exit(spa, vd, txg, error)); + if (vd->vdev_children != 0) { + if ((error = vdev_create(vd, txg, B_FALSE)) != 0) + return (spa_vdev_exit(spa, vd, txg, error)); - /* - * Transfer each new top-level vdev from vd to rvd. 
- */ - for (c = 0; c < vd->vdev_children; c++) { - tvd = vd->vdev_child[c]; - vdev_remove_child(vd, tvd); - tvd->vdev_id = rvd->vdev_children; - vdev_add_child(rvd, tvd); - vdev_config_dirty(tvd); + /* + * Transfer each new top-level vdev from vd to rvd. + */ + for (c = 0; c < vd->vdev_children; c++) { + tvd = vd->vdev_child[c]; + vdev_remove_child(vd, tvd); + tvd->vdev_id = rvd->vdev_children; + vdev_add_child(rvd, tvd); + vdev_config_dirty(tvd); + } + } + + if (nspares != 0) { + if (spa->spa_sparelist != NULL) { + nvlist_t **oldspares; + uint_t oldnspares; + nvlist_t **newspares; + + VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist, + ZPOOL_CONFIG_SPARES, &oldspares, &oldnspares) == 0); + + newspares = kmem_alloc(sizeof (void *) * + (nspares + oldnspares), KM_SLEEP); + for (i = 0; i < oldnspares; i++) + VERIFY(nvlist_dup(oldspares[i], + &newspares[i], KM_SLEEP) == 0); + for (i = 0; i < nspares; i++) + VERIFY(nvlist_dup(spares[i], + &newspares[i + oldnspares], + KM_SLEEP) == 0); + + VERIFY(nvlist_remove(spa->spa_sparelist, + ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); + + VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, + ZPOOL_CONFIG_SPARES, newspares, + nspares + oldnspares) == 0); + for (i = 0; i < oldnspares + nspares; i++) + nvlist_free(newspares[i]); + kmem_free(newspares, (oldnspares + nspares) * + sizeof (void *)); + } else { + VERIFY(nvlist_alloc(&spa->spa_sparelist, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, + ZPOOL_CONFIG_SPARES, spares, nspares) == 0); + } + + spa_load_spares(spa); + spa->spa_sync_spares = B_TRUE; } /* @@ -1147,7 +1531,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) int error; vdev_t *rvd = spa->spa_root_vdev; vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; - vdev_ops_t *pvops = replacing ? 
&vdev_replacing_ops : &vdev_mirror_ops; + vdev_ops_t *pvops; txg = spa_vdev_enter(spa); @@ -1161,18 +1545,8 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) pvd = oldvd->vdev_parent; - /* - * The parent must be a mirror or the root, unless we're replacing; - * in that case, the parent can be anything but another replacing vdev. - */ - if (pvd->vdev_ops != &vdev_mirror_ops && - pvd->vdev_ops != &vdev_root_ops && - (!replacing || pvd->vdev_ops == &vdev_replacing_ops)) - return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); - - newrootvd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_ADD); - - if (newrootvd == NULL || newrootvd->vdev_children != 1) + if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, + VDEV_ALLOC_ADD)) != 0 || newrootvd->vdev_children != 1) return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); newvd = newrootvd->vdev_child[0]; @@ -1180,9 +1554,43 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) if (!newvd->vdev_ops->vdev_op_leaf) return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); - if ((error = vdev_create(newrootvd, txg)) != 0) + if ((error = vdev_create(newrootvd, txg, replacing)) != 0) return (spa_vdev_exit(spa, newrootvd, txg, error)); + if (!replacing) { + /* + * For attach, the only allowable parent is a mirror or the root + * vdev. + */ + if (pvd->vdev_ops != &vdev_mirror_ops && + pvd->vdev_ops != &vdev_root_ops) + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); + + pvops = &vdev_mirror_ops; + } else { + /* + * Active hot spares can only be replaced by inactive hot + * spares. + */ + if (pvd->vdev_ops == &vdev_spare_ops && + pvd->vdev_child[1] == oldvd && + !spa_has_spare(spa, newvd->vdev_guid)) + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); + + /* + * If the source is a hot spare, and the parent isn't already a + * spare, then we want to create a new hot spare. Otherwise, we + * want to create a replacing vdev. 
+ */ + if (pvd->vdev_ops == &vdev_replacing_ops) + return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); + else if (pvd->vdev_ops != &vdev_spare_ops && + newvd->vdev_isspare) + pvops = &vdev_spare_ops; + else + pvops = &vdev_replacing_ops; + } + /* * Compare the new device size with the replaceable/attachable * device size. @@ -1214,8 +1622,8 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) } /* - * If the parent is not a mirror, or if we're replacing, - * insert the new mirror/replacing vdev above oldvd. + * If the parent is not a mirror, or if we're replacing, insert the new + * mirror/replacing/spare vdev above oldvd. */ if (pvd->vdev_ops != pvops) pvd = vdev_add_parent(oldvd, pvops); @@ -1283,6 +1691,8 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done) int c, t, error; vdev_t *rvd = spa->spa_root_vdev; vdev_t *vd, *pvd, *cvd, *tvd; + boolean_t unspare = B_FALSE; + uint64_t unspare_guid; txg = spa_vdev_enter(spa); @@ -1298,17 +1708,27 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done) /* * If replace_done is specified, only remove this device if it's - * the first child of a replacing vdev. - */ - if (replace_done && - (vd->vdev_id != 0 || pvd->vdev_ops != &vdev_replacing_ops)) - return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); + * the first child of a replacing vdev. For the 'spare' vdev, either + * disk can be removed. + */ + if (replace_done) { + if (pvd->vdev_ops == &vdev_replacing_ops) { + if (vd->vdev_id != 0) + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); + } else if (pvd->vdev_ops != &vdev_spare_ops) { + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); + } + } + + ASSERT(pvd->vdev_ops != &vdev_spare_ops || + spa_version(spa) >= ZFS_VERSION_SPARES); /* - * Only mirror and replacing vdevs support detach. + * Only mirror, replacing, and spare vdevs support detach. 
*/ if (pvd->vdev_ops != &vdev_replacing_ops && - pvd->vdev_ops != &vdev_mirror_ops) + pvd->vdev_ops != &vdev_mirror_ops && + pvd->vdev_ops != &vdev_spare_ops) return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); /* @@ -1339,10 +1759,25 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done) if (!dirty) break; } - if (c == pvd->vdev_children) + + /* + * If we are a replacing or spare vdev, then we can always detach the + * latter child, as that is how one cancels the operation. + */ + if ((pvd->vdev_ops == &vdev_mirror_ops || vd->vdev_id != 1) && + c == pvd->vdev_children) return (spa_vdev_exit(spa, NULL, txg, EBUSY)); /* + * If we are detaching the original disk from a spare, then it implies + * that the spare should become a real disk, and be removed from the + * active spare list for the pool. + */ + if (pvd->vdev_ops == &vdev_spare_ops && + vd->vdev_id == 0) + unspare = B_TRUE; + + /* * Erase the disk labels so the disk can be used for other things. * This must be done after all other error cases are handled, * but before we disembowel vd (so we can still do I/O to it). @@ -1350,7 +1785,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done) * it may be that the unwritability of the disk is the reason * it's being detached! */ - error = vdev_label_init(vd, 0); + error = vdev_label_init(vd, 0, B_FALSE); if (error) dprintf("unable to erase labels on %s\n", vdev_description(vd)); @@ -1366,6 +1801,19 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done) cvd = pvd->vdev_child[0]; /* + * If we need to remove the remaining child from the list of hot spares, + * do it now, marking the vdev as no longer a spare in the process. We + * must do this before vdev_remove_parent(), because that can change the + * GUID if it creates a new toplevel GUID. 
+ */ + if (unspare) { + ASSERT(cvd->vdev_isspare); + spa_spare_remove(cvd->vdev_guid); + cvd->vdev_isspare = B_FALSE; + unspare_guid = cvd->vdev_guid; + } + + /* * If the parent mirror/replacing vdev only has one child, * the parent is no longer needed. Remove it from the tree. */ @@ -1408,7 +1856,104 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done) dprintf("detached %s in txg %llu\n", vd->vdev_path, txg); - return (spa_vdev_exit(spa, vd, txg, 0)); + error = spa_vdev_exit(spa, vd, txg, 0); + + /* + * If we are supposed to remove the given vdev from the list of spares, + * iterate over all pools in the system and replace it if it's present. + */ + if (unspare) { + spa = NULL; + mutex_enter(&spa_namespace_lock); + while ((spa = spa_next(spa)) != NULL) { + if (spa->spa_state != POOL_STATE_ACTIVE) + continue; + + (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); + } + mutex_exit(&spa_namespace_lock); + } + + return (error); +} + +/* + * Remove a device from the pool. Currently, this supports removing only hot + * spares. + */ +int +spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) +{ + vdev_t *vd; + nvlist_t **spares, *nv, **newspares; + uint_t i, j, nspares; + int ret = 0; + + spa_config_enter(spa, RW_WRITER, FTAG); + + vd = spa_lookup_by_guid(spa, guid); + + nv = NULL; + if (spa->spa_spares != NULL && + nvlist_lookup_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + for (i = 0; i < nspares; i++) { + uint64_t theguid; + + VERIFY(nvlist_lookup_uint64(spares[i], + ZPOOL_CONFIG_GUID, &theguid) == 0); + if (theguid == guid) { + nv = spares[i]; + break; + } + } + } + + /* + * We only support removing a hot spare, and only if it's not currently + * in use in this pool. 
+ */ + if (nv == NULL && vd == NULL) { + ret = ENOENT; + goto out; + } + + if (nv == NULL && vd != NULL) { + ret = ENOTSUP; + goto out; + } + + if (!unspare && nv != NULL && vd != NULL) { + ret = EBUSY; + goto out; + } + + if (nspares == 1) { + newspares = NULL; + } else { + newspares = kmem_alloc((nspares - 1) * sizeof (void *), + KM_SLEEP); + for (i = 0, j = 0; i < nspares; i++) { + if (spares[i] != nv) + VERIFY(nvlist_dup(spares[i], + &newspares[j++], KM_SLEEP) == 0); + } + } + + VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, + DATA_TYPE_NVLIST_ARRAY) == 0); + VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES, + newspares, nspares - 1) == 0); + for (i = 0; i < nspares - 1; i++) + nvlist_free(newspares[i]); + kmem_free(newspares, (nspares - 1) * sizeof (void *)); + spa_load_spares(spa); + spa->spa_sync_spares = B_TRUE; + +out: + spa_config_exit(spa, FTAG); + + return (ret); } /* @@ -1446,15 +1991,31 @@ static void spa_vdev_replace_done(spa_t *spa) { vdev_t *vd; + vdev_t *pvd; uint64_t guid; + uint64_t pguid = 0; spa_config_enter(spa, RW_READER, FTAG); while ((vd = spa_vdev_replace_done_hunt(spa->spa_root_vdev)) != NULL) { guid = vd->vdev_guid; + /* + * If we have just finished replacing a hot spared device, then + * we need to detach the parent's first child (the original hot + * spare) as well. 
+ */ + pvd = vd->vdev_parent; + if (pvd->vdev_parent->vdev_ops == &vdev_spare_ops && + pvd->vdev_id == 0) { + ASSERT(pvd->vdev_ops == &vdev_replacing_ops); + ASSERT(pvd->vdev_parent->vdev_children == 2); + pguid = pvd->vdev_parent->vdev_child[1]->vdev_guid; + } spa_config_exit(spa, FTAG); if (spa_vdev_detach(spa, guid, B_TRUE) != 0) return; + if (pguid != 0 && spa_vdev_detach(spa, pguid, B_TRUE) != 0) + return; spa_config_enter(spa, RW_READER, FTAG); } @@ -1475,8 +2036,36 @@ spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) txg = spa_vdev_enter(spa); - if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) - return (spa_vdev_exit(spa, NULL, txg, ENOENT)); + if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) { + /* + * Determine if this is a reference to a hot spare. In that + * case, update the path as stored in the spare list. + */ + nvlist_t **spares; + uint_t i, nspares; + if (spa->spa_sparelist != NULL) { + VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist, + ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); + for (i = 0; i < nspares; i++) { + uint64_t theguid; + VERIFY(nvlist_lookup_uint64(spares[i], + ZPOOL_CONFIG_GUID, &theguid) == 0); + if (theguid == guid) + break; + } + + if (i == nspares) + return (spa_vdev_exit(spa, NULL, txg, ENOENT)); + + VERIFY(nvlist_add_string(spares[i], + ZPOOL_CONFIG_PATH, newpath) == 0); + spa_load_spares(spa); + spa->spa_sync_spares = B_TRUE; + return (spa_vdev_exit(spa, NULL, txg, 0)); + } else { + return (spa_vdev_exit(spa, NULL, txg, ENOENT)); + } + } if (!vd->vdev_ops->vdev_op_leaf) return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); @@ -2049,41 +2638,92 @@ spa_sync_deferred_frees(spa_t *spa, uint64_t txg) } static void -spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) +spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) { - nvlist_t *config; char *packed = NULL; size_t nvsize = 0; dmu_buf_t *db; - if (list_is_empty(&spa->spa_dirty_list)) - return; - - config = spa_config_generate(spa, NULL, 
dmu_tx_get_txg(tx), B_FALSE); - - if (spa->spa_config_syncing) - nvlist_free(spa->spa_config_syncing); - spa->spa_config_syncing = config; - - VERIFY(nvlist_size(config, &nvsize, NV_ENCODE_XDR) == 0); + VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); packed = kmem_alloc(nvsize, KM_SLEEP); - VERIFY(nvlist_pack(config, &packed, &nvsize, NV_ENCODE_XDR, + VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, KM_SLEEP) == 0); - dmu_write(spa->spa_meta_objset, spa->spa_config_object, 0, nvsize, - packed, tx); + dmu_write(spa->spa_meta_objset, obj, 0, nvsize, packed, tx); kmem_free(packed, nvsize); - VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, - spa->spa_config_object, FTAG, &db)); + VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); dmu_buf_will_dirty(db, tx); *(uint64_t *)db->db_data = nvsize; dmu_buf_rele(db, FTAG); } +static void +spa_sync_spares(spa_t *spa, dmu_tx_t *tx) +{ + nvlist_t *nvroot; + nvlist_t **spares; + int i; + + if (!spa->spa_sync_spares) + return; + + /* + * Update the MOS nvlist describing the list of available spares. + * spa_validate_spares() will have already made sure this nvlist is + * valid and the vdevs are labelled appropriately. 
+ */ + if (spa->spa_spares_object == 0) { + spa->spa_spares_object = dmu_object_alloc(spa->spa_meta_objset, + DMU_OT_PACKED_NVLIST, 1 << 14, + DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); + VERIFY(zap_update(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SPARES, + sizeof (uint64_t), 1, &spa->spa_spares_object, tx) == 0); + } + + VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); + if (spa->spa_nspares == 0) { + VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + NULL, 0) == 0); + } else { + spares = kmem_alloc(spa->spa_nspares * sizeof (void *), + KM_SLEEP); + for (i = 0; i < spa->spa_nspares; i++) + spares[i] = vdev_config_generate(spa, + spa->spa_spares[i], B_FALSE, B_TRUE); + VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + spares, spa->spa_nspares) == 0); + for (i = 0; i < spa->spa_nspares; i++) + nvlist_free(spares[i]); + kmem_free(spares, spa->spa_nspares * sizeof (void *)); + } + + spa_sync_nvlist(spa, spa->spa_spares_object, nvroot, tx); + + spa->spa_sync_spares = B_FALSE; +} + +static void +spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) +{ + nvlist_t *config; + + if (list_is_empty(&spa->spa_dirty_list)) + return; + + config = spa_config_generate(spa, NULL, dmu_tx_get_txg(tx), B_FALSE); + + if (spa->spa_config_syncing) + nvlist_free(spa->spa_config_syncing); + spa->spa_config_syncing = config; + + spa_sync_nvlist(spa, spa->spa_config_object, config, tx); +} + /* * Sync the specified transaction group. New blocks may be dirtied as * part of the process, so we iterate until it converges. @@ -2109,6 +2749,29 @@ spa_sync(spa_t *spa, uint64_t txg) VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj)); + tx = dmu_tx_create_assigned(dp, txg); + + /* + * If we are upgrading to ZFS_VERSION_RAIDZ_DEFLATE this txg, + * set spa_deflate if we have no raid-z vdevs. 
+ */ + if (spa->spa_ubsync.ub_version < ZFS_VERSION_RAIDZ_DEFLATE && + spa->spa_uberblock.ub_version >= ZFS_VERSION_RAIDZ_DEFLATE) { + int i; + + for (i = 0; i < rvd->vdev_children; i++) { + vd = rvd->vdev_child[i]; + if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) + break; + } + if (i == rvd->vdev_children) { + spa->spa_deflate = TRUE; + VERIFY(0 == zap_add(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, + sizeof (uint64_t), 1, &spa->spa_deflate, tx)); + } + } + /* * If anything has changed in this txg, push the deferred frees * from the previous txg. If not, leave them alone so that we @@ -2124,12 +2787,9 @@ spa_sync(spa_t *spa, uint64_t txg) do { spa->spa_sync_pass++; - tx = dmu_tx_create_assigned(dp, txg); spa_sync_config_object(spa, tx); - dmu_tx_commit(tx); - + spa_sync_spares(spa, tx); spa_errlog_sync(spa, txg); - dsl_pool_sync(dp, txg); dirty_vdevs = 0; @@ -2138,10 +2798,7 @@ spa_sync(spa_t *spa, uint64_t txg) dirty_vdevs++; } - tx = dmu_tx_create_assigned(dp, txg); bplist_sync(bpl, tx); - dmu_tx_commit(tx); - } while (dirty_vdevs); bplist_close(bpl); @@ -2175,6 +2832,8 @@ spa_sync(spa_t *spa, uint64_t txg) VERIFY(vdev_config_sync(rvd, txg) == 0); } + dmu_tx_commit(tx); + /* * Clear the dirty config list. */ @@ -2219,7 +2878,7 @@ spa_sync(spa_t *spa, uint64_t txg) /* * It had better be the case that we didn't dirty anything - * since spa_sync_labels(). + * since vdev_config_sync(). 
*/ ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); @@ -2319,4 +2978,18 @@ spa_upgrade(spa_t *spa) vdev_config_dirty(spa->spa_root_vdev); spa_config_exit(spa, FTAG); + + txg_wait_synced(spa_get_dsl(spa), 0); +} + +boolean_t +spa_has_spare(spa_t *spa, uint64_t guid) +{ + int i; + + for (i = 0; i < spa->spa_nspares; i++) + if (spa->spa_spares[i]->vdev_guid == guid) + return (B_TRUE); + + return (B_FALSE); } diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c index 906f2e5470..03ba60b0e3 100644 --- a/usr/src/uts/common/fs/zfs/spa_config.c +++ b/usr/src/uts/common/fs/zfs/spa_config.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -279,7 +280,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, - spa->spa_uberblock.ub_version) == 0); + spa_version(spa)) == 0); VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, spa_name(spa)) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, @@ -294,10 +295,13 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) vd->vdev_top->vdev_guid) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0); + if (vd->vdev_isspare) + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE, + 1ULL) == 0); vd = vd->vdev_top; /* label contains top config */ } - nvroot = vdev_config_generate(vd, getstats); + nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE); VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); nvlist_free(nvroot); diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c index 11267729d9..3d2ec9f0b7 100644 --- a/usr/src/uts/common/fs/zfs/spa_misc.c +++ b/usr/src/uts/common/fs/zfs/spa_misc.c 
@@ -175,6 +175,9 @@ static kcondvar_t spa_namespace_cv; static int spa_active_count; static int spa_max_replication_override = SPA_DVAS_PER_BP; +static avl_tree_t spa_spare_avl; +static kmutex_t spa_spare_lock; + kmem_cache_t *spa_buffer_pool; int spa_mode; @@ -338,6 +341,99 @@ spa_refcount_zero(spa_t *spa) /* * ========================================================================== + * SPA spare tracking + * ========================================================================== + */ + +/* + * We track spare information on a global basis. This allows us to do two + * things: determine when a spare is no longer referenced by any active pool, + * and (quickly) determine if a spare is currently in use in another pool on the + * system. + */ +typedef struct spa_spare { + uint64_t spare_guid; + avl_node_t spare_avl; + int spare_count; +} spa_spare_t; + +static int +spa_spare_compare(const void *a, const void *b) +{ + const spa_spare_t *sa = a; + const spa_spare_t *sb = b; + + if (sa->spare_guid < sb->spare_guid) + return (-1); + else if (sa->spare_guid > sb->spare_guid) + return (1); + else + return (0); +} + +void +spa_spare_add(uint64_t guid) +{ + avl_index_t where; + spa_spare_t search; + spa_spare_t *spare; + + mutex_enter(&spa_spare_lock); + + search.spare_guid = guid; + if ((spare = avl_find(&spa_spare_avl, &search, &where)) != NULL) { + spare->spare_count++; + } else { + spare = kmem_alloc(sizeof (spa_spare_t), KM_SLEEP); + spare->spare_guid = guid; + spare->spare_count = 1; + avl_insert(&spa_spare_avl, spare, where); + } + + mutex_exit(&spa_spare_lock); +} + +void +spa_spare_remove(uint64_t guid) +{ + spa_spare_t search; + spa_spare_t *spare; + avl_index_t where; + + mutex_enter(&spa_spare_lock); + + search.spare_guid = guid; + spare = avl_find(&spa_spare_avl, &search, &where); + + ASSERT(spare != NULL); + + if (--spare->spare_count == 0) { + avl_remove(&spa_spare_avl, spare); + kmem_free(spare, sizeof (spa_spare_t)); + } + + mutex_exit(&spa_spare_lock); 
+} + +boolean_t +spa_spare_inuse(uint64_t guid) +{ + spa_spare_t search; + avl_index_t where; + boolean_t ret; + + mutex_enter(&spa_spare_lock); + + search.spare_guid = guid; + ret = (avl_find(&spa_spare_avl, &search, &where) != NULL); + + mutex_exit(&spa_spare_lock); + + return (ret); +} + +/* + * ========================================================================== * SPA config locking * ========================================================================== */ @@ -779,7 +875,7 @@ spa_metaslab_class_select(spa_t *spa) } /* - * Return pool-wide allocated space. + * Return how much space is allocated in the pool (ie. sum of all asize) */ uint64_t spa_get_alloc(spa_t *spa) @@ -788,7 +884,7 @@ spa_get_alloc(spa_t *spa) } /* - * Return pool-wide allocated space. + * Return how much (raid-z inflated) space there is in the pool. */ uint64_t spa_get_space(spa_t *spa) @@ -796,6 +892,18 @@ spa_get_space(spa_t *spa) return (spa->spa_root_vdev->vdev_stat.vs_space); } +/* + * Return the amount of raid-z-deflated space in the pool. 
+ */ +uint64_t +spa_get_dspace(spa_t *spa) +{ + if (spa->spa_deflate) + return (spa->spa_root_vdev->vdev_stat.vs_dspace); + else + return (spa->spa_root_vdev->vdev_stat.vs_space); +} + /* ARGSUSED */ uint64_t spa_get_asize(spa_t *spa, uint64_t lsize) @@ -828,6 +936,23 @@ spa_max_replication(spa_t *spa) return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override)); } +uint64_t +bp_get_dasize(spa_t *spa, const blkptr_t *bp) +{ + int sz = 0, i; + + if (!spa->spa_deflate) + return (BP_GET_ASIZE(bp)); + + for (i = 0; i < SPA_DVAS_PER_BP; i++) { + vdev_t *vd = + vdev_lookup_top(spa, DVA_GET_VDEV(&bp->blk_dva[i])); + sz += (DVA_GET_ASIZE(&bp->blk_dva[i]) >> SPA_MINBLOCKSHIFT) * + vd->vdev_deflate_ratio; + } + return (sz); +} + /* * ========================================================================== * Initialization and Termination @@ -864,6 +989,9 @@ spa_init(int mode) avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t), offsetof(spa_t, spa_avl)); + avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_spare_t), + offsetof(spa_spare_t, spare_avl)); + spa_mode = mode; refcount_init(); @@ -885,6 +1013,7 @@ spa_fini(void) refcount_fini(); avl_destroy(&spa_namespace_avl); + avl_destroy(&spa_spare_avl); cv_destroy(&spa_namespace_cv); mutex_destroy(&spa_namespace_lock); diff --git a/usr/src/uts/common/fs/zfs/sys/bplist.h b/usr/src/uts/common/fs/zfs/sys/bplist.h index c716fe7aa6..b4c83765c8 100644 --- a/usr/src/uts/common/fs/zfs/sys/bplist.h +++ b/usr/src/uts/common/fs/zfs/sys/bplist.h @@ -45,8 +45,12 @@ typedef struct bplist_phys { */ uint64_t bpl_entries; uint64_t bpl_bytes; + uint64_t bpl_comp; + uint64_t bpl_uncomp; } bplist_phys_t; +#define BPLIST_SIZE_V0 (2 * sizeof (uint64_t)) + typedef struct bplist_q { blkptr_t bpq_blk; void *bpq_next; @@ -56,8 +60,9 @@ typedef struct bplist { kmutex_t bpl_lock; objset_t *bpl_mos; uint64_t bpl_object; - int bpl_blockshift; - int bpl_bpshift; + uint8_t bpl_blockshift; + uint8_t bpl_bpshift; + uint8_t bpl_havecomp; 
bplist_q_t *bpl_queue; bplist_phys_t *bpl_phys; dmu_buf_t *bpl_dbuf; @@ -74,6 +79,8 @@ extern int bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx); extern void bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp); extern void bplist_sync(bplist_t *bpl, dmu_tx_t *tx); extern void bplist_vacate(bplist_t *bpl, dmu_tx_t *tx); +extern int bplist_space(bplist_t *bpl, + uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); #ifdef __cplusplus } diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h index 78dd9632e6..88b59a1618 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h @@ -177,12 +177,17 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr); */ typedef void dmu_byteswap_func_t(void *buf, size_t size); +/* + * The names of zap entries in the DIRECTORY_OBJECT of the MOS. + */ #define DMU_POOL_DIRECTORY_OBJECT 1 #define DMU_POOL_CONFIG "config" #define DMU_POOL_ROOT_DATASET "root_dataset" #define DMU_POOL_SYNC_BPLIST "sync_bplist" #define DMU_POOL_ERRLOG_SCRUB "errlog_scrub" #define DMU_POOL_ERRLOG_LAST "errlog_last" +#define DMU_POOL_SPARES "spares" +#define DMU_POOL_DEFLATE "deflate" /* * Allocate an object from this objset. The range of object numbers diff --git a/usr/src/uts/common/fs/zfs/sys/dnode.h b/usr/src/uts/common/fs/zfs/sys/dnode.h index d2c9d4f3bf..48b06a6749 100644 --- a/usr/src/uts/common/fs/zfs/sys/dnode.h +++ b/usr/src/uts/common/fs/zfs/sys/dnode.h @@ -75,6 +75,9 @@ extern "C" { #define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \ (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)))) +#define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \ + (dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT) + #define EPB(blkshift, typeshift) (1 << (blkshift - typeshift)) struct dmu_buf_impl; @@ -87,6 +90,9 @@ enum dnode_dirtycontext { DN_DIRTY_SYNC }; +/* Is dn_used in bytes? 
if not, it's in multiples of SPA_MINBLOCKSIZE */ +#define DNODE_FLAG_USED_BYTES (1<<0) + typedef struct dnode_phys { uint8_t dn_type; /* dmu_object_type_t */ uint8_t dn_indblkshift; /* ln2(indirect block size) */ @@ -95,14 +101,14 @@ typedef struct dnode_phys { uint8_t dn_bonustype; /* type of data in bonus buffer */ uint8_t dn_checksum; /* ZIO_CHECKSUM type */ uint8_t dn_compress; /* ZIO_COMPRESS type */ - uint8_t dn_pad1[1]; + uint8_t dn_flags; /* DNODE_FLAG_* */ uint16_t dn_datablkszsec; /* data block size in 512b sectors */ uint16_t dn_bonuslen; /* length of dn_bonus */ uint8_t dn_pad2[4]; /* accounting is protected by dn_dirty_mtx */ uint64_t dn_maxblkid; /* largest allocated block ID */ - uint64_t dn_secphys; /* 512b sectors of disk space used */ + uint64_t dn_used; /* bytes (or sectors) of disk space */ uint64_t dn_pad3[4]; diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h index 2a4ce242dc..912445b160 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h @@ -44,6 +44,15 @@ struct dsl_pool; typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *); +#define DS_FLAG_INCONSISTENT (1ULL<<0) +/* + * NB: nopromote can not yet be set, but we want support for it in this + * on-disk version, so that we don't need to upgrade for it later. It + * will be needed when we implement 'zfs split' (where the split off + * clone should not be promoted). 
+ */ +#define DS_FLAG_NOPROMOTE (1ULL<<1) + typedef struct dsl_dataset_phys { uint64_t ds_dir_obj; uint64_t ds_prev_snap_obj; @@ -65,9 +74,9 @@ typedef struct dsl_dataset_phys { */ uint64_t ds_fsid_guid; uint64_t ds_guid; - uint64_t ds_inconsistent; /* boolean */ + uint64_t ds_flags; blkptr_t ds_bp; - uint64_t ds_pad[8]; /* pad out to 256 bytes for good measure */ + uint64_t ds_pad[8]; /* pad out to 320 bytes for good measure */ } dsl_dataset_phys_t; typedef struct dsl_dataset { @@ -119,6 +128,7 @@ int dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx); int dsl_dataset_rollback(const char *name); int dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx); int dsl_dataset_rename(const char *name, const char *newname); +int dsl_dataset_promote(const char *name); void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds, void *p, dsl_dataset_evict_func_t func); diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h index 5c23fdc497..123d6d128f 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h @@ -121,6 +121,7 @@ int dsl_dir_sync_task(dsl_dir_t *dd, int dsl_dir_set_quota(const char *ddname, uint64_t quota); int dsl_dir_set_reservation(const char *ddname, uint64_t reservation); int dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx); +int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space); #ifdef ZFS_DEBUG #define dprintf_dd(dd, fmt, ...) do { \ diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_prop.h b/usr/src/uts/common/fs/zfs/sys/dsl_prop.h index bf03cfa799..95094641c5 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_prop.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_prop.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. 
+ * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -42,10 +41,9 @@ struct dsl_dataset; /* The callback func may not call into the DMU or DSL! */ typedef void (dsl_prop_changed_cb_t)(void *arg, uint64_t newval); -#define DSL_PROP_VALUE_UNDEFINED (-1ULL) - typedef struct dsl_prop_cb_record { list_node_t cbr_node; /* link on dd_prop_cbs */ + struct dsl_dataset *cbr_ds; const char *cbr_propname; dsl_prop_changed_cb_t *cbr_func; void *cbr_arg; @@ -55,6 +53,7 @@ int dsl_prop_register(struct dsl_dataset *ds, const char *propname, dsl_prop_changed_cb_t *callback, void *cbarg); int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname, dsl_prop_changed_cb_t *callback, void *cbarg); +int dsl_prop_numcb(struct dsl_dataset *ds); int dsl_prop_get(const char *ddname, const char *propname, int intsz, int numints, void *buf, char *setpoint); diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h index a51cfd524f..829c025af2 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa.h +++ b/usr/src/uts/common/fs/zfs/sys/spa.h @@ -232,7 +232,11 @@ typedef struct blkptr { #define BP_GET_ASIZE(bp) \ (DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ - DVA_GET_ASIZE(&(bp)->blk_dva[2])) + DVA_GET_ASIZE(&(bp)->blk_dva[2])) + +#define BP_GET_UCSIZE(bp) \ + ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? 
\ + BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)); #define BP_GET_NDVAS(bp) \ (!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ @@ -326,8 +330,14 @@ extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot); extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing); extern int spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done); +extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare); extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath); +/* spare state (which is global across all pools) */ +extern void spa_spare_add(uint64_t guid); +extern void spa_spare_remove(uint64_t guid); +extern boolean_t spa_spare_inuse(uint64_t guid); + /* scrubbing */ extern int spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force); extern void spa_scrub_suspend(spa_t *spa); @@ -390,12 +400,14 @@ extern char *spa_name(spa_t *spa); extern uint64_t spa_guid(spa_t *spa); extern uint64_t spa_last_synced_txg(spa_t *spa); extern uint64_t spa_first_txg(spa_t *spa); +extern uint64_t spa_version(spa_t *spa); extern int spa_state(spa_t *spa); extern uint64_t spa_freeze_txg(spa_t *spa); struct metaslab_class; extern struct metaslab_class *spa_metaslab_class_select(spa_t *spa); extern uint64_t spa_get_alloc(spa_t *spa); extern uint64_t spa_get_space(spa_t *spa); +extern uint64_t spa_get_dspace(spa_t *spa); extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize); extern uint64_t spa_version(spa_t *spa); extern int spa_max_replication(spa_t *spa); @@ -412,6 +424,8 @@ extern void spa_freeze(spa_t *spa); extern void spa_upgrade(spa_t *spa); extern void spa_evict_all(void); extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid); +extern boolean_t spa_has_spare(spa_t *, uint64_t guid); +extern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp); /* error handling */ struct zbookmark; diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h index e4df4c9eab..9a2fea9c21 100644 --- 
a/usr/src/uts/common/fs/zfs/sys/spa_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h @@ -65,7 +65,6 @@ struct spa { nvlist_t *spa_config; /* last synced config */ nvlist_t *spa_config_syncing; /* currently syncing config */ uint64_t spa_config_txg; /* txg of last config change */ - spa_config_lock_t spa_config_lock; /* configuration changes */ kmutex_t spa_config_cache_lock; /* for spa_config RW_READER */ int spa_sync_pass; /* iterate-to-convergence */ int spa_state; /* pool state */ @@ -84,6 +83,11 @@ struct spa { txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */ vdev_t *spa_root_vdev; /* top-level vdev container */ list_t spa_dirty_list; /* vdevs with dirty labels */ + uint64_t spa_spares_object; /* MOS object for spare list */ + nvlist_t *spa_sparelist; /* cached spare config */ + vdev_t **spa_spares; /* available hot spares */ + int spa_nspares; /* number of hot spares */ + boolean_t spa_sync_spares; /* sync the spares list */ uint64_t spa_config_object; /* MOS object for pool config */ uint64_t spa_syncing_txg; /* txg currently syncing */ uint64_t spa_sync_bplist_obj; /* object for deferred frees */ @@ -122,11 +126,13 @@ struct spa { kmutex_t spa_errlist_lock; /* error list/ereport lock */ avl_tree_t spa_errlist_last; /* last error list */ avl_tree_t spa_errlist_scrub; /* scrub error list */ + uint64_t spa_deflate; /* should we deflate? */ /* * spa_refcnt must be the last element because it changes size based on * compilation options. In order for the MDB module to function * correctly, the other fields must remain in the same location. 
*/ + spa_config_lock_t spa_config_lock; /* configuration changes */ refcount_t spa_refcount; /* number of opens */ }; diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h index 5a2e6750a0..760aeae560 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev.h @@ -60,9 +60,10 @@ typedef struct vdev_knob { extern int vdev_open(vdev_t *); extern int vdev_validate(vdev_t *); extern void vdev_close(vdev_t *); -extern int vdev_create(vdev_t *, uint64_t txg); +extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace); extern void vdev_init(vdev_t *, uint64_t txg); extern void vdev_reopen(vdev_t *); +extern int vdev_validate_spare(vdev_t *); extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev); extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid); @@ -85,8 +86,8 @@ extern void vdev_propagate_state(vdev_t *vd); extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux); -extern void vdev_space_update(vdev_t *vd, uint64_t space_delta, - uint64_t alloc_delta); +extern void vdev_space_update(vdev_t *vd, int64_t space_delta, + int64_t alloc_delta); extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize); @@ -116,7 +117,8 @@ extern void vdev_config_dirty(vdev_t *vd); extern void vdev_config_clean(vdev_t *vd); extern int vdev_config_sync(vdev_t *vd, uint64_t txg); -extern nvlist_t *vdev_config_generate(vdev_t *vd, int getstats); +extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd, + boolean_t getstats, boolean_t isspare); /* * Label routines @@ -125,7 +127,8 @@ struct uberblock; extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset); extern nvlist_t *vdev_label_read_config(vdev_t *vd); extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub); -int vdev_label_init(vdev_t *vd, uint64_t create_txg); +int vdev_label_init(vdev_t *vd, uint64_t create_txg, boolean_t isreplacing); +int 
vdev_label_spare(vdev_t *vd, uint64_t create_txg); #ifdef __cplusplus } diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h index 1b18df8cda..75e642a495 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h @@ -153,6 +153,7 @@ struct vdev { txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */ uint8_t vdev_reopen_wanted; /* async reopen wanted? */ list_node_t vdev_dirty_node; /* config dirty list */ + uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */ /* * Leaf vdev state. @@ -162,6 +163,7 @@ struct vdev { txg_node_t vdev_dtl_node; /* per-txg dirty DTL linkage */ uint64_t vdev_wholedisk; /* true if this is a whole disk */ uint64_t vdev_offline; /* device taken offline? */ + uint64_t vdev_nparity; /* number of parity devices for raidz */ char *vdev_path; /* vdev path (if any) */ char *vdev_devid; /* vdev devid (if any) */ uint64_t vdev_fault_arg; /* fault injection paramater */ @@ -170,6 +172,7 @@ struct vdev { uint8_t vdev_cache_active; /* vdev_cache and vdev_queue */ uint8_t vdev_tmpoffline; /* device taken offline temporarily? */ uint8_t vdev_detached; /* device detached? 
*/ + uint64_t vdev_isspare; /* was a hot spare */ vdev_queue_t vdev_queue; /* I/O deadline schedule queue */ vdev_cache_t vdev_cache; /* physical block cache */ uint64_t vdev_not_present; /* not present during import */ @@ -245,12 +248,13 @@ typedef struct vdev_label { #define VDEV_ALLOC_LOAD 0 #define VDEV_ALLOC_ADD 1 +#define VDEV_ALLOC_SPARE 2 /* * Allocate or free a vdev */ -extern vdev_t *vdev_alloc(spa_t *spa, nvlist_t *config, vdev_t *parent, - uint_t id, int alloctype); +extern int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *config, + vdev_t *parent, uint_t id, int alloctype); extern void vdev_free(vdev_t *vd); /* @@ -280,6 +284,7 @@ extern vdev_ops_t vdev_raidz_ops; extern vdev_ops_t vdev_disk_ops; extern vdev_ops_t vdev_file_ops; extern vdev_ops_t vdev_missing_ops; +extern vdev_ops_t vdev_spare_ops; /* * Common size functions diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index 726852cb4d..0bbd073fd7 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -49,6 +50,7 @@ static vdev_ops_t *vdev_ops_table[] = { &vdev_raidz_ops, &vdev_mirror_ops, &vdev_replacing_ops, + &vdev_spare_ops, &vdev_disk_ops, &vdev_file_ops, &vdev_missing_ops, @@ -324,6 +326,9 @@ vdev_free_common(vdev_t *vd) if (vd->vdev_devid) spa_strfree(vd->vdev_devid); + if (vd->vdev_isspare) + spa_spare_remove(vd->vdev_guid); + txg_list_destroy(&vd->vdev_ms_list); txg_list_destroy(&vd->vdev_dtl_list); mutex_enter(&vd->vdev_dtl_lock); @@ -345,8 +350,9 @@ vdev_free_common(vdev_t *vd) * creating a new vdev or loading an existing one - the behavior is slightly * different for each case. 
*/ -vdev_t * -vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype) +int +vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, + int alloctype) { vdev_ops_t *ops; char *type; @@ -356,10 +362,10 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype) ASSERT(spa_config_held(spa, RW_WRITER)); if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) - return (NULL); + return (EINVAL); if ((ops = vdev_getops(type)) == NULL) - return (NULL); + return (EINVAL); /* * If this is a load, get the vdev guid from the nvlist. @@ -370,12 +376,21 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype) if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &label_id) || label_id != id) - return (NULL); + return (EINVAL); if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) - return (NULL); + return (EINVAL); + } else if (alloctype == VDEV_ALLOC_SPARE) { + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) + return (EINVAL); } + /* + * The first allocated vdev must be of type 'root'. + */ + if (ops != &vdev_root_ops && spa->spa_root_vdev == NULL) + return (EINVAL); + vd = vdev_alloc_common(spa, id, guid, ops); if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0) @@ -384,6 +399,41 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype) vd->vdev_devid = spa_strdup(vd->vdev_devid); /* + * Set the nparity property for RAID-Z vdevs. + */ + if (ops == &vdev_raidz_ops) { + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, + &vd->vdev_nparity) == 0) { + /* + * Currently, we can only support 2 parity devices. + */ + if (vd->vdev_nparity > 2) + return (EINVAL); + /* + * Older versions can only support 1 parity device. + */ + if (vd->vdev_nparity == 2 && + spa_version(spa) < ZFS_VERSION_RAID6) + return (ENOTSUP); + + } else { + /* + * We require the parity to be specified for SPAs that + * support multiple parity levels. 
+ */ + if (spa_version(spa) >= ZFS_VERSION_RAID6) + return (EINVAL); + + /* + * Otherwise, we default to 1 parity device for RAID-Z. + */ + vd->vdev_nparity = 1; + } + } else { + vd->vdev_nparity = 0; + } + + /* * Set the whole_disk property. If it's not specified, leave the value * as -1. */ @@ -404,6 +454,15 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype) (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift); /* + * Look for the 'is_spare' flag. If this is the case, then we are a + * repurposed hot spare. + */ + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE, + &vd->vdev_isspare); + if (vd->vdev_isspare) + spa_spare_add(vd->vdev_guid); + + /* * If we're a top-level vdev, try to load the allocation parameters. */ if (parent && !parent->vdev_parent && alloctype == VDEV_ALLOC_LOAD) { @@ -430,7 +489,9 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype) */ vdev_add_child(parent, vd); - return (vd); + *vdp = vd; + + return (0); } void @@ -462,6 +523,7 @@ vdev_free(vdev_t *vd) vdev_metaslab_fini(vd); ASSERT3U(vd->vdev_stat.vs_space, ==, 0); + ASSERT3U(vd->vdev_stat.vs_dspace, ==, 0); ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0); /* @@ -506,9 +568,11 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd) tvd->vdev_stat.vs_alloc = svd->vdev_stat.vs_alloc; tvd->vdev_stat.vs_space = svd->vdev_stat.vs_space; + tvd->vdev_stat.vs_dspace = svd->vdev_stat.vs_dspace; svd->vdev_stat.vs_alloc = 0; svd->vdev_stat.vs_space = 0; + svd->vdev_stat.vs_dspace = 0; for (t = 0; t < TXG_SIZE; t++) { while ((msp = txg_list_remove(&svd->vdev_ms_list, t)) != NULL) @@ -526,6 +590,9 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd) tvd->vdev_reopen_wanted = svd->vdev_reopen_wanted; svd->vdev_reopen_wanted = 0; + + tvd->vdev_deflate_ratio = svd->vdev_deflate_ratio; + svd->vdev_deflate_ratio = 0; } static void @@ -585,13 +652,28 @@ vdev_remove_parent(vdev_t *cvd) ASSERT(mvd->vdev_children == 1); ASSERT(mvd->vdev_ops == 
&vdev_mirror_ops || - mvd->vdev_ops == &vdev_replacing_ops); + mvd->vdev_ops == &vdev_replacing_ops || + mvd->vdev_ops == &vdev_spare_ops); cvd->vdev_ashift = mvd->vdev_ashift; vdev_remove_child(mvd, cvd); vdev_remove_child(pvd, mvd); cvd->vdev_id = mvd->vdev_id; vdev_add_child(pvd, cvd); + /* + * If we created a new toplevel vdev, then we need to change the child's + * vdev GUID to match the old toplevel vdev. Otherwise, we could have + * detached an offline device, and when we go to import the pool we'll + * think we have two toplevel vdevs, instead of a different version of + * the same toplevel vdev. + */ + if (cvd->vdev_top == cvd) { + pvd->vdev_guid_sum -= cvd->vdev_guid; + cvd->vdev_guid_sum -= cvd->vdev_guid; + cvd->vdev_guid = mvd->vdev_guid; + cvd->vdev_guid_sum += mvd->vdev_guid; + pvd->vdev_guid_sum += cvd->vdev_guid; + } vdev_top_update(cvd->vdev_top, cvd->vdev_top); if (cvd == cvd->vdev_top) @@ -801,6 +883,18 @@ vdev_open(vdev_t *vd) } /* + * If this is a top-level vdev, compute the raidz-deflation + * ratio. Note, we hard-code in 128k (1<<17) because it is the + * current "typical" blocksize. Even if SPA_MAXBLOCKSIZE + * changes, this algorithm must never change, or we will + * inconsistently account for existing bp's. + */ + if (vd->vdev_top == vd) { + vd->vdev_deflate_ratio = (1<<17) / + (vdev_psize_to_asize(vd, 1<<17) >> SPA_MINBLOCKSHIFT); + } + + /* * This allows the ZFS DE to close cases appropriately. If a device * goes away and later returns, we want to close the associated case. * But it's not enough to simply post this only when a device goes from @@ -933,7 +1027,7 @@ vdev_reopen(vdev_t *vd) } int -vdev_create(vdev_t *vd, uint64_t txg) +vdev_create(vdev_t *vd, uint64_t txg, boolean_t isreplacing) { int error; @@ -952,7 +1046,7 @@ vdev_create(vdev_t *vd, uint64_t txg) /* * Recursively initialize all labels. 
*/ - if ((error = vdev_label_init(vd, txg)) != 0) { + if ((error = vdev_label_init(vd, txg, isreplacing)) != 0) { vdev_close(vd); return (error); } @@ -1202,6 +1296,45 @@ vdev_load(vdev_t *vd) VDEV_AUX_CORRUPT_DATA); } +/* + * This special case of vdev_spare() is used for hot spares. Its sole purpose + * is to set the vdev state for the associated vdev. To do this, we make sure + * that we can open the underlying device, then try to read the label, and make + * sure that the label is sane and that it hasn't been repurposed to another + * pool. + */ +int +vdev_validate_spare(vdev_t *vd) +{ + nvlist_t *label; + uint64_t guid, version; + uint64_t state; + + if ((label = vdev_label_read_config(vd)) == NULL) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + return (-1); + } + + if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_VERSION, &version) != 0 || + version > ZFS_VERSION || + nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) != 0 || + guid != vd->vdev_guid || + nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + nvlist_free(label); + return (-1); + } + + /* + * We don't actually check the pool state here. If it's in fact in + * use by another pool, we update this fact on the fly when requested. + */ + nvlist_free(label); + return (0); +} + void vdev_sync_done(vdev_t *vd, uint64_t txg) { @@ -1560,14 +1693,31 @@ vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type, boolean_t complete) * Update the in-core space usage stats for this vdev and the root vdev. */ void -vdev_space_update(vdev_t *vd, uint64_t space_delta, uint64_t alloc_delta) +vdev_space_update(vdev_t *vd, int64_t space_delta, int64_t alloc_delta) { ASSERT(vd == vd->vdev_top); + int64_t dspace_delta = space_delta; do { + if (vd->vdev_ms_count) { + /* + * If this is a top-level vdev, apply the + * inverse of its psize-to-asize (ie. RAID-Z) + * space-expansion factor. 
We must calculate + * this here and not at the root vdev because + * the root vdev's psize-to-asize is simply the + * max of its childrens', thus not accurate + * enough for us. + */ + ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0); + dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) * + vd->vdev_deflate_ratio; + } + mutex_enter(&vd->vdev_stat_lock); vd->vdev_stat.vs_space += space_delta; vd->vdev_stat.vs_alloc += alloc_delta; + vd->vdev_stat.vs_dspace += dspace_delta; mutex_exit(&vd->vdev_stat_lock); } while ((vd = vd->vdev_parent) != NULL); } diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c index 4627745067..335b3e5a36 100644 --- a/usr/src/uts/common/fs/zfs/vdev_label.c +++ b/usr/src/uts/common/fs/zfs/vdev_label.c @@ -187,7 +187,8 @@ vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, * Generate the nvlist representing this vdev's config. */ nvlist_t * -vdev_config_generate(vdev_t *vd, int getstats) +vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, + boolean_t isspare) { nvlist_t *nv = NULL; @@ -195,7 +196,9 @@ vdev_config_generate(vdev_t *vd, int getstats) VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, vd->vdev_ops->vdev_op_type) == 0); - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id) == 0); + if (!isspare) + VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id) + == 0); VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0); if (vd->vdev_path != NULL) @@ -206,6 +209,27 @@ vdev_config_generate(vdev_t *vd, int getstats) VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vd->vdev_devid) == 0); + if (vd->vdev_nparity != 0) { + ASSERT(strcmp(vd->vdev_ops->vdev_op_type, + VDEV_TYPE_RAIDZ) == 0); + + /* + * Make sure someone hasn't managed to sneak a fancy new vdev + * into a crufty old storage pool. 
+ */ + ASSERT(vd->vdev_nparity == 1 || + (vd->vdev_nparity == 2 && + spa_version(spa) >= ZFS_VERSION_RAID6)); + + /* + * Note that we'll add the nparity tag even on storage pools + * that only support a single parity device -- older software + * will just ignore it. + */ + VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, + vd->vdev_nparity) == 0); + } + if (vd->vdev_wholedisk != -1ULL) VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, vd->vdev_wholedisk) == 0); @@ -213,7 +237,10 @@ vdev_config_generate(vdev_t *vd, int getstats) if (vd->vdev_not_present) VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1) == 0); - if (vd == vd->vdev_top) { + if (vd->vdev_isspare) + VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1) == 0); + + if (!isspare && vd == vd->vdev_top) { VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, vd->vdev_ms_array) == 0); VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT, @@ -243,8 +270,8 @@ vdev_config_generate(vdev_t *vd, int getstats) KM_SLEEP); for (c = 0; c < vd->vdev_children; c++) - child[c] = vdev_config_generate(vd->vdev_child[c], - getstats); + child[c] = vdev_config_generate(spa, vd->vdev_child[c], + getstats, isspare); VERIFY(nvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, child, vd->vdev_children) == 0); @@ -307,8 +334,9 @@ vdev_label_read_config(vdev_t *vd) return (config); } -int -vdev_label_init(vdev_t *vd, uint64_t crtxg) +static int +vdev_label_common(vdev_t *vd, uint64_t crtxg, boolean_t isspare, + boolean_t isreplacing) { spa_t *spa = vd->vdev_spa; nvlist_t *label; @@ -324,7 +352,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg) ASSERT(spa_config_held(spa, RW_WRITER)); for (c = 0; c < vd->vdev_children; c++) - if ((error = vdev_label_init(vd->vdev_child[c], crtxg)) != 0) + if ((error = vdev_label_common(vd->vdev_child[c], + crtxg, isspare, isreplacing)) != 0) return (error); if (!vd->vdev_ops->vdev_op_leaf) @@ -346,7 +375,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg) */ if (crtxg != 0 && (label 
= vdev_label_read_config(vd)) != NULL) { - uint64_t state, pool_guid, device_guid, txg; + uint64_t state, pool_guid, device_guid, txg, spare; uint64_t mycrtxg = 0; (void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG, @@ -361,11 +390,61 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg) spa_guid_exists(pool_guid, device_guid) && nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, &txg) == 0 && (txg != 0 || mycrtxg == crtxg)) { - dprintf("vdev %s in use, pool_state %d\n", - vdev_description(vd), state); + if (isspare && pool_guid != spa_guid(spa) && + nvlist_lookup_uint64(label, + ZPOOL_CONFIG_IS_SPARE, &spare) == 0 && + !spa_has_spare(spa, device_guid)) { + /* + * If this is a request to add a spare that + * is actively in use in another pool, simply + * return success, after updating the guid. + */ + vdev_t *pvd = vd->vdev_parent; + + for (; pvd != NULL; pvd = pvd->vdev_parent) { + pvd->vdev_guid_sum -= vd->vdev_guid; + pvd->vdev_guid_sum += device_guid; + } + + vd->vdev_guid = vd->vdev_guid_sum = device_guid; + nvlist_free(label); + return (0); + } nvlist_free(label); return (EBUSY); } + + /* + * If this device is reserved as a hot spare for this pool, + * adopt its GUID, and mark it as such. This way we preserve + * the fact that it is a hot spare even as it is added and + * removed from the pool. 
+ */ + if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, + &state) == 0 && state == POOL_STATE_SPARE && + nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, + &device_guid) == 0) { + vdev_t *pvd = vd->vdev_parent; + + if ((isspare || !isreplacing) && + spa_has_spare(spa, device_guid)) { + nvlist_free(label); + return (EBUSY); + } + + for (; pvd != NULL; pvd = pvd->vdev_parent) { + pvd->vdev_guid_sum -= vd->vdev_guid; + pvd->vdev_guid_sum += device_guid; + } + + vd->vdev_guid = vd->vdev_guid_sum = device_guid; + + if (!isspare) { + vd->vdev_isspare = B_TRUE; + spa_spare_add(vd->vdev_guid); + } + } + nvlist_free(label); } @@ -380,14 +459,35 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg) * We mark it as being from txg 0 to indicate that it's not * really part of an active pool just yet. The labels will * be written again with a meaningful txg by spa_sync(). + * + * For hot spares, we generate a special label that identifies as a + * mutually shared hot spare. If this is being added as a hot spare, + * always write out the spare label. If this was a hot spare, then + * always label it as such. If we are adding the vdev, it will remain + * labelled in this state until it's really added to the config. If we + * are removing the vdev or destroying the pool, then it goes back to + * its original hot spare state. */ - label = spa_config_generate(spa, vd, 0ULL, B_FALSE); - - /* - * Add our creation time. This allows us to detect multiple vdev - * uses as described above, and automatically expires if we fail. 
- */ - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG, crtxg) == 0); + if (isspare || vd->vdev_isspare) { + VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, + spa_version(spa)) == 0); + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, + POOL_STATE_SPARE) == 0); + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, + vd->vdev_guid) == 0); + } else { + label = spa_config_generate(spa, vd, 0ULL, B_FALSE); + + /* + * Add our creation time. This allows us to detect multiple + * vdev uses as described above, and automatically expires if we + * fail. + */ + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG, + crtxg) == 0); + } buf = vp->vp_nvlist; buflen = sizeof (vp->vp_nvlist); @@ -449,6 +549,22 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg) return (error); } +int +vdev_label_init(vdev_t *vd, uint64_t crtxg, boolean_t isreplacing) +{ + return (vdev_label_common(vd, crtxg, B_FALSE, isreplacing)); +} + +/* + * Label a disk as a hot spare. A hot spare label is a special label with only + * the following members: version, pool_state, and guid. 
+ */ +int +vdev_label_spare(vdev_t *vd, uint64_t crtxg) +{ + return (vdev_label_common(vd, crtxg, B_TRUE, B_FALSE)); +} + /* * ========================================================================== * uberblock load/sync diff --git a/usr/src/uts/common/fs/zfs/vdev_mirror.c b/usr/src/uts/common/fs/zfs/vdev_mirror.c index eb3f0a862d..14a6ce7e6e 100644 --- a/usr/src/uts/common/fs/zfs/vdev_mirror.c +++ b/usr/src/uts/common/fs/zfs/vdev_mirror.c @@ -85,6 +85,7 @@ vdev_mirror_map_alloc(zio_t *zio) for (c = 0; c < mm->mm_children; c++) { mc = &mm->mm_child[c]; + mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c])); mc->mc_offset = DVA_GET_OFFSET(&dva[c]); } @@ -93,7 +94,8 @@ vdev_mirror_map_alloc(zio_t *zio) mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP); mm->mm_children = c; - mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops); + mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops || + vd->vdev_ops == &vdev_spare_ops); mm->mm_preferred = mm->mm_replacing ? 0 : spa_get_random(c); mm->mm_root = B_FALSE; @@ -477,3 +479,14 @@ vdev_ops_t vdev_replacing_ops = { VDEV_TYPE_REPLACING, /* name of this vdev type */ B_FALSE /* not a leaf vdev */ }; + +vdev_ops_t vdev_spare_ops = { + vdev_mirror_open, + vdev_mirror_close, + vdev_default_asize, + vdev_mirror_io_start, + vdev_mirror_io_done, + vdev_mirror_state_change, + VDEV_TYPE_SPARE, /* name of this vdev type */ + B_FALSE /* not a leaf vdev */ +}; diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c index 33225de39b..3afeab0aef 100644 --- a/usr/src/uts/common/fs/zfs/vdev_raidz.c +++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -35,49 +36,178 @@ /* * Virtual device vector for RAID-Z. + * + * This vdev supports both single and double parity. For single parity, we + * use a simple XOR of all the data columns. 
For double parity, we use both + * the simple XOR as well as a technique described in "The mathematics of + * RAID-6" by H. Peter Anvin. This technique defines a Galois field, GF(2^8), + * over the integers expressible in a single byte. Briefly, the operations on + * the field are defined as follows: + * + * o addition (+) is represented by a bitwise XOR + * o subtraction (-) is therefore identical to addition: A + B = A - B + * o multiplication of A by 2 is defined by the following bitwise expression: + * (A * 2)_7 = A_6 + * (A * 2)_6 = A_5 + * (A * 2)_5 = A_4 + * (A * 2)_4 = A_3 + A_7 + * (A * 2)_3 = A_2 + A_7 + * (A * 2)_2 = A_1 + A_7 + * (A * 2)_1 = A_0 + * (A * 2)_0 = A_7 + * + * In C, multiplying by 2 is therefore ((a << 1) ^ ((a & 0x80) ? 0x1d : 0)). + * + * Observe that any number in the field (except for 0) can be expressed as a + * power of 2 -- a generator for the field. We store a table of the powers of + * 2 and logs base 2 for quick look ups, and exploit the fact that A * B can + * be rewritten as 2^(log_2(A) + log_2(B)) (where '+' is normal addition rather + * than field addition). The inverse of a field element A (A^-1) is A^254. + * + * The two parity columns, P and Q, over several data columns, D_0, ... D_n-1, + * can be expressed by field operations: + * + * P = D_0 + D_1 + ... + D_n-2 + D_n-1 + * Q = 2^n-1 * D_0 + 2^n-2 * D_1 + ... + 2^1 * D_n-2 + 2^0 * D_n-1 + * = ((...((D_0) * 2 + D_1) * 2 + ...) * 2 + D_n-2) * 2 + D_n-1 + * + * See the reconstruction code below for how P and Q can be used individually or + * in concert to recover missing data columns. */ -/* - * We currently allow up to two-way replication (i.e. single-fault - * reconstruction) models in RAID-Z vdevs. The blocks in such vdevs - * must all be multiples of two times the leaf vdev blocksize.
- */ -#define VDEV_RAIDZ_ALIGN 2ULL - typedef struct raidz_col { - uint64_t rc_col; - uint64_t rc_offset; - uint64_t rc_size; - void *rc_data; - int rc_error; - short rc_tried; - short rc_skipped; + uint64_t rc_devidx; /* child device index for I/O */ + uint64_t rc_offset; /* device offset */ + uint64_t rc_size; /* I/O size */ + void *rc_data; /* I/O data */ + int rc_error; /* I/O error for this device */ + uint8_t rc_tried; /* Did we attempt this I/O column? */ + uint8_t rc_skipped; /* Did we skip this I/O column? */ } raidz_col_t; typedef struct raidz_map { - uint64_t rm_cols; - uint64_t rm_bigcols; - uint64_t rm_asize; - int rm_missing_child; - int rm_firstdatacol; - raidz_col_t rm_col[1]; + uint64_t rm_cols; /* Column count */ + uint64_t rm_bigcols; /* Number of oversized columns */ + uint64_t rm_asize; /* Actual total I/O size */ + uint64_t rm_missingdata; /* Count of missing data devices */ + uint64_t rm_missingparity; /* Count of missing parity devices */ + uint64_t rm_firstdatacol; /* First data column/parity count */ + raidz_col_t rm_col[1]; /* Flexible array of I/O columns */ } raidz_map_t; +#define VDEV_RAIDZ_P 0 +#define VDEV_RAIDZ_Q 1 + +#define VDEV_RAIDZ_MAXPARITY 2 + +#define VDEV_RAIDZ_MUL_2(a) (((a) << 1) ^ (((a) & 0x80) ? 0x1d : 0)) + +/* + * These two tables represent powers and logs of 2 in the Galois field defined + * above. These values were computed by repeatedly multiplying by 2 as above. 
+ */ +static const uint8_t vdev_raidz_pow2[256] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, + 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, + 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, + 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, + 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, + 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, + 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, + 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, + 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, + 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0, + 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, + 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, + 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, + 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, + 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, + 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, + 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, + 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, + 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, + 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, + 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, + 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, + 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, + 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, + 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e, + 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, + 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, + 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09, + 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, + 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16, + 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, + 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01 +}; +static const uint8_t vdev_raidz_log2[256] = { + 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, + 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b, + 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81, + 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71, + 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, + 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 
0x45, + 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, + 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6, + 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd, + 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88, + 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, + 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, + 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e, + 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d, + 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b, + 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, + 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d, + 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18, + 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c, + 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e, + 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd, + 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, + 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e, + 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2, + 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76, + 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, + 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, + 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a, + 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51, + 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7, + 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, + 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf, +}; + +/* + * Multiply a given number by 2 raised to the given power. 
+ */ +static uint8_t +vdev_raidz_exp2(uint_t a, int exp) +{ + if (a == 0) + return (0); + + ASSERT(exp >= 0); + ASSERT(vdev_raidz_log2[a] > 0 || a == 1); + + exp += vdev_raidz_log2[a]; + if (exp > 255) + exp -= 255; + + return (vdev_raidz_pow2[exp]); +} + static raidz_map_t * -vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols) +vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols, + uint64_t nparity) { raidz_map_t *rm; uint64_t b = zio->io_offset >> unit_shift; uint64_t s = zio->io_size >> unit_shift; uint64_t f = b % dcols; uint64_t o = (b / dcols) << unit_shift; - uint64_t q, r, c, bc, col, acols, coff; - int firstdatacol; + uint64_t q, r, c, bc, col, acols, coff, devidx; - q = s / (dcols - 1); - r = s - q * (dcols - 1); - bc = r + !!r; - firstdatacol = 1; + q = s / (dcols - nparity); + r = s - q * (dcols - nparity); + bc = (r == 0 ? 0 : r + nparity); acols = (q == 0 ? bc : dcols); @@ -86,8 +216,9 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols) rm->rm_cols = acols; rm->rm_bigcols = bc; rm->rm_asize = 0; - rm->rm_missing_child = -1; - rm->rm_firstdatacol = firstdatacol; + rm->rm_missingdata = 0; + rm->rm_missingparity = 0; + rm->rm_firstdatacol = nparity; for (c = 0; c < acols; c++) { col = f + c; @@ -96,7 +227,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols) col -= dcols; coff += 1ULL << unit_shift; } - rm->rm_col[c].rc_col = col; + rm->rm_col[c].rc_devidx = col; rm->rm_col[c].rc_offset = coff; rm->rm_col[c].rc_size = (q + (c < bc)) << unit_shift; rm->rm_col[c].rc_data = NULL; @@ -106,7 +237,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols) rm->rm_asize += rm->rm_col[c].rc_size; } - rm->rm_asize = P2ROUNDUP(rm->rm_asize, VDEV_RAIDZ_ALIGN << unit_shift); + rm->rm_asize = roundup(rm->rm_asize, (nparity + 1) << unit_shift); for (c = 0; c < rm->rm_firstdatacol; c++) rm->rm_col[c].rc_data = zio_buf_alloc(rm->rm_col[c].rc_size); @@ -118,18 +249,29 @@ 
vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols) rm->rm_col[c - 1].rc_size; /* - * To prevent hot parity disks, switch the parity and data - * columns every 1MB. + * If all data stored spans all columns, there's a danger that parity + * will always be on the same device and, since parity isn't read + * during normal operation, that that device's I/O bandwidth won't be + * used effectively. We therefore switch the parity every 1MB. + * + * ... at least that was, ostensibly, the theory. As a practical + * matter unless we juggle the parity between all devices evenly, we + * won't see any benefit. Further, occasional writes that aren't a + * multiple of the LCM of the number of children and the minimum + * stripe width are sufficient to avoid pessimal behavior. + * Unfortunately, this decision created an implicit on-disk format + * requirement that we need to support for all eternity (but only for + * RAID-Z with one parity device). */ ASSERT(rm->rm_cols >= 2); ASSERT(rm->rm_col[0].rc_size == rm->rm_col[1].rc_size); - if (zio->io_offset & (1ULL << 20)) { - col = rm->rm_col[0].rc_col; + if (rm->rm_firstdatacol == 1 && (zio->io_offset & (1ULL << 20))) { + devidx = rm->rm_col[0].rc_devidx; o = rm->rm_col[0].rc_offset; - rm->rm_col[0].rc_col = rm->rm_col[1].rc_col; + rm->rm_col[0].rc_devidx = rm->rm_col[1].rc_devidx; rm->rm_col[0].rc_offset = rm->rm_col[1].rc_offset; - rm->rm_col[1].rc_col = col; + rm->rm_col[1].rc_devidx = devidx; rm->rm_col[1].rc_offset = o; } @@ -151,47 +293,284 @@ vdev_raidz_map_free(zio_t *zio) } static void -vdev_raidz_reconstruct(raidz_map_t *rm, int x) +vdev_raidz_generate_parity_p(raidz_map_t *rm) { - uint64_t *dst, *src, count, xsize, csize; - int i, c; + uint64_t *p, *src, pcount, ccount, i; + int c; + + pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); + + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + src = rm->rm_col[c].rc_data; + p = rm->rm_col[VDEV_RAIDZ_P].rc_data; + ccount = rm->rm_col[c].rc_size 
/ sizeof (src[0]); + + if (c == rm->rm_firstdatacol) { + ASSERT(ccount == pcount); + for (i = 0; i < ccount; i++, p++, src++) { + *p = *src; + } + } else { + ASSERT(ccount <= pcount); + for (i = 0; i < ccount; i++, p++, src++) { + *p ^= *src; + } + } + } +} + +static void +vdev_raidz_generate_parity_pq(raidz_map_t *rm) +{ + uint64_t *q, *p, *src, pcount, ccount, mask, i; + int c; + + pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); + ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == + rm->rm_col[VDEV_RAIDZ_Q].rc_size); + + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + src = rm->rm_col[c].rc_data; + p = rm->rm_col[VDEV_RAIDZ_P].rc_data; + q = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + ccount = rm->rm_col[c].rc_size / sizeof (src[0]); + + if (c == rm->rm_firstdatacol) { + ASSERT(ccount == pcount || ccount == 0); + for (i = 0; i < ccount; i++, p++, q++, src++) { + *q = *src; + *p = *src; + } + for (; i < pcount; i++, p++, q++, src++) { + *q = 0; + *p = 0; + } + } else { + ASSERT(ccount <= pcount); + + /* + * Rather than multiplying each byte individually (as + * described above), we are able to handle 8 at once + * by generating a mask based on the high bit in each + * byte and using that to conditionally XOR in 0x1d. + */ + for (i = 0; i < ccount; i++, p++, q++, src++) { + mask = *q & 0x8080808080808080ULL; + mask = (mask << 1) - (mask >> 7); + *q = ((*q << 1) & 0xfefefefefefefefeULL) ^ + (mask & 0x1d1d1d1d1d1d1d1dULL); + *q ^= *src; + *p ^= *src; + } + + /* + * Treat short columns as though they are full of 0s. 
+ */ + for (; i < pcount; i++, q++) { + mask = *q & 0x8080808080808080ULL; + mask = (mask << 1) - (mask >> 7); + *q = ((*q << 1) & 0xfefefefefefefefeULL) ^ + (mask & 0x1d1d1d1d1d1d1d1dULL); + } + } + } +} + +static void +vdev_raidz_reconstruct_p(raidz_map_t *rm, int x) +{ + uint64_t *dst, *src, xcount, ccount, count, i; + int c; + + xcount = rm->rm_col[x].rc_size / sizeof (src[0]); + ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0])); + ASSERT(xcount > 0); + + src = rm->rm_col[VDEV_RAIDZ_P].rc_data; + dst = rm->rm_col[x].rc_data; + for (i = 0; i < xcount; i++, dst++, src++) { + *dst = *src; + } + + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + src = rm->rm_col[c].rc_data; + dst = rm->rm_col[x].rc_data; - for (c = 0; c < rm->rm_cols; c++) { if (c == x) continue; + + ccount = rm->rm_col[c].rc_size / sizeof (src[0]); + count = MIN(ccount, xcount); + + for (i = 0; i < count; i++, dst++, src++) { + *dst ^= *src; + } + } +} + +static void +vdev_raidz_reconstruct_q(raidz_map_t *rm, int x) +{ + uint64_t *dst, *src, xcount, ccount, count, mask, i; + uint8_t *b; + int c, j, exp; + + xcount = rm->rm_col[x].rc_size / sizeof (src[0]); + ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_Q].rc_size / sizeof (src[0])); + + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { src = rm->rm_col[c].rc_data; dst = rm->rm_col[x].rc_data; - csize = rm->rm_col[c].rc_size; - xsize = rm->rm_col[x].rc_size; - count = MIN(csize, xsize) / sizeof (uint64_t); - if (c == !x) { + + if (c == x) + ccount = 0; + else + ccount = rm->rm_col[c].rc_size / sizeof (src[0]); + + count = MIN(ccount, xcount); + + if (c == rm->rm_firstdatacol) { + for (i = 0; i < count; i++, dst++, src++) { + *dst = *src; + } + for (; i < xcount; i++, dst++) { + *dst = 0; + } + + } else { /* - * The initial copy happens at either c == 0 or c == 1. - * Both of these columns are 'big' columns, so we'll - * definitely initialize all of column x. 
+ * For an explanation of this, see the comment in + * vdev_raidz_generate_parity_pq() above. */ - ASSERT3U(xsize, <=, csize); - for (i = 0; i < count; i++) - *dst++ = *src++; - } else { - for (i = 0; i < count; i++) - *dst++ ^= *src++; + for (i = 0; i < count; i++, dst++, src++) { + mask = *dst & 0x8080808080808080ULL; + mask = (mask << 1) - (mask >> 7); + *dst = ((*dst << 1) & 0xfefefefefefefefeULL) ^ + (mask & 0x1d1d1d1d1d1d1d1dULL); + *dst ^= *src; + } + + for (; i < xcount; i++, dst++) { + mask = *dst & 0x8080808080808080ULL; + mask = (mask << 1) - (mask >> 7); + *dst = ((*dst << 1) & 0xfefefefefefefefeULL) ^ + (mask & 0x1d1d1d1d1d1d1d1dULL); + } + } + } + + src = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + dst = rm->rm_col[x].rc_data; + exp = 255 - (rm->rm_cols - 1 - x); + + for (i = 0; i < xcount; i++, dst++, src++) { + *dst ^= *src; + for (j = 0, b = (uint8_t *)dst; j < 8; j++, b++) { + *b = vdev_raidz_exp2(*b, exp); } } } +static void +vdev_raidz_reconstruct_pq(raidz_map_t *rm, int x, int y) +{ + uint8_t *p, *q, *pxy, *qxy, *xd, *yd, tmp, a, b, aexp, bexp; + void *pdata, *qdata; + uint64_t xsize, ysize, i; + + ASSERT(x < y); + ASSERT(x >= rm->rm_firstdatacol); + ASSERT(y < rm->rm_cols); + + ASSERT(rm->rm_col[x].rc_size >= rm->rm_col[y].rc_size); + + /* + * Move the parity data aside -- we're going to compute parity as + * though columns x and y were full of zeros -- Pxy and Qxy. We want to + * reuse the parity generation mechanism without trashing the actual + * parity so we make those columns appear to be full of zeros by + * setting their lengths to zero. 
+ */ + pdata = rm->rm_col[VDEV_RAIDZ_P].rc_data; + qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + xsize = rm->rm_col[x].rc_size; + ysize = rm->rm_col[y].rc_size; + + rm->rm_col[VDEV_RAIDZ_P].rc_data = + zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_P].rc_size); + rm->rm_col[VDEV_RAIDZ_Q].rc_data = + zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_Q].rc_size); + rm->rm_col[x].rc_size = 0; + rm->rm_col[y].rc_size = 0; + + vdev_raidz_generate_parity_pq(rm); + + rm->rm_col[x].rc_size = xsize; + rm->rm_col[y].rc_size = ysize; + + p = pdata; + q = qdata; + pxy = rm->rm_col[VDEV_RAIDZ_P].rc_data; + qxy = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + xd = rm->rm_col[x].rc_data; + yd = rm->rm_col[y].rc_data; + + /* + * We now have: + * Pxy = P + D_x + D_y + * Qxy = Q + 2^(ndevs - 1 - x) * D_x + 2^(ndevs - 1 - y) * D_y + * + * We can then solve for D_x: + * D_x = A * (P + Pxy) + B * (Q + Qxy) + * where + * A = 2^(x - y) * (2^(x - y) + 1)^-1 + * B = 2^(ndevs - 1 - x) * (2^(x - y) + 1)^-1 + * + * With D_x in hand, we can easily solve for D_y: + * D_y = P + Pxy + D_x + */ + + a = vdev_raidz_pow2[255 + x - y]; + b = vdev_raidz_pow2[255 - (rm->rm_cols - 1 - x)]; + tmp = 255 - vdev_raidz_log2[a ^ 1]; + + aexp = vdev_raidz_log2[vdev_raidz_exp2(a, tmp)]; + bexp = vdev_raidz_log2[vdev_raidz_exp2(b, tmp)]; + + for (i = 0; i < xsize; i++, p++, q++, pxy++, qxy++, xd++, yd++) { + *xd = vdev_raidz_exp2(*p ^ *pxy, aexp) ^ + vdev_raidz_exp2(*q ^ *qxy, bexp); + + if (i < ysize) + *yd = *p ^ *pxy ^ *xd; + } + + zio_buf_free(rm->rm_col[VDEV_RAIDZ_P].rc_data, + rm->rm_col[VDEV_RAIDZ_P].rc_size); + zio_buf_free(rm->rm_col[VDEV_RAIDZ_Q].rc_data, + rm->rm_col[VDEV_RAIDZ_Q].rc_size); + + /* + * Restore the saved parity data. 
+ */ + rm->rm_col[VDEV_RAIDZ_P].rc_data = pdata; + rm->rm_col[VDEV_RAIDZ_Q].rc_data = qdata; +} + + static int vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) { vdev_t *cvd; + uint64_t nparity = vd->vdev_nparity; int c, error; int lasterror = 0; int numerrors = 0; - /* - * XXX -- minimum children should be raid-type-specific - */ - if (vd->vdev_children < 2) { + ASSERT(nparity > 0); + + if (nparity > VDEV_RAIDZ_MAXPARITY || + vd->vdev_children < nparity + 1) { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (EINVAL); } @@ -211,7 +590,7 @@ vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) *asize *= vd->vdev_children; - if (numerrors > 1) { + if (numerrors > nparity) { vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; return (lasterror); } @@ -234,10 +613,11 @@ vdev_raidz_asize(vdev_t *vd, uint64_t psize) uint64_t asize; uint64_t ashift = vd->vdev_top->vdev_ashift; uint64_t cols = vd->vdev_children; + uint64_t nparity = vd->vdev_nparity; asize = ((psize - 1) >> ashift) + 1; - asize += (asize + cols - 2) / (cols - 1); - asize = P2ROUNDUP(asize, VDEV_RAIDZ_ALIGN) << ashift; + asize += nparity * ((asize + cols - nparity - 1) / (cols - nparity)); + asize = roundup(asize, nparity + 1) << ashift; return (asize); } @@ -270,20 +650,23 @@ vdev_raidz_io_start(zio_t *zio) raidz_col_t *rc; int c; - rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children); + rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children, + vd->vdev_nparity); ASSERT3U(rm->rm_asize, ==, vdev_psize_to_asize(vd, zio->io_size)); if (zio->io_type == ZIO_TYPE_WRITE) { - /* - * Generate RAID parity in virtual column 0. + * Generate RAID parity in the first virtual columns. 
*/ - vdev_raidz_reconstruct(rm, 0); + if (rm->rm_firstdatacol == 1) + vdev_raidz_generate_parity_p(rm); + else + vdev_raidz_generate_parity_pq(rm); for (c = 0; c < rm->rm_cols; c++) { rc = &rm->rm_col[c]; - cvd = vd->vdev_child[rc->rc_col]; + cvd = vd->vdev_child[rc->rc_devidx]; zio_nowait(zio_vdev_child_io(zio, NULL, cvd, rc->rc_offset, rc->rc_data, rc->rc_size, zio->io_type, zio->io_priority, ZIO_FLAG_CANFAIL, @@ -295,23 +678,34 @@ vdev_raidz_io_start(zio_t *zio) ASSERT(zio->io_type == ZIO_TYPE_READ); + /* + * Iterate over the columns in reverse order so that we hit the parity + * last -- any errors along the way will force us to read the parity + * data. + */ for (c = rm->rm_cols - 1; c >= 0; c--) { rc = &rm->rm_col[c]; - cvd = vd->vdev_child[rc->rc_col]; + cvd = vd->vdev_child[rc->rc_devidx]; if (vdev_is_dead(cvd)) { - rm->rm_missing_child = c; + if (c >= rm->rm_firstdatacol) + rm->rm_missingdata++; + else + rm->rm_missingparity++; rc->rc_error = ENXIO; rc->rc_tried = 1; /* don't even try */ rc->rc_skipped = 1; continue; } if (vdev_dtl_contains(&cvd->vdev_dtl_map, bp->blk_birth, 1)) { - rm->rm_missing_child = c; + if (c >= rm->rm_firstdatacol) + rm->rm_missingdata++; + else + rm->rm_missingparity++; rc->rc_error = ESTALE; rc->rc_skipped = 1; continue; } - if (c >= rm->rm_firstdatacol || rm->rm_missing_child != -1 || + if (c >= rm->rm_firstdatacol || rm->rm_missingdata > 0 || (zio->io_flags & ZIO_FLAG_SCRUB)) { zio_nowait(zio_vdev_child_io(zio, NULL, cvd, rc->rc_offset, rc->rc_data, rc->rc_size, @@ -329,7 +723,7 @@ vdev_raidz_io_start(zio_t *zio) static void raidz_checksum_error(zio_t *zio, raidz_col_t *rc) { - vdev_t *vd = zio->io_vd->vdev_child[rc->rc_col]; + vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx]; dprintf_bp(zio->io_bp, "imputed checksum error on %s: ", vdev_description(vd)); @@ -344,6 +738,50 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc) zio->io_spa, vd, zio, rc->rc_offset, rc->rc_size); } +/* + * Generate the parity from the data columns. 
If we tried and were able to + * read the parity without error, verify that the generated parity matches the + * data we read. If it doesn't, we fire off a checksum error. Return the + * number of such failures. + */ +static int +raidz_parity_verify(zio_t *zio, raidz_map_t *rm) +{ + void *orig[VDEV_RAIDZ_MAXPARITY]; + int c, ret = 0; + raidz_col_t *rc; + + for (c = 0; c < rm->rm_firstdatacol; c++) { + rc = &rm->rm_col[c]; + if (!rc->rc_tried || rc->rc_error != 0) + continue; + orig[c] = zio_buf_alloc(rc->rc_size); + bcopy(rc->rc_data, orig[c], rc->rc_size); + } + + if (rm->rm_firstdatacol == 1) + vdev_raidz_generate_parity_p(rm); + else + vdev_raidz_generate_parity_pq(rm); + + for (c = 0; c < rm->rm_firstdatacol; c++) { + rc = &rm->rm_col[c]; + if (!rc->rc_tried || rc->rc_error != 0) + continue; + if (bcmp(orig[c], rc->rc_data, rc->rc_size) != 0) { + raidz_checksum_error(zio, rc); + rc->rc_error = ECKSUM; + ret++; + } + zio_buf_free(orig[c], rc->rc_size); + } + + return (ret); +} + +static uint64_t raidz_corrected_p; +static uint64_t raidz_corrected_q; +static uint64_t raidz_corrected_pq; static void vdev_raidz_io_done(zio_t *zio) @@ -351,15 +789,20 @@ vdev_raidz_io_done(zio_t *zio) vdev_t *vd = zio->io_vd; vdev_t *cvd; raidz_map_t *rm = zio->io_vsd; - raidz_col_t *rc; + raidz_col_t *rc, *rc1; int unexpected_errors = 0; - int c; + int parity_errors = 0; + int data_errors = 0; + int n, c, c1; ASSERT(zio->io_bp != NULL); /* XXX need to add code to enforce this */ zio->io_error = 0; zio->io_numerrors = 0; + ASSERT(rm->rm_missingparity <= rm->rm_firstdatacol); + ASSERT(rm->rm_missingdata <= rm->rm_cols - rm->rm_firstdatacol); + for (c = 0; c < rm->rm_cols; c++) { rc = &rm->rm_col[c]; @@ -370,8 +813,15 @@ vdev_raidz_io_done(zio_t *zio) if (rc->rc_error) { if (zio->io_error != EIO) zio->io_error = rc->rc_error; + + if (c < rm->rm_firstdatacol) + parity_errors++; + else + data_errors++; + if (!rc->rc_skipped) unexpected_errors++; + zio->io_numerrors++; } } @@ -392,149
+842,288 @@ vdev_raidz_io_done(zio_t *zio) } ASSERT(zio->io_type == ZIO_TYPE_READ); + /* + * There are three potential phases for a read: + * 1. produce valid data from the columns read + * 2. read all disks and try again + * 3. perform combinatorial reconstruction + * + * Each phase is progressively both more expensive and less likely to + * occur. If we encounter more errors than we can repair or all phases + * fail, we have no choice but to return an error. + */ /* - * If there were no I/O errors, and the data checksums correctly, - * the read is complete. + * If the number of errors we saw was correctable -- less than or equal + * to the number of parity disks -- attempt to produce data that has a + * valid checksum. Naturally, zero errors falls into this case. */ - /* XXPOLICY */ - if (zio->io_numerrors == 0 && zio_checksum_error(zio) == 0) { - ASSERT(unexpected_errors == 0); - ASSERT(zio->io_error == 0); + if (zio->io_numerrors <= rm->rm_firstdatacol) { + switch (data_errors) { + case 0: + if (zio_checksum_error(zio) == 0) { + zio->io_error = 0; + n = raidz_parity_verify(zio, rm); + unexpected_errors += n; + ASSERT(parity_errors + n <= + rm->rm_firstdatacol); + goto done; + } + break; - /* - * We know the data's good. If we read the parity, - * verify that it's good as well. If not, fix it. - */ - for (c = 0; c < rm->rm_firstdatacol; c++) { - void *orig; - rc = &rm->rm_col[c]; - if (!rc->rc_tried) - continue; - orig = zio_buf_alloc(rc->rc_size); - bcopy(rc->rc_data, orig, rc->rc_size); - vdev_raidz_reconstruct(rm, c); - if (bcmp(orig, rc->rc_data, rc->rc_size) != 0) { - raidz_checksum_error(zio, rc); - rc->rc_error = ECKSUM; - unexpected_errors++; + case 1: + ASSERT(parity_errors < rm->rm_firstdatacol); + + /* + * Find the column that reported the error. 
+ */ + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + rc = &rm->rm_col[c]; + if (rc->rc_error != 0) + break; + } + ASSERT(c != rm->rm_cols); + ASSERT(!rc->rc_skipped || rc->rc_error == ENXIO || + rc->rc_error == ESTALE); + + if (rm->rm_col[VDEV_RAIDZ_P].rc_error == 0) { + vdev_raidz_reconstruct_p(rm, c); + } else { + ASSERT(rm->rm_firstdatacol > 1); + vdev_raidz_reconstruct_q(rm, c); } - zio_buf_free(orig, rc->rc_size); - } - goto done; - } - /* - * If there was exactly one I/O error, it's the one we expected, - * and the reconstructed data checksums, the read is complete. - * This happens when one child is offline and vdev_fault_assess() - * knows it, or when one child has stale data and the DTL knows it. - */ - if (zio->io_numerrors == 1 && (c = rm->rm_missing_child) != -1) { - rc = &rm->rm_col[c]; - ASSERT(unexpected_errors == 0); - ASSERT(rc->rc_error == ENXIO || rc->rc_error == ESTALE); - vdev_raidz_reconstruct(rm, c); - if (zio_checksum_error(zio) == 0) { - zio->io_error = 0; - goto done; + if (zio_checksum_error(zio) == 0) { + zio->io_error = 0; + if (rm->rm_col[VDEV_RAIDZ_P].rc_error == 0) + atomic_inc_64(&raidz_corrected_p); + else + atomic_inc_64(&raidz_corrected_q); + + /* + * If there's more than one parity disk, + * confirm that the parity disk not used above + * has the correct data. + */ + if (rm->rm_firstdatacol > 1) { + n = raidz_parity_verify(zio, rm); + unexpected_errors += n; + ASSERT(parity_errors + n <= + rm->rm_firstdatacol); + } + + goto done; + } + break; + + case 2: + /* + * Find the two columns that reported errors. 
+ */ + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + rc = &rm->rm_col[c]; + if (rc->rc_error != 0) + break; + } + ASSERT(c != rm->rm_cols); + ASSERT(!rc->rc_skipped || rc->rc_error == ENXIO || + rc->rc_error == ESTALE); + + for (c1 = c++; c < rm->rm_cols; c++) { + rc = &rm->rm_col[c]; + if (rc->rc_error != 0) + break; + } + ASSERT(c != rm->rm_cols); + ASSERT(!rc->rc_skipped || rc->rc_error == ENXIO || + rc->rc_error == ESTALE); + + vdev_raidz_reconstruct_pq(rm, c1, c); + + if (zio_checksum_error(zio) == 0) { + zio->io_error = 0; + atomic_inc_64(&raidz_corrected_pq); + + goto done; + } + break; + + default: + ASSERT(rm->rm_firstdatacol <= 2); + ASSERT(0); } } /* - * This isn't a typical error -- either we got a read error or - * more than one child claimed a problem. Read every block we - * haven't already so we can try combinatorial reconstruction. + * This isn't a typical situation -- either we got a read error or + * a child silently returned bad data. Read every block so we can + * try again with as much data and parity as we can track down. If + * we've already been through once before, all children will be marked + * as tried so we'll proceed to combinatorial reconstruction. 
*/ unexpected_errors = 1; - rm->rm_missing_child = -1; + rm->rm_missingdata = 0; + rm->rm_missingparity = 0; - for (c = 0; c < rm->rm_cols; c++) - if (!rm->rm_col[c].rc_tried) - break; + for (c = 0; c < rm->rm_cols; c++) { + if (rm->rm_col[c].rc_tried) + continue; - if (c != rm->rm_cols) { zio->io_error = 0; zio_vdev_io_redone(zio); - for (c = 0; c < rm->rm_cols; c++) { + do { rc = &rm->rm_col[c]; if (rc->rc_tried) continue; zio_nowait(zio_vdev_child_io(zio, NULL, - vd->vdev_child[rc->rc_col], + vd->vdev_child[rc->rc_devidx], rc->rc_offset, rc->rc_data, rc->rc_size, zio->io_type, zio->io_priority, ZIO_FLAG_CANFAIL, vdev_raidz_child_done, rc)); - } + } while (++c < rm->rm_cols); + dprintf("rereading\n"); zio_wait_children_done(zio); return; } /* - * If there were more errors than parity disks, give up. + * At this point we've attempted to reconstruct the data given the + * errors we detected, and we've attempted to read all columns. There + * must, therefore, be one or more additional problems -- silent errors + * resulting in invalid data rather than explicit I/O errors resulting + * in absent data. Before we attempt combinatorial reconstruction make + * sure we have a chance of coming up with the right answer. */ - if (zio->io_numerrors > rm->rm_firstdatacol) { + if (zio->io_numerrors >= rm->rm_firstdatacol) { ASSERT(zio->io_error != 0); goto done; } - /* - * The number of I/O errors is correctable. Correct them here. - */ - ASSERT(zio->io_numerrors <= rm->rm_firstdatacol); - for (c = 0; c < rm->rm_cols; c++) { - rc = &rm->rm_col[c]; - ASSERT(rc->rc_tried); - if (rc->rc_error) { - vdev_raidz_reconstruct(rm, c); - if (zio_checksum_error(zio) == 0) + if (rm->rm_col[VDEV_RAIDZ_P].rc_error == 0) { + /* + * Attempt to reconstruct the data from parity P. 
+ */ + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + void *orig; + rc = &rm->rm_col[c]; + + orig = zio_buf_alloc(rc->rc_size); + bcopy(rc->rc_data, orig, rc->rc_size); + vdev_raidz_reconstruct_p(rm, c); + + if (zio_checksum_error(zio) == 0) { + zio_buf_free(orig, rc->rc_size); zio->io_error = 0; - else - zio->io_error = rc->rc_error; - goto done; + atomic_inc_64(&raidz_corrected_p); + + /* + * If this child didn't know that it returned + * bad data, inform it. + */ + if (rc->rc_tried && rc->rc_error == 0) + raidz_checksum_error(zio, rc); + rc->rc_error = ECKSUM; + goto done; + } + + bcopy(orig, rc->rc_data, rc->rc_size); + zio_buf_free(orig, rc->rc_size); } } - /* - * There were no I/O errors, but the data doesn't checksum. - * Try all permutations to see if we can find one that does. - */ - ASSERT(zio->io_numerrors == 0); - for (c = 0; c < rm->rm_cols; c++) { - void *orig; - rc = &rm->rm_col[c]; + if (rm->rm_firstdatacol > 1 && rm->rm_col[VDEV_RAIDZ_Q].rc_error == 0) { + /* + * Attempt to reconstruct the data from parity Q. + */ + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + void *orig; + rc = &rm->rm_col[c]; + + orig = zio_buf_alloc(rc->rc_size); + bcopy(rc->rc_data, orig, rc->rc_size); + vdev_raidz_reconstruct_q(rm, c); - orig = zio_buf_alloc(rc->rc_size); - bcopy(rc->rc_data, orig, rc->rc_size); - vdev_raidz_reconstruct(rm, c); + if (zio_checksum_error(zio) == 0) { + zio_buf_free(orig, rc->rc_size); + zio->io_error = 0; + atomic_inc_64(&raidz_corrected_q); + + /* + * If this child didn't know that it returned + * bad data, inform it. + */ + if (rc->rc_tried && rc->rc_error == 0) + raidz_checksum_error(zio, rc); + rc->rc_error = ECKSUM; + goto done; + } - if (zio_checksum_error(zio) == 0) { + bcopy(orig, rc->rc_data, rc->rc_size); zio_buf_free(orig, rc->rc_size); - zio->io_error = 0; - /* - * If this child didn't know that it returned bad data, - * inform it. 
- */ - if (rc->rc_tried && rc->rc_error == 0) - raidz_checksum_error(zio, rc); - rc->rc_error = ECKSUM; - goto done; } + } - bcopy(orig, rc->rc_data, rc->rc_size); - zio_buf_free(orig, rc->rc_size); + if (rm->rm_firstdatacol > 1 && + rm->rm_col[VDEV_RAIDZ_P].rc_error == 0 && + rm->rm_col[VDEV_RAIDZ_Q].rc_error == 0) { + /* + * Attempt to reconstruct the data from both P and Q. + */ + for (c = rm->rm_firstdatacol; c < rm->rm_cols - 1; c++) { + void *orig, *orig1; + rc = &rm->rm_col[c]; + + orig = zio_buf_alloc(rc->rc_size); + bcopy(rc->rc_data, orig, rc->rc_size); + + for (c1 = c + 1; c1 < rm->rm_cols; c1++) { + rc1 = &rm->rm_col[c1]; + + orig1 = zio_buf_alloc(rc1->rc_size); + bcopy(rc1->rc_data, orig1, rc1->rc_size); + + vdev_raidz_reconstruct_pq(rm, c, c1); + + if (zio_checksum_error(zio) == 0) { + zio_buf_free(orig, rc->rc_size); + zio_buf_free(orig1, rc1->rc_size); + zio->io_error = 0; + atomic_inc_64(&raidz_corrected_pq); + + /* + * If these children didn't know they + * returned bad data, inform them. + */ + if (rc->rc_tried && rc->rc_error == 0) + raidz_checksum_error(zio, rc); + if (rc1->rc_tried && rc1->rc_error == 0) + raidz_checksum_error(zio, rc1); + + rc->rc_error = ECKSUM; + rc1->rc_error = ECKSUM; + + goto done; + } + + bcopy(orig1, rc1->rc_data, rc1->rc_size); + zio_buf_free(orig1, rc1->rc_size); + } + + bcopy(orig, rc->rc_data, rc->rc_size); + zio_buf_free(orig, rc->rc_size); + } } /* - * All combinations failed to checksum. Generate checksum ereports for - * every one. + * All combinations failed to checksum. Generate checksum ereports for + * all children. 
*/ zio->io_error = ECKSUM; if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { for (c = 0; c < rm->rm_cols; c++) { rc = &rm->rm_col[c]; zfs_ereport_post(FM_EREPORT_ZFS_CHECKSUM, - zio->io_spa, vd->vdev_child[rc->rc_col], zio, + zio->io_spa, vd->vdev_child[rc->rc_devidx], zio, rc->rc_offset, rc->rc_size); } } @@ -558,7 +1147,7 @@ done: for (c = 0; c < rm->rm_cols; c++) { rc = &rm->rm_col[c]; - cvd = vd->vdev_child[rc->rc_col]; + cvd = vd->vdev_child[rc->rc_devidx]; if (rc->rc_error == 0) continue; @@ -571,8 +1160,8 @@ done: zio_nowait(zio_vdev_child_io(rio, NULL, cvd, rc->rc_offset, rc->rc_data, rc->rc_size, ZIO_TYPE_WRITE, zio->io_priority, - ZIO_FLAG_IO_REPAIR | ZIO_FLAG_CANFAIL | - ZIO_FLAG_DONT_PROPAGATE, NULL, NULL)); + ZIO_FLAG_IO_REPAIR | ZIO_FLAG_DONT_PROPAGATE | + ZIO_FLAG_CANFAIL, NULL, NULL)); } zio_nowait(rio); @@ -587,7 +1176,7 @@ done: static void vdev_raidz_state_change(vdev_t *vd, int faulted, int degraded) { - if (faulted > 1) + if (faulted > vd->vdev_nparity) vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_NO_REPLICAS); else if (degraded + faulted != 0) diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index 0cff445cf3..137a402538 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -564,11 +564,18 @@ zfs_ioc_vdev_add(zfs_cmd_t *zc) return (error); } -/* ARGSUSED */ static int zfs_ioc_vdev_remove(zfs_cmd_t *zc) { - return (ENOTSUP); + spa_t *spa; + int error; + + error = spa_open(zc->zc_name, &spa, FTAG); + if (error != 0) + return (error); + error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE); + spa_close(spa, FTAG); + return (error); } static int @@ -1176,6 +1183,12 @@ zfs_ioc_bookmark_name(zfs_cmd_t *zc) return (error); } +static int +zfs_ioc_promote(zfs_cmd_t *zc) +{ + return (dsl_dataset_promote(zc->zc_name)); +} + static zfs_ioc_vec_t zfs_ioc_vec[] = { { zfs_ioc_pool_create, zfs_secpolicy_config, pool_name }, { zfs_ioc_pool_destroy, 
zfs_secpolicy_config, pool_name }, @@ -1215,7 +1228,8 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = { { zfs_ioc_inject_list_next, zfs_secpolicy_inject, no_name }, { zfs_ioc_error_log, zfs_secpolicy_inject, pool_name }, { zfs_ioc_clear, zfs_secpolicy_config, pool_name }, - { zfs_ioc_bookmark_name, zfs_secpolicy_inject, pool_name } + { zfs_ioc_bookmark_name, zfs_secpolicy_inject, pool_name }, + { zfs_ioc_promote, zfs_secpolicy_write, dataset_name } }; static int diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index bf7c9791fe..640ed4e960 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -1392,7 +1392,6 @@ zio_vdev_io_assess(zio_t *zio) /* XXPOLICY */ if (zio_should_retry(zio)) { ASSERT(tvd == vd); - ASSERT(!(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE)); zio->io_retries++; zio->io_error = 0; diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index 5aaca0662b..07ada9c30e 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -106,12 +106,14 @@ int zfs_prop_readonly(zfs_prop_t); const char *zfs_prop_default_string(zfs_prop_t); uint64_t zfs_prop_default_numeric(zfs_prop_t); + /* - * On-disk format version. + * On-disk version number. */ #define ZFS_VERSION_1 1ULL #define ZFS_VERSION_2 2ULL -#define ZFS_VERSION ZFS_VERSION_2 +#define ZFS_VERSION_3 3ULL +#define ZFS_VERSION ZFS_VERSION_3 /* * Symbolic names for the changes that caused a ZFS_VERSION switch. 
@@ -126,6 +128,11 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t); */ #define ZFS_VERSION_INITIAL ZFS_VERSION_1 #define ZFS_VERSION_DITTO_BLOCKS ZFS_VERSION_2 +#define ZFS_VERSION_SPARES ZFS_VERSION_3 +#define ZFS_VERSION_RAID6 ZFS_VERSION_3 +#define ZFS_VERSION_BPLIST_ACCOUNT ZFS_VERSION_3 +#define ZFS_VERSION_RAIDZ_DEFLATE ZFS_VERSION_3 +#define ZFS_VERSION_DNODE_BYTES ZFS_VERSION_3 /* * The following are configuration names used in the nvlist describing a pool's @@ -156,6 +163,9 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t); #define ZPOOL_CONFIG_OFFLINE "offline" #define ZPOOL_CONFIG_ERRCOUNT "error_count" #define ZPOOL_CONFIG_NOT_PRESENT "not_present" +#define ZPOOL_CONFIG_SPARES "spares" +#define ZPOOL_CONFIG_IS_SPARE "is_spare" +#define ZPOOL_CONFIG_NPARITY "nparity" #define VDEV_TYPE_ROOT "root" #define VDEV_TYPE_MIRROR "mirror" @@ -164,6 +174,7 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t); #define VDEV_TYPE_DISK "disk" #define VDEV_TYPE_FILE "file" #define VDEV_TYPE_MISSING "missing" +#define VDEV_TYPE_SPARE "spare" /* * This is needed in userland to report the minimum necessary device size. @@ -206,18 +217,20 @@ typedef enum vdev_aux { VDEV_AUX_TOO_SMALL, /* vdev size is too small */ VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */ VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */ - VDEV_AUX_VERSION_OLDER /* on-disk version is too old */ + VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */ + VDEV_AUX_SPARED /* hot spare used in another pool */ } vdev_aux_t; /* * pool state. The following states are written to disk as part of the normal - * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED. The remaining states are + * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE. The remaining states are * software abstractions used at various levels to communicate pool state. 
*/ typedef enum pool_state { POOL_STATE_ACTIVE = 0, /* In active use */ POOL_STATE_EXPORTED, /* Explicitly exported */ POOL_STATE_DESTROYED, /* Explicitly destroyed */ + POOL_STATE_SPARE, /* Reserved for hot spare use */ POOL_STATE_UNINITIALIZED, /* Internal spa_t state */ POOL_STATE_UNAVAIL, /* Internal libzfs state */ POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */ @@ -256,6 +269,7 @@ typedef struct vdev_stat { uint64_t vs_aux; /* see vdev_aux_t */ uint64_t vs_alloc; /* space allocated */ uint64_t vs_space; /* total capacity */ + uint64_t vs_dspace; /* deflated capacity */ uint64_t vs_rsize; /* replaceable dev size */ uint64_t vs_ops[ZIO_TYPES]; /* operation count */ uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */ @@ -335,7 +349,8 @@ typedef enum zfs_ioc { ZFS_IOC_INJECT_LIST_NEXT, ZFS_IOC_ERROR_LOG, ZFS_IOC_CLEAR, - ZFS_IOC_BOOKMARK_NAME + ZFS_IOC_BOOKMARK_NAME, + ZFS_IOC_PROMOTE } zfs_ioc_t; /* |