summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoreschrock <none@none>2006-05-30 15:47:16 -0700
committereschrock <none@none>2006-05-30 15:47:16 -0700
commit99653d4ee642c6528e88224f12409a5f23060994 (patch)
tree5cbcc540b8ed86b6a008f1084f9ca031368d926f
parent354a1801a85aa6b61ff4d5e290ab708ba57e56a3 (diff)
downloadillumos-joyent-99653d4ee642c6528e88224f12409a5f23060994.tar.gz
PSARC 2006/223 ZFS Hot Spares
PSARC 2006/303 ZFS Clone Promotion 6276916 support for "clone swap" 6288488 du reports misleading size on RAID-Z 6393490 libzfs should be a real library 6397148 fbufs debug code should be removed from buf_hash_insert() 6405966 Hot Spare support in ZFS 6409302 passing a non-root vdev via zpool_create() panics system 6415739 assertion failed: !(zio->io_flags & 0x00040) 6416759 ::dbufs does not find bonus buffers anymore 6417978 double parity RAID-Z a.k.a. RAID6 6424554 full block re-writes need not read data in 6425111 detaching an offline device can result in import confusion
-rw-r--r--usr/src/cmd/fm/modules/common/Makefile2
-rw-r--r--usr/src/cmd/fm/modules/common/zfs-retire/Makefile33
-rw-r--r--usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf29
-rw-r--r--usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c231
-rw-r--r--usr/src/cmd/fm/schemes/zfs/scheme.c26
-rw-r--r--usr/src/cmd/fs.d/df.c38
-rw-r--r--usr/src/cmd/fs.d/zfs/fstyp/fstyp.c3
-rw-r--r--usr/src/cmd/mdb/common/modules/zfs/zfs.c71
-rw-r--r--usr/src/cmd/truss/codes.c2
-rw-r--r--usr/src/cmd/zdb/zdb.c23
-rw-r--r--usr/src/cmd/zfs/zfs_iter.c13
-rw-r--r--usr/src/cmd/zfs/zfs_iter.h9
-rw-r--r--usr/src/cmd/zfs/zfs_main.c359
-rw-r--r--usr/src/cmd/zfs/zfs_util.h10
-rw-r--r--usr/src/cmd/zinject/Makefile.com2
-rw-r--r--usr/src/cmd/zinject/translate.c20
-rw-r--r--usr/src/cmd/zinject/zinject.c13
-rw-r--r--usr/src/cmd/zinject/zinject.h2
-rw-r--r--usr/src/cmd/zoneadm/zfs.c119
-rw-r--r--usr/src/cmd/zoneadm/zoneadm.c3
-rw-r--r--usr/src/cmd/zoneadm/zoneadm.h1
-rw-r--r--usr/src/cmd/zoneadmd/vplat.c21
-rw-r--r--usr/src/cmd/zpool/zpool_dataset.c32
-rw-r--r--usr/src/cmd/zpool/zpool_iter.c21
-rw-r--r--usr/src/cmd/zpool/zpool_main.c522
-rw-r--r--usr/src/cmd/zpool/zpool_util.h11
-rw-r--r--usr/src/cmd/zpool/zpool_vdev.c403
-rw-r--r--usr/src/cmd/ztest/ztest.c17
-rw-r--r--usr/src/lib/libdiskmgt/common/entry.c9
-rw-r--r--usr/src/lib/libdiskmgt/common/inuse_zpool.c36
-rw-r--r--usr/src/lib/libdiskmgt/common/libdiskmgt.h6
-rw-r--r--usr/src/lib/libzfs/common/libzfs.h127
-rw-r--r--usr/src/lib/libzfs/common/libzfs_changelist.c70
-rw-r--r--usr/src/lib/libzfs/common/libzfs_config.c165
-rw-r--r--usr/src/lib/libzfs/common/libzfs_dataset.c1644
-rw-r--r--usr/src/lib/libzfs/common/libzfs_graph.c198
-rw-r--r--usr/src/lib/libzfs/common/libzfs_impl.h49
-rw-r--r--usr/src/lib/libzfs/common/libzfs_import.c507
-rw-r--r--usr/src/lib/libzfs/common/libzfs_mount.c193
-rw-r--r--usr/src/lib/libzfs/common/libzfs_pool.c978
-rw-r--r--usr/src/lib/libzfs/common/libzfs_status.c16
-rw-r--r--usr/src/lib/libzfs/common/libzfs_util.c429
-rw-r--r--usr/src/lib/libzfs/spec/libzfs.spec53
-rw-r--r--usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c18
-rw-r--r--usr/src/lib/libzfs_jni/common/libzfs_jni_main.c24
-rw-r--r--usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c3
-rw-r--r--usr/src/lib/libzfs_jni/common/libzfs_jni_util.h3
-rw-r--r--usr/src/lib/libzpool/common/util.c10
-rw-r--r--usr/src/pkgdefs/SUNWfmd/prototype_com2
-rw-r--r--usr/src/uts/common/fs/zfs/arc.c5
-rw-r--r--usr/src/uts/common/fs/zfs/bplist.c59
-rw-r--r--usr/src/uts/common/fs/zfs/dbuf.c10
-rw-r--r--usr/src/uts/common/fs/zfs/dmu.c15
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_objset.c22
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_tx.c28
-rw-r--r--usr/src/uts/common/fs/zfs/dnode.c43
-rw-r--r--usr/src/uts/common/fs/zfs/dnode_sync.c6
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_dataset.c264
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_dir.c63
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_pool.c2
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_prop.c102
-rw-r--r--usr/src/uts/common/fs/zfs/spa.c905
-rw-r--r--usr/src/uts/common/fs/zfs/spa_config.c8
-rw-r--r--usr/src/uts/common/fs/zfs/spa_misc.c133
-rw-r--r--usr/src/uts/common/fs/zfs/sys/bplist.h11
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dmu.h5
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dnode.h10
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dsl_dataset.h14
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dsl_dir.h1
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dsl_prop.h9
-rw-r--r--usr/src/uts/common/fs/zfs/sys/spa.h16
-rw-r--r--usr/src/uts/common/fs/zfs/sys/spa_impl.h8
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev.h13
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev_impl.h9
-rw-r--r--usr/src/uts/common/fs/zfs/vdev.c172
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_label.c152
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_mirror.c15
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_raidz.c927
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_ioctl.c20
-rw-r--r--usr/src/uts/common/fs/zfs/zio.c1
-rw-r--r--usr/src/uts/common/sys/fs/zfs.h25
81 files changed, 6423 insertions, 3226 deletions
diff --git a/usr/src/cmd/fm/modules/common/Makefile b/usr/src/cmd/fm/modules/common/Makefile
index 868a66df08..ef5ebba6f0 100644
--- a/usr/src/cmd/fm/modules/common/Makefile
+++ b/usr/src/cmd/fm/modules/common/Makefile
@@ -27,6 +27,6 @@
#
SUBDIRS = cpumem-retire eversholt io-retire ip-transport snmp-trapgen \
- syslog-msgs zfs-diagnosis
+ syslog-msgs zfs-diagnosis zfs-retire
include ../../Makefile.subdirs
diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/Makefile b/usr/src/cmd/fm/modules/common/zfs-retire/Makefile
new file mode 100644
index 0000000000..9d80ae77ee
--- /dev/null
+++ b/usr/src/cmd/fm/modules/common/zfs-retire/Makefile
@@ -0,0 +1,33 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+MODULE = zfs-retire
+CLASS = common
+SRCS = zfs_retire.c
+
+include ../../Makefile.plugin
+
+LDLIBS += -lzfs
diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf b/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf
new file mode 100644
index 0000000000..f506384bff
--- /dev/null
+++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf
@@ -0,0 +1,29 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# fmd configuration file for the zfs retire agent.
+#
+subscribe fault.fs.zfs.device
diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c
new file mode 100644
index 0000000000..962b37bb82
--- /dev/null
+++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c
@@ -0,0 +1,231 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * The ZFS retire agent is responsible for managing hot spares across all pools.
+ * When we see a device fault, we try to open the associated pool and look for
+ * any hot spares. We iterate over any available hot spares and attempt a
+ * 'zpool replace' for each one.
+ */
+
+#include <fm/fmd_api.h>
+#include <sys/fs/zfs.h>
+#include <sys/fm/protocol.h>
+#include <sys/fm/fs/zfs.h>
+#include <libzfs.h>
+
+/*
+ * Find a pool with a matching GUID.
+ */
+typedef struct find_cbdata {
+ uint64_t cb_guid;
+ zpool_handle_t *cb_zhp;
+} find_cbdata_t;
+
+static int
+find_pool(zpool_handle_t *zhp, void *data)
+{
+ find_cbdata_t *cbp = data;
+
+ if (cbp->cb_guid == zpool_get_guid(zhp)) {
+ cbp->cb_zhp = zhp;
+ return (1);
+ }
+
+ zpool_close(zhp);
+ return (0);
+}
+
+/*
+ * Find a vdev within a tree with a matching GUID.
+ */
+static nvlist_t *
+find_vdev(nvlist_t *nv, uint64_t search)
+{
+ uint64_t guid;
+ nvlist_t **child;
+ uint_t c, children;
+ nvlist_t *ret;
+
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
+ guid == search)
+ return (nv);
+
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) != 0)
+ return (NULL);
+
+ for (c = 0; c < children; c++) {
+ if ((ret = find_vdev(child[c], search)) != NULL)
+ return (ret);
+ }
+
+ return (NULL);
+}
+
+/*ARGSUSED*/
+static void
+zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
+ const char *class)
+{
+ uint64_t pool_guid, vdev_guid;
+ char *dev_name;
+ zpool_handle_t *zhp;
+ nvlist_t *resource, *config, *nvroot;
+ nvlist_t *vdev;
+ nvlist_t **spares, **faults;
+ uint_t s, nspares, f, nfaults;
+ nvlist_t *replacement;
+ find_cbdata_t cb;
+ libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
+
+ /*
+ * Get information from the fault.
+ */
+ if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
+ &faults, &nfaults) != 0)
+ return;
+
+ for (f = 0; f < nfaults; f++) {
+ if (nvlist_lookup_nvlist(faults[f], FM_FAULT_RESOURCE,
+ &resource) != 0 ||
+ nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL,
+ &pool_guid) != 0 ||
+ nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV,
+ &vdev_guid) != 0)
+ continue;
+
+ /*
+ * From the pool guid and vdev guid, get the pool name and
+ * device name.
+ */
+ cb.cb_guid = pool_guid;
+ if (zpool_iter(zhdl, find_pool, &cb) != 1)
+ continue;
+
+ zhp = cb.cb_zhp;
+ config = zpool_get_config(zhp, NULL);
+ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &nvroot) != 0) {
+ zpool_close(zhp);
+ continue;
+ }
+
+ if ((vdev = find_vdev(nvroot, vdev_guid)) == NULL) {
+ zpool_close(zhp);
+ continue;
+ }
+
+ /*
+ * Find out if there are any hot spares available in the pool.
+ */
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ &spares, &nspares) != 0) {
+ zpool_close(zhp);
+ continue;
+ }
+
+ if (nvlist_alloc(&replacement, NV_UNIQUE_NAME, 0) != 0) {
+ zpool_close(zhp);
+ continue;
+ }
+
+ if (nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
+ VDEV_TYPE_ROOT) != 0) {
+ nvlist_free(replacement);
+ zpool_close(zhp);
+ continue;
+ }
+
+ dev_name = zpool_vdev_name(zhdl, zhp, vdev);
+
+ /*
+ * Try to replace each spare, ending when we successfully
+ * replace it.
+ */
+ for (s = 0; s < nspares; s++) {
+ char *spare_name;
+
+ if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
+ &spare_name) != 0)
+ continue;
+
+ if (nvlist_add_nvlist_array(replacement,
+ ZPOOL_CONFIG_CHILDREN, &spares[s], 1) != 0)
+ continue;
+
+ if (zpool_vdev_attach(zhp, dev_name, spare_name,
+ replacement, B_TRUE) == 0)
+ break;
+ }
+
+ free(dev_name);
+ nvlist_free(replacement);
+ zpool_close(zhp);
+ }
+}
+
+static const fmd_hdl_ops_t fmd_ops = {
+ zfs_retire_recv, /* fmdo_recv */
+ NULL, /* fmdo_timeout */
+ NULL, /* fmdo_close */
+ NULL, /* fmdo_stats */
+ NULL, /* fmdo_gc */
+};
+
+static const fmd_prop_t fmd_props[] = {
+ { NULL, 0, NULL }
+};
+
+static const fmd_hdl_info_t fmd_info = {
+ "ZFS Retire Agent", "1.0", &fmd_ops, fmd_props
+};
+
+void
+_fmd_init(fmd_hdl_t *hdl)
+{
+ libzfs_handle_t *zhdl;
+
+ if ((zhdl = libzfs_init()) == NULL)
+ return;
+
+ if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
+ libzfs_fini(zhdl);
+ return;
+ }
+
+ fmd_hdl_setspecific(hdl, zhdl);
+}
+
+void
+_fmd_fini(fmd_hdl_t *hdl)
+{
+ libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
+
+ if (zhdl != NULL)
+ libzfs_fini(zhdl);
+}
diff --git a/usr/src/cmd/fm/schemes/zfs/scheme.c b/usr/src/cmd/fm/schemes/zfs/scheme.c
index 7f2532a637..e28f7b231c 100644
--- a/usr/src/cmd/fm/schemes/zfs/scheme.c
+++ b/usr/src/cmd/fm/schemes/zfs/scheme.c
@@ -34,6 +34,8 @@ typedef struct cbdata {
zpool_handle_t *cb_pool;
} cbdata_t;
+libzfs_handle_t *g_zfs;
+
static int
find_pool(zpool_handle_t *zhp, void *data)
{
@@ -66,7 +68,7 @@ fmd_fmri_nvl2str(nvlist_t *nvl, char *buf, size_t buflen)
cb.cb_guid = pool_guid;
cb.cb_pool = NULL;
- if (zpool_iter(find_pool, &cb) == 1) {
+ if (zpool_iter(g_zfs, find_pool, &cb) == 1) {
name = zpool_get_name(cb.cb_pool);
} else {
(void) snprintf(guidbuf, sizeof (guidbuf), "%llx", pool_guid);
@@ -135,7 +137,7 @@ fmd_fmri_present(nvlist_t *nvl)
cb.cb_guid = pool_guid;
cb.cb_pool = NULL;
- if (zpool_iter(find_pool, &cb) != 1)
+ if (zpool_iter(g_zfs, find_pool, &cb) != 1)
return (0);
if (nvlist_lookup_uint64(nvl, FM_FMRI_ZFS_VDEV, &vdev_guid) != 0) {
@@ -163,7 +165,7 @@ fmd_fmri_unusable(nvlist_t *nvl)
cb.cb_guid = pool_guid;
cb.cb_pool = NULL;
- if (zpool_iter(find_pool, &cb) != 1)
+ if (zpool_iter(g_zfs, find_pool, &cb) != 1)
return (1);
if (nvlist_lookup_uint64(nvl, FM_FMRI_ZFS_VDEV, &vdev_guid) != 0) {
@@ -189,3 +191,21 @@ fmd_fmri_unusable(nvlist_t *nvl)
return (ret);
}
+
+int
+fmd_fmri_init(void)
+{
+ g_zfs = libzfs_init();
+
+ if (g_zfs == NULL)
+ return (-1);
+ else
+ return (0);
+}
+
+void
+fmd_fmri_fini(void)
+{
+ if (g_zfs)
+ libzfs_fini(g_zfs);
+}
diff --git a/usr/src/cmd/fs.d/df.c b/usr/src/cmd/fs.d/df.c
index 0a38f44b1a..3ee66576a5 100644
--- a/usr/src/cmd/fs.d/df.c
+++ b/usr/src/cmd/fs.d/df.c
@@ -237,55 +237,43 @@ static void do_df(int, char **) __NORETURN;
static void parse_options(int, char **);
static char *basename(char *);
-
-/* ARGSUSED */
-static void
-dummy_error_handler(const char *fmt, va_list ap)
-{
- /* Do nothing */
-}
-
-static zfs_handle_t *(*_zfs_open)(const char *, int);
+static libzfs_handle_t *(*_libzfs_init)(boolean_t);
+static zfs_handle_t *(*_zfs_open)(libzfs_handle_t *, const char *, int);
static void (*_zfs_close)(zfs_handle_t *);
static uint64_t (*_zfs_prop_get_int)(zfs_handle_t *, zfs_prop_t);
-static void (*_zfs_set_error_handler)(void (*)(const char *, va_list));
+static libzfs_handle_t *g_zfs;
/*
* Dynamically check for libzfs, in case the user hasn't installed the SUNWzfs
* packages. A basic utility such as df shouldn't depend on optional
* filesystems.
*/
-static int
+static boolean_t
load_libzfs(void)
{
void *hdl;
- if (_zfs_open != NULL)
- return (1);
+ if (_libzfs_init != NULL)
+ return (g_zfs != NULL);
if ((hdl = dlopen("libzfs.so", RTLD_LAZY)) != NULL) {
- _zfs_set_error_handler = (void (*)())
- dlsym(hdl, "zfs_set_error_handler");
+ _libzfs_init = (libzfs_handle_t *(*)(boolean_t))dlsym(hdl,
+ "libzfs_init");
_zfs_open = (zfs_handle_t *(*)())dlsym(hdl, "zfs_open");
_zfs_close = (void (*)())dlsym(hdl, "zfs_close");
_zfs_prop_get_int = (uint64_t (*)())
dlsym(hdl, "zfs_prop_get_int");
- if (_zfs_set_error_handler != NULL) {
+ if (_libzfs_init != NULL) {
assert(_zfs_open != NULL);
assert(_zfs_close != NULL);
assert(_zfs_prop_get_int != NULL);
- /*
- * Disable ZFS error reporting, so we don't get messages
- * like "can't open ..." under race conditions.
- */
- _zfs_set_error_handler(dummy_error_handler);
- return (1);
+ g_zfs = _libzfs_init(B_FALSE);
}
}
- return (0);
+ return (g_zfs != NULL);
}
int
@@ -1257,7 +1245,7 @@ adjust_total_blocks(struct df_request *dfrp, fsblkcnt64_t *total,
do {
*slash = '\0';
- if ((zhp = _zfs_open(dataset, ZFS_TYPE_ANY)) == NULL) {
+ if ((zhp = _zfs_open(g_zfs, dataset, ZFS_TYPE_ANY)) == NULL) {
free(dataset);
return;
}
@@ -1274,7 +1262,7 @@ adjust_total_blocks(struct df_request *dfrp, fsblkcnt64_t *total,
} while ((slash = strrchr(dataset, '/')) != NULL);
- if ((zhp = _zfs_open(dataset, ZFS_TYPE_ANY)) == NULL) {
+ if ((zhp = _zfs_open(g_zfs, dataset, ZFS_TYPE_ANY)) == NULL) {
free(dataset);
return;
}
diff --git a/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c b/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c
index 26376e36a6..6a8585d872 100644
--- a/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c
+++ b/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c
@@ -142,7 +142,8 @@ main(int argc, char **argv)
return (1);
}
- if ((config = zpool_read_label(fd)) == NULL)
+ if (zpool_read_label(fd, &config) != 0 ||
+ config == NULL)
return (1);
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
index 5b218aee5f..73b1cbef62 100644
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
@@ -208,73 +208,6 @@ freelist_walk_fini(mdb_walk_state_t *wsp)
{
}
-typedef struct dbuf_walk_data {
- dbuf_hash_table_t ht;
- int64_t bucket;
- uintptr_t dbp;
- dmu_buf_impl_t db;
-} dbuf_walk_data_t;
-
-static int
-dbuf_walk_init(mdb_walk_state_t *wsp)
-{
- dbuf_walk_data_t *dwd;
-
- if (wsp->walk_addr != NULL) {
- mdb_warn("must supply starting address\n");
- return (WALK_ERR);
- }
-
- dwd = mdb_alloc(sizeof (dbuf_walk_data_t), UM_SLEEP);
-
- if (mdb_readvar(&dwd->ht, "dbuf_hash_table") == -1) {
- mdb_warn("failed to read 'dbuf_hash_table'");
- mdb_free(dwd, sizeof (dbuf_walk_data_t));
- return (WALK_ERR);
- }
- dwd->bucket = -1;
- dwd->dbp = 0;
- wsp->walk_data = dwd;
- return (WALK_NEXT);
-}
-
-static int
-dbuf_walk_step(mdb_walk_state_t *wsp)
-{
- int status;
- dbuf_walk_data_t *dwd = wsp->walk_data;
-
- while (dwd->dbp == 0) {
- dwd->bucket++;
- if (dwd->bucket == dwd->ht.hash_table_mask+1)
- return (WALK_DONE);
-
- if (mdb_vread(&dwd->dbp, sizeof (void *),
- (uintptr_t)(dwd->ht.hash_table+dwd->bucket)) == -1) {
- mdb_warn("failed to read hash bucket %u at %p",
- dwd->bucket, dwd->ht.hash_table+dwd->bucket);
- return (WALK_DONE);
- }
- }
-
- wsp->walk_addr = dwd->dbp;
- if (mdb_vread(&dwd->db, sizeof (dmu_buf_impl_t),
- wsp->walk_addr) == -1) {
- mdb_warn("failed to read dbuf at %p", wsp->walk_addr);
- return (WALK_DONE);
- }
- status = wsp->walk_callback(wsp->walk_addr, &dwd->db, wsp->walk_cbdata);
-
- dwd->dbp = (uintptr_t)dwd->db.db_hash_next;
- return (status);
-}
-
-static void
-dbuf_walk_fini(mdb_walk_state_t *wsp)
-{
- dbuf_walk_data_t *dwd = wsp->walk_data;
- mdb_free(dwd, sizeof (dbuf_walk_data_t));
-}
static int
dataset_name(uintptr_t addr, char *buf)
@@ -693,7 +626,7 @@ dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
return (DCMD_ERR);
}
- if (mdb_pwalk("dbufs", dbufs_cb, &data, 0) != 0) {
+ if (mdb_pwalk("dmu_buf_impl_t", dbufs_cb, &data, 0) != 0) {
mdb_warn("can't walk dbufs");
return (DCMD_ERR);
}
@@ -1580,8 +1513,6 @@ static const mdb_walker_t walkers[] = {
{ LIST_WALK_NAME, LIST_WALK_DESC,
list_walk_init, list_walk_step, list_walk_fini },
#endif
- { "dbufs", "walk cached ZFS dbufs",
- dbuf_walk_init, dbuf_walk_step, dbuf_walk_fini },
{ "zms_freelist", "walk ZFS metaslab freelist",
freelist_walk_init, freelist_walk_step, freelist_walk_fini },
{ "txg_list", "given any txg_list_t *, walk all entries in all txgs",
diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c
index 4e808b8e48..37e79f6322 100644
--- a/usr/src/cmd/truss/codes.c
+++ b/usr/src/cmd/truss/codes.c
@@ -937,6 +937,8 @@ const struct ioc {
"zfs_cmd_t" },
{ (uint_t)ZFS_IOC_BOOKMARK_NAME, "ZFS_IOC_BOOKMARK_NAME",
"zfs_cmd_t" },
+ { (uint_t)ZFS_IOC_PROMOTE, "ZFS_IOC_PROMOTE",
+ "zfs_cmd_t" },
/* kssl ioctls */
{ (uint_t)KSSL_ADD_ENTRY, "KSSL_ADD_ENTRY",
diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c
index f283148ef8..0af9a59690 100644
--- a/usr/src/cmd/zdb/zdb.c
+++ b/usr/src/cmd/zdb/zdb.c
@@ -744,8 +744,8 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
(u_longlong_t)ds->ds_fsid_guid);
(void) printf("\t\tguid = %llu\n",
(u_longlong_t)ds->ds_guid);
- (void) printf("\t\tinconsistent = %llu\n",
- (u_longlong_t)ds->ds_inconsistent);
+ (void) printf("\t\tflags = %llx\n",
+ (u_longlong_t)ds->ds_flags);
(void) printf("\t\tbp = %s\n", blkbuf);
}
@@ -755,7 +755,9 @@ dump_bplist(objset_t *mos, uint64_t object, char *name)
bplist_t bpl = { 0 };
blkptr_t blk, *bp = &blk;
uint64_t itor = 0;
- char numbuf[6];
+ char bytes[6];
+ char comp[6];
+ char uncomp[6];
if (dump_opt['d'] < 3)
return;
@@ -766,10 +768,17 @@ dump_bplist(objset_t *mos, uint64_t object, char *name)
return;
}
- nicenum(bpl.bpl_phys->bpl_bytes, numbuf);
-
- (void) printf("\n %s: %llu entries, %s\n",
- name, (u_longlong_t)bpl.bpl_phys->bpl_entries, numbuf);
+ nicenum(bpl.bpl_phys->bpl_bytes, bytes);
+ if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
+ nicenum(bpl.bpl_phys->bpl_comp, comp);
+ nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
+ (void) printf("\n %s: %llu entries, %s (%s/%s comp)\n",
+ name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
+ bytes, comp, uncomp);
+ } else {
+ (void) printf("\n %s: %llu entries, %s\n",
+ name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes);
+ }
if (dump_opt['d'] < 5) {
bplist_close(&bpl);
diff --git a/usr/src/cmd/zfs/zfs_iter.c b/usr/src/cmd/zfs/zfs_iter.c
index bc8e5ea59c..9f8f37b765 100644
--- a/usr/src/cmd/zfs/zfs_iter.c
+++ b/usr/src/cmd/zfs/zfs_iter.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -160,7 +159,7 @@ zfs_compare(const void *larg, const void *rarg, void *unused)
}
int
-zfs_for_each(int argc, char **argv, int recurse, zfs_type_t types,
+zfs_for_each(int argc, char **argv, boolean_t recurse, zfs_type_t types,
zfs_iter_f callback, void *data)
{
callback_data_t cb;
@@ -190,7 +189,7 @@ zfs_for_each(int argc, char **argv, int recurse, zfs_type_t types,
* If given no arguments, iterate over all datasets.
*/
cb.cb_recurse = 1;
- ret = zfs_iter_root(zfs_callback, &cb);
+ ret = zfs_iter_root(g_zfs, zfs_callback, &cb);
} else {
int i;
zfs_handle_t *zhp;
@@ -209,8 +208,8 @@ zfs_for_each(int argc, char **argv, int recurse, zfs_type_t types,
}
for (i = 0; i < argc; i++) {
- if ((zhp = zfs_open(argv[i], argtype)) != NULL)
- ret = zfs_callback(zhp, &cb);
+ if ((zhp = zfs_open(g_zfs, argv[i], argtype)) != NULL)
+ ret |= zfs_callback(zhp, &cb);
else
ret = 1;
}
diff --git a/usr/src/cmd/zfs/zfs_iter.h b/usr/src/cmd/zfs/zfs_iter.h
index 03428b827b..c69049b28f 100644
--- a/usr/src/cmd/zfs/zfs_iter.h
+++ b/usr/src/cmd/zfs/zfs_iter.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -33,7 +32,7 @@
extern "C" {
#endif
-int zfs_for_each(int, char **, int, zfs_type_t, zfs_iter_f, void *);
+int zfs_for_each(int, char **, boolean_t, zfs_type_t, zfs_iter_f, void *);
#ifdef __cplusplus
}
diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c
index 5b04a76f47..0fba9046bd 100644
--- a/usr/src/cmd/zfs/zfs_main.c
+++ b/usr/src/cmd/zfs/zfs_main.c
@@ -47,6 +47,9 @@
#include <libzfs.h>
#include "zfs_iter.h"
+#include "zfs_util.h"
+
+libzfs_handle_t *g_zfs;
static FILE *mnttab_file;
@@ -66,6 +69,7 @@ static int zfs_do_share(int argc, char **argv);
static int zfs_do_unshare(int argc, char **argv);
static int zfs_do_send(int argc, char **argv);
static int zfs_do_receive(int argc, char **argv);
+static int zfs_do_promote(int argc, char **argv);
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
@@ -91,6 +95,7 @@ typedef enum {
HELP_INHERIT,
HELP_LIST,
HELP_MOUNT,
+ HELP_PROMOTE,
HELP_RECEIVE,
HELP_RENAME,
HELP_ROLLBACK,
@@ -124,6 +129,7 @@ static zfs_command_t command_table[] = {
{ "snapshot", zfs_do_snapshot, HELP_SNAPSHOT },
{ "rollback", zfs_do_rollback, HELP_ROLLBACK },
{ "clone", zfs_do_clone, HELP_CLONE },
+ { "promote", zfs_do_promote, HELP_PROMOTE },
{ "rename", zfs_do_rename, HELP_RENAME },
{ NULL },
{ "list", zfs_do_list, HELP_LIST },
@@ -176,6 +182,8 @@ get_usage(zfs_help_t idx)
return (gettext("\tmount\n"
"\tmount [-o opts] [-O] -a\n"
"\tmount [-o opts] [-O] <filesystem>\n"));
+ case HELP_PROMOTE:
+ return (gettext("\tpromote <clone filesystem>\n"));
case HELP_RECEIVE:
return (gettext("\treceive [-vn] <filesystem|volume|snapshot>\n"
"\treceive [-vn] -d <filesystem>\n"));
@@ -228,10 +236,10 @@ safe_malloc(size_t size)
* a complete usage message.
*/
static void
-usage(int requested)
+usage(boolean_t requested)
{
int i;
- int show_properties = FALSE;
+ boolean_t show_properties = B_FALSE;
FILE *fp = requested ? stdout : stderr;
if (current_command == NULL) {
@@ -260,7 +268,7 @@ usage(int requested)
strcmp(current_command->name, "get") == 0 ||
strcmp(current_command->name, "inherit") == 0 ||
strcmp(current_command->name, "list") == 0)
- show_properties = TRUE;
+ show_properties = B_TRUE;
if (show_properties) {
@@ -313,27 +321,27 @@ zfs_do_clone(int argc, char **argv)
if (argc > 1 && argv[1][0] == '-') {
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
argv[1][1]);
- usage(FALSE);
+ usage(B_FALSE);
}
/* check number of arguments */
if (argc < 2) {
(void) fprintf(stderr, gettext("missing source dataset "
"argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc < 3) {
(void) fprintf(stderr, gettext("missing target dataset "
"argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc > 3) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
/* open the source dataset */
- if ((zhp = zfs_open(argv[1], ZFS_TYPE_SNAPSHOT)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_SNAPSHOT)) == NULL)
return (1);
/* pass to libzfs */
@@ -341,7 +349,7 @@ zfs_do_clone(int argc, char **argv)
/* create the mountpoint if necessary */
if (ret == 0) {
- zfs_handle_t *clone = zfs_open(argv[2], ZFS_TYPE_ANY);
+ zfs_handle_t *clone = zfs_open(g_zfs, argv[2], ZFS_TYPE_ANY);
if (clone != NULL) {
if ((ret = zfs_mount(clone, NULL, 0)) == 0)
ret = zfs_share(clone);
@@ -374,7 +382,7 @@ zfs_do_create(int argc, char **argv)
char *size = NULL;
char *blocksize = NULL;
int c;
- int noreserve = FALSE;
+ boolean_t noreserve = B_FALSE;
int ret;
/* check options */
@@ -388,24 +396,24 @@ zfs_do_create(int argc, char **argv)
blocksize = optarg;
break;
case 's':
- noreserve = TRUE;
+ noreserve = B_TRUE;
break;
case ':':
(void) fprintf(stderr, gettext("missing size "
"argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
if (noreserve && type != ZFS_TYPE_VOLUME) {
(void) fprintf(stderr, gettext("'-s' can only be used when "
"creating a volume\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
argc -= optind;
@@ -415,18 +423,18 @@ zfs_do_create(int argc, char **argv)
if (argc == 0) {
(void) fprintf(stderr, gettext("missing %s argument\n"),
zfs_type_to_name(type));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc > 1) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
/* pass to libzfs */
- if (zfs_create(argv[0], type, size, blocksize) != 0)
+ if (zfs_create(g_zfs, argv[0], type, size, blocksize) != 0)
return (1);
- if ((zhp = zfs_open(argv[0], ZFS_TYPE_ANY)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
return (1);
/*
@@ -476,7 +484,7 @@ zfs_do_create(int argc, char **argv)
* either be a child, or a clone of a child.
*/
typedef struct destroy_cbdata {
- int cb_first;
+ boolean_t cb_first;
int cb_force;
int cb_recurse;
int cb_error;
@@ -511,7 +519,7 @@ destroy_check_dependent(zfs_handle_t *zhp, void *data)
zfs_type_to_name(zfs_get_type(cbp->cb_target)));
(void) fprintf(stderr, gettext("use '-r' to destroy "
"the following datasets:\n"));
- cbp->cb_first = 0;
+ cbp->cb_first = B_FALSE;
cbp->cb_error = 1;
}
@@ -532,7 +540,7 @@ destroy_check_dependent(zfs_handle_t *zhp, void *data)
zfs_type_to_name(zfs_get_type(cbp->cb_target)));
(void) fprintf(stderr, gettext("use '-R' to destroy "
"the following datasets:\n"));
- cbp->cb_first = 0;
+ cbp->cb_first = B_FALSE;
cbp->cb_error = 1;
}
@@ -597,7 +605,7 @@ zfs_do_destroy(int argc, char **argv)
default:
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -607,15 +615,15 @@ zfs_do_destroy(int argc, char **argv)
/* check number of arguments */
if (argc == 0) {
(void) fprintf(stderr, gettext("missing path argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc > 1) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
/* Open the given dataset */
- if ((zhp = zfs_open(argv[0], ZFS_TYPE_ANY)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
return (1);
cb.cb_target = zhp;
@@ -641,7 +649,7 @@ zfs_do_destroy(int argc, char **argv)
/*
* Check for any dependents and/or clones.
*/
- cb.cb_first = 1;
+ cb.cb_first = B_TRUE;
if (!cb.cb_doclones)
(void) zfs_iter_dependents(zhp, destroy_check_dependent, &cb);
@@ -678,13 +686,13 @@ zfs_do_destroy(int argc, char **argv)
* columns to display as well as which property types to allow.
*/
typedef struct get_cbdata {
- int cb_scripted;
int cb_sources;
- int cb_literal;
int cb_columns[4];
- zfs_prop_t cb_prop[ZFS_NPROP_ALL];
int cb_nprop;
- int cb_isall;
+ boolean_t cb_scripted;
+ boolean_t cb_literal;
+ boolean_t cb_isall;
+ zfs_prop_t cb_prop[ZFS_NPROP_ALL];
} get_cbdata_t;
#define GET_COL_NAME 1
@@ -804,7 +812,7 @@ static int
zfs_do_get(int argc, char **argv)
{
get_cbdata_t cb = { 0 };
- int recurse = 0;
+ boolean_t recurse = B_FALSE;
int c;
char *value, *fields, *badopt;
int i;
@@ -823,18 +831,18 @@ zfs_do_get(int argc, char **argv)
while ((c = getopt(argc, argv, ":o:s:rHp")) != -1) {
switch (c) {
case 'p':
- cb.cb_literal = TRUE;
+ cb.cb_literal = B_TRUE;
break;
case 'r':
- recurse = TRUE;
+ recurse = B_TRUE;
break;
case 'H':
- cb.cb_scripted = TRUE;
+ cb.cb_scripted = B_TRUE;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
- usage(FALSE);
+ usage(B_FALSE);
break;
case 'o':
/*
@@ -852,7 +860,7 @@ zfs_do_get(int argc, char **argv)
(void) fprintf(stderr, gettext("too "
"many fields given to -o "
"option\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
switch (getsubopt(&optarg, col_subopts,
@@ -873,7 +881,7 @@ zfs_do_get(int argc, char **argv)
(void) fprintf(stderr,
gettext("invalid column name "
"'%s'\n"), value);
- usage(FALSE);
+ usage(B_FALSE);
}
}
break;
@@ -906,7 +914,7 @@ zfs_do_get(int argc, char **argv)
(void) fprintf(stderr,
gettext("invalid source "
"'%s'\n"), value);
- usage(FALSE);
+ usage(B_FALSE);
}
}
break;
@@ -914,7 +922,7 @@ zfs_do_get(int argc, char **argv)
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -924,7 +932,7 @@ zfs_do_get(int argc, char **argv)
if (argc < 1) {
(void) fprintf(stderr, gettext("missing property "
"argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
fields = argv[0];
@@ -935,7 +943,7 @@ zfs_do_get(int argc, char **argv)
* given dataset.
*/
if (strcmp(fields, "all") == 0)
- cb.cb_isall = TRUE;
+ cb.cb_isall = B_TRUE;
if ((ret = zfs_get_proplist(fields, cb.cb_prop, ZFS_NPROP_ALL,
&cb.cb_nprop, &badopt)) != 0) {
@@ -945,7 +953,7 @@ zfs_do_get(int argc, char **argv)
else
(void) fprintf(stderr, gettext("too many properties "
"specified\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
argc--;
@@ -954,7 +962,7 @@ zfs_do_get(int argc, char **argv)
/* check for at least one dataset name */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing dataset argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
/*
@@ -1008,7 +1016,7 @@ inherit_callback(zfs_handle_t *zhp, void *data)
static int
zfs_do_inherit(int argc, char **argv)
{
- int recurse = 0;
+ boolean_t recurse = B_FALSE;
int c;
zfs_prop_t prop;
char *propname;
@@ -1017,13 +1025,13 @@ zfs_do_inherit(int argc, char **argv)
while ((c = getopt(argc, argv, "r")) != -1) {
switch (c) {
case 'r':
- recurse = TRUE;
+ recurse = B_TRUE;
break;
case '?':
default:
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -1033,11 +1041,11 @@ zfs_do_inherit(int argc, char **argv)
/* check number of arguments */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing property argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc < 2) {
(void) fprintf(stderr, gettext("missing dataset argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
propname = argv[0];
@@ -1050,7 +1058,7 @@ zfs_do_inherit(int argc, char **argv)
if ((prop = zfs_name_to_prop(propname)) == ZFS_PROP_INVAL) {
(void) fprintf(stderr, gettext("invalid property '%s'\n"),
propname);
- usage(FALSE);
+ usage(B_FALSE);
}
if (zfs_prop_readonly(prop)) {
(void) fprintf(stderr, gettext("%s property is read-only\n"),
@@ -1083,8 +1091,8 @@ zfs_do_inherit(int argc, char **argv)
* '-r' is specified.
*/
typedef struct list_cbdata {
- int cb_first;
- int cb_scripted;
+ boolean_t cb_first;
+ boolean_t cb_scripted;
zfs_prop_t cb_fields[ZFS_NPROP_ALL];
int cb_fieldcount;
} list_cbdata_t;
@@ -1129,7 +1137,7 @@ print_dataset(zfs_handle_t *zhp, zfs_prop_t *fields, size_t count, int scripted)
}
if (zfs_prop_get(zhp, fields[i], property,
- sizeof (property), NULL, NULL, 0, FALSE) != 0)
+ sizeof (property), NULL, NULL, 0, B_FALSE) != 0)
(void) strlcpy(property, "-", sizeof (property));
/*
@@ -1159,7 +1167,7 @@ list_callback(zfs_handle_t *zhp, void *data)
if (cbp->cb_first) {
if (!cbp->cb_scripted)
print_header(cbp->cb_fields, cbp->cb_fieldcount);
- cbp->cb_first = FALSE;
+ cbp->cb_first = B_FALSE;
}
print_dataset(zhp, cbp->cb_fields, cbp->cb_fieldcount,
@@ -1172,8 +1180,8 @@ static int
zfs_do_list(int argc, char **argv)
{
int c;
- int recurse = 0;
- int scripted = FALSE;
+ boolean_t recurse = B_FALSE;
+ boolean_t scripted = B_FALSE;
static char default_fields[] =
"name,used,available,referenced,mountpoint";
int types = ZFS_TYPE_ANY;
@@ -1193,10 +1201,10 @@ zfs_do_list(int argc, char **argv)
fields = optarg;
break;
case 'r':
- recurse = TRUE;
+ recurse = B_TRUE;
break;
case 'H':
- scripted = TRUE;
+ scripted = B_TRUE;
break;
case 't':
types = 0;
@@ -1216,19 +1224,19 @@ zfs_do_list(int argc, char **argv)
(void) fprintf(stderr,
gettext("invalid type '%s'\n"),
value);
- usage(FALSE);
+ usage(B_FALSE);
}
}
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
- usage(FALSE);
+ usage(B_FALSE);
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -1258,16 +1266,16 @@ zfs_do_list(int argc, char **argv)
else
(void) fprintf(stderr, gettext("too many properties "
"specified\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
cb.cb_fieldcount += alloffset;
cb.cb_scripted = scripted;
- cb.cb_first = TRUE;
+ cb.cb_first = B_TRUE;
ret = zfs_for_each(argc, argv, recurse, types, list_callback, &cb);
- if (ret == 0 && cb.cb_first == TRUE)
+ if (ret == 0 && cb.cb_first)
(void) printf(gettext("no datasets available\n"));
return (ret);
@@ -1283,39 +1291,76 @@ static int
zfs_do_rename(int argc, char **argv)
{
zfs_handle_t *zhp;
- int ret = 1;
+ int ret;
/* check options */
if (argc > 1 && argv[1][0] == '-') {
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
argv[1][1]);
- usage(FALSE);
+ usage(B_FALSE);
}
/* check number of arguments */
if (argc < 2) {
(void) fprintf(stderr, gettext("missing source dataset "
"argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc < 3) {
(void) fprintf(stderr, gettext("missing target dataset "
"argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc > 3) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
- if ((zhp = zfs_open(argv[1], ZFS_TYPE_ANY)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_ANY)) == NULL)
return (1);
- if (zfs_rename(zhp, argv[2]) != 0)
- goto error;
+ ret = (zfs_rename(zhp, argv[2]) != 0);
+
+ zfs_close(zhp);
+ return (ret);
+}
+
+/*
+ * zfs promote <fs>
+ *
+ * Promotes the given clone fs to be the parent
+ */
+/* ARGSUSED */
+static int
+zfs_do_promote(int argc, char **argv)
+{
+ zfs_handle_t *zhp;
+ int ret;
+
+ /* check options */
+ if (argc > 1 && argv[1][0] == '-') {
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ argv[1][1]);
+ usage(B_FALSE);
+ }
+
+ /* check number of arguments */
+ if (argc < 2) {
+ (void) fprintf(stderr, gettext("missing clone filesystem"
+ "argument\n"));
+ usage(B_FALSE);
+ }
+ if (argc > 2) {
+ (void) fprintf(stderr, gettext("too many arguments\n"));
+ usage(B_FALSE);
+ }
+
+ zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+ if (zhp == NULL)
+ return (1);
+
+ ret = (zfs_promote(zhp) != 0);
- ret = 0;
-error:
zfs_close(zhp);
return (ret);
}
@@ -1333,12 +1378,12 @@ error:
*/
typedef struct rollback_cbdata {
uint64_t cb_create;
- int cb_first;
+ boolean_t cb_first;
int cb_doclones;
char *cb_target;
int cb_error;
- int cb_recurse;
- int cb_dependent;
+ boolean_t cb_recurse;
+ boolean_t cb_dependent;
} rollback_cbdata_t;
/*
@@ -1352,8 +1397,10 @@ rollback_check(zfs_handle_t *zhp, void *data)
{
rollback_cbdata_t *cbp = data;
- if (cbp->cb_doclones)
+ if (cbp->cb_doclones) {
+ zfs_close(zhp);
return (0);
+ }
if (!cbp->cb_dependent) {
if (strcmp(zfs_get_name(zhp), cbp->cb_target) != 0 &&
@@ -1374,10 +1421,10 @@ rollback_check(zfs_handle_t *zhp, void *data)
}
if (cbp->cb_recurse) {
- cbp->cb_dependent = TRUE;
+ cbp->cb_dependent = B_TRUE;
(void) zfs_iter_dependents(zhp, rollback_check,
cbp);
- cbp->cb_dependent = FALSE;
+ cbp->cb_dependent = B_FALSE;
} else {
(void) fprintf(stderr, "%s\n",
zfs_get_name(zhp));
@@ -1429,7 +1476,7 @@ zfs_do_rollback(int argc, char **argv)
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -1439,22 +1486,22 @@ zfs_do_rollback(int argc, char **argv)
/* check number of arguments */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing dataset argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc > 1) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
/* open the snapshot */
- if ((snap = zfs_open(argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
+ if ((snap = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
return (1);
/* open the parent dataset */
(void) strlcpy(parentname, argv[0], sizeof (parentname));
verify((delim = strrchr(parentname, '@')) != NULL);
*delim = '\0';
- if ((zhp = zfs_open(parentname, ZFS_TYPE_ANY)) == NULL) {
+ if ((zhp = zfs_open(g_zfs, parentname, ZFS_TYPE_ANY)) == NULL) {
zfs_close(snap);
return (1);
}
@@ -1465,7 +1512,7 @@ zfs_do_rollback(int argc, char **argv)
*/
cb.cb_target = argv[0];
cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG);
- cb.cb_first = 1;
+ cb.cb_first = B_TRUE;
cb.cb_error = 0;
(void) zfs_iter_children(zhp, rollback_check, &cb);
@@ -1606,18 +1653,18 @@ zfs_do_set(int argc, char **argv)
if (argc > 1 && argv[1][0] == '-') {
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
argv[1][1]);
- usage(FALSE);
+ usage(B_FALSE);
}
/* check number of arguments */
if (argc < 2) {
(void) fprintf(stderr, gettext("missing property=value "
"argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc < 3) {
(void) fprintf(stderr, gettext("missing dataset name\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
/* validate property=value argument */
@@ -1625,7 +1672,7 @@ zfs_do_set(int argc, char **argv)
if ((cb.cb_value = strchr(cb.cb_propname, '=')) == NULL) {
(void) fprintf(stderr, gettext("missing value in "
"property=value argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
*cb.cb_value = '\0';
@@ -1634,12 +1681,12 @@ zfs_do_set(int argc, char **argv)
if (*cb.cb_propname == '\0') {
(void) fprintf(stderr,
gettext("missing property in property=value argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (*cb.cb_value == '\0') {
(void) fprintf(stderr,
gettext("missing value in property=value argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
/* get the property type */
@@ -1647,7 +1694,7 @@ zfs_do_set(int argc, char **argv)
ZFS_PROP_INVAL) {
(void) fprintf(stderr,
gettext("invalid property '%s'\n"), cb.cb_propname);
- usage(FALSE);
+ usage(B_FALSE);
}
/*
@@ -1655,10 +1702,10 @@ zfs_do_set(int argc, char **argv)
* once now so we don't generate multiple errors each time we try to
* apply it to a dataset.
*/
- if (zfs_prop_validate(cb.cb_prop, cb.cb_value, NULL) != 0)
+ if (zfs_prop_validate(g_zfs, cb.cb_prop, cb.cb_value, NULL) != 0)
return (1);
- return (zfs_for_each(argc - 2, argv + 2, FALSE,
+ return (zfs_for_each(argc - 2, argv + 2, B_FALSE,
ZFS_TYPE_ANY, set_callback, &cb));
}
@@ -1675,20 +1722,20 @@ zfs_do_snapshot(int argc, char **argv)
if (argc > 1 && argv[1][0] == '-') {
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
argv[1][1]);
- usage(FALSE);
+ usage(B_FALSE);
}
/* check number of arguments */
if (argc < 2) {
(void) fprintf(stderr, gettext("missing snapshot argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc > 2) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
- return (zfs_snapshot(argv[1]) != 0);
+ return (zfs_snapshot(g_zfs, argv[1]) != 0);
}
/*
@@ -1712,12 +1759,12 @@ zfs_do_send(int argc, char **argv)
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
- usage(FALSE);
+ usage(B_FALSE);
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -1727,11 +1774,11 @@ zfs_do_send(int argc, char **argv)
/* check number of arguments */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing snapshot argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc > 1) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (isatty(STDOUT_FILENO)) {
@@ -1743,10 +1790,11 @@ zfs_do_send(int argc, char **argv)
}
if (fromname) {
- if ((zhp_from = zfs_open(fromname, ZFS_TYPE_SNAPSHOT)) == NULL)
+ if ((zhp_from = zfs_open(g_zfs, fromname,
+ ZFS_TYPE_SNAPSHOT)) == NULL)
return (1);
}
- if ((zhp_to = zfs_open(argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
+ if ((zhp_to = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
return (1);
err = zfs_send(zhp_to, zhp_from);
@@ -1767,31 +1815,31 @@ static int
zfs_do_receive(int argc, char **argv)
{
int c, err;
- int isprefix = FALSE;
- int dryrun = FALSE;
- int verbose = FALSE;
+ boolean_t isprefix = B_FALSE;
+ boolean_t dryrun = B_FALSE;
+ boolean_t verbose = B_FALSE;
/* check options */
while ((c = getopt(argc, argv, ":dnv")) != -1) {
switch (c) {
case 'd':
- isprefix = TRUE;
+ isprefix = B_TRUE;
break;
case 'n':
- dryrun = TRUE;
+ dryrun = B_TRUE;
break;
case 'v':
- verbose = TRUE;
+ verbose = B_TRUE;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
- usage(FALSE);
+ usage(B_FALSE);
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -1801,11 +1849,11 @@ zfs_do_receive(int argc, char **argv)
/* check number of arguments */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing snapshot argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc > 1) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (isatty(STDIN_FILENO)) {
@@ -1816,7 +1864,7 @@ zfs_do_receive(int argc, char **argv)
return (1);
}
- err = zfs_receive(argv[0], isprefix, verbose, dryrun);
+ err = zfs_receive(g_zfs, argv[0], isprefix, verbose, dryrun);
return (err != 0);
}
@@ -1868,7 +1916,7 @@ get_all_filesystems(zfs_handle_t ***fslist, size_t *count)
{
get_all_cbdata_t cb = { 0 };
- (void) zfs_iter_root(get_one_filesystem, &cb);
+ (void) zfs_iter_root(g_zfs, get_one_filesystem, &cb);
*fslist = cb.cb_handles;
*count = cb.cb_used;
@@ -1883,9 +1931,9 @@ mountpoint_compare(const void *a, const void *b)
char mountb[MAXPATHLEN];
verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta,
- sizeof (mounta), NULL, NULL, 0, FALSE) == 0);
+ sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb,
- sizeof (mountb), NULL, NULL, 0, FALSE) == 0);
+ sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
return (strcmp(mounta, mountb));
}
@@ -1953,9 +2001,9 @@ share_mount_callback(zfs_handle_t *zhp, void *data)
* with a legacy mountpoint, or those with legacy share options.
*/
verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
- sizeof (mountpoint), NULL, NULL, 0, FALSE) == 0);
+ sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts,
- sizeof (shareopts), NULL, NULL, 0, FALSE) == 0);
+ sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0);
if (cbp->cb_type == OP_SHARE) {
if (strcmp(shareopts, "off") == 0) {
@@ -2080,12 +2128,12 @@ share_or_mount(int type, int argc, char **argv)
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
- usage(FALSE);
+ usage(B_FALSE);
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -2099,7 +2147,7 @@ share_or_mount(int type, int argc, char **argv)
if (argc != 0) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
get_all_filesystems(&fslist, &count);
@@ -2124,7 +2172,7 @@ share_or_mount(int type, int argc, char **argv)
if (type == OP_SHARE) {
(void) fprintf(stderr, gettext("missing filesystem "
"argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
/*
@@ -2149,13 +2197,14 @@ share_or_mount(int type, int argc, char **argv)
if (argc > 1) {
(void) fprintf(stderr,
gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
- if ((zhp = zfs_open(argv[0], ZFS_TYPE_FILESYSTEM)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[0],
+ ZFS_TYPE_FILESYSTEM)) == NULL)
ret = 1;
else {
- cb.cb_explicit = TRUE;
+ cb.cb_explicit = B_TRUE;
ret = share_mount_callback(zhp, &cb);
zfs_close(zhp);
}
@@ -2210,7 +2259,7 @@ unshare_unmount_compare(const void *larg, const void *rarg, void *unused)
* and unmount it appropriately.
*/
static int
-unshare_unmount_path(int type, char *path, int flags, int is_manual)
+unshare_unmount_path(int type, char *path, int flags, boolean_t is_manual)
{
zfs_handle_t *zhp;
int ret;
@@ -2252,12 +2301,13 @@ unshare_unmount_path(int type, char *path, int flags, int is_manual)
return (1);
}
- if ((zhp = zfs_open(entry.mnt_special, ZFS_TYPE_FILESYSTEM)) == NULL)
+ if ((zhp = zfs_open(g_zfs, entry.mnt_special,
+ ZFS_TYPE_FILESYSTEM)) == NULL)
return (1);
verify(zfs_prop_get(zhp, type == OP_SHARE ?
ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property,
- sizeof (property), NULL, NULL, 0, FALSE) == 0);
+ sizeof (property), NULL, NULL, 0, B_FALSE) == 0);
if (type == OP_SHARE) {
if (strcmp(property, "off") == 0) {
@@ -2318,7 +2368,7 @@ unshare_unmount(int type, int argc, char **argv)
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -2329,7 +2379,7 @@ unshare_unmount(int type, int argc, char **argv)
if (do_all) {
if (argc != 0) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
} else if (argc != 1) {
if (argc == 0)
@@ -2338,7 +2388,7 @@ unshare_unmount(int type, int argc, char **argv)
else
(void) fprintf(stderr,
gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (do_all) {
@@ -2390,7 +2440,7 @@ unshare_unmount(int type, int argc, char **argv)
if (strchr(entry.mnt_special, '@') != NULL)
continue;
- if ((zhp = zfs_open(entry.mnt_special,
+ if ((zhp = zfs_open(g_zfs, entry.mnt_special,
ZFS_TYPE_FILESYSTEM)) == NULL) {
ret = 1;
continue;
@@ -2399,7 +2449,7 @@ unshare_unmount(int type, int argc, char **argv)
verify(zfs_prop_get(zhp, type == OP_SHARE ?
ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT,
property, sizeof (property), NULL, NULL,
- 0, FALSE) == 0);
+ 0, B_FALSE) == 0);
/* Ignore legacy mounts and shares */
if ((type == OP_SHARE &&
@@ -2476,14 +2526,15 @@ unshare_unmount(int type, int argc, char **argv)
*/
if (argv[0][0] == '/')
return (unshare_unmount_path(type, argv[0],
- flags, FALSE));
+ flags, B_FALSE));
- if ((zhp = zfs_open(argv[0], ZFS_TYPE_FILESYSTEM)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[0],
+ ZFS_TYPE_FILESYSTEM)) == NULL)
return (1);
verify(zfs_prop_get(zhp, type == OP_SHARE ?
ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property,
- sizeof (property), NULL, NULL, 0, FALSE) == 0);
+ sizeof (property), NULL, NULL, 0, B_FALSE) == 0);
switch (type) {
case OP_SHARE:
@@ -2581,7 +2632,7 @@ manual_mount(int argc, char **argv)
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
- usage(FALSE);
+ usage(B_FALSE);
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@@ -2613,11 +2664,11 @@ manual_mount(int argc, char **argv)
path = argv[1];
/* try to open the dataset */
- if ((zhp = zfs_open(dataset, ZFS_TYPE_FILESYSTEM)) == NULL)
+ if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_FILESYSTEM)) == NULL)
return (1);
(void) zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
- sizeof (mountpoint), NULL, NULL, 0, FALSE);
+ sizeof (mountpoint), NULL, NULL, 0, B_FALSE);
/* check for legacy mountpoint and complain appropriately */
ret = 0;
@@ -2683,7 +2734,7 @@ manual_unmount(int argc, char **argv)
return (2);
}
- return (unshare_unmount_path(OP_MOUNT, argv[0], flags, TRUE));
+ return (unshare_unmount_path(OP_MOUNT, argv[0], flags, B_TRUE));
}
static int
@@ -2702,9 +2753,9 @@ volcheck(zpool_handle_t *zhp, void *data)
* links, depending on the value of 'isinit'.
*/
static int
-do_volcheck(int isinit)
+do_volcheck(boolean_t isinit)
{
- return (zpool_iter(volcheck, (void *)isinit) ? 1 : 0);
+ return (zpool_iter(g_zfs, volcheck, (void *)isinit) ? 1 : 0);
}
int
@@ -2720,6 +2771,14 @@ main(int argc, char **argv)
opterr = 0;
+ if ((g_zfs = libzfs_init()) == NULL) {
+ (void) fprintf(stderr, gettext("internal error: failed to "
+ "initialize ZFS library\n"));
+ return (1);
+ }
+
+ libzfs_print_on_error(g_zfs, B_TRUE);
+
if ((mnttab_file = fopen(MNTTAB, "r")) == NULL) {
(void) fprintf(stderr, gettext("internal error: unable to "
"open %s\n"), MNTTAB);
@@ -2741,7 +2800,7 @@ main(int argc, char **argv)
*/
if (argc < 2) {
(void) fprintf(stderr, gettext("missing command\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
cmdname = argv[1];
@@ -2762,16 +2821,16 @@ main(int argc, char **argv)
* Special case '-?'
*/
if (strcmp(cmdname, "-?") == 0)
- usage(TRUE);
+ usage(B_TRUE);
/*
* 'volinit' and 'volfini' do not appear in the usage message,
* so we have to special case them here.
*/
if (strcmp(cmdname, "volinit") == 0)
- return (do_volcheck(TRUE));
+ return (do_volcheck(B_TRUE));
else if (strcmp(cmdname, "volfini") == 0)
- return (do_volcheck(FALSE));
+ return (do_volcheck(B_FALSE));
/*
* Run the appropriate command.
@@ -2790,12 +2849,14 @@ main(int argc, char **argv)
if (i == NCOMMAND) {
(void) fprintf(stderr, gettext("unrecognized "
"command '%s'\n"), cmdname);
- usage(FALSE);
+ usage(B_FALSE);
}
}
(void) fclose(mnttab_file);
+ libzfs_fini(g_zfs);
+
/*
* The 'ZFS_ABORT' environment variable causes us to dump core on exit
* for the purposes of running ::findleaks.
diff --git a/usr/src/cmd/zfs/zfs_util.h b/usr/src/cmd/zfs/zfs_util.h
index 5b2fcfa9f3..c7f2f16186 100644
--- a/usr/src/cmd/zfs/zfs_util.h
+++ b/usr/src/cmd/zfs/zfs_util.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -29,11 +28,14 @@
#pragma ident "%Z%%M% %I% %E% SMI"
+#include <libzfs.h>
+
#ifdef __cplusplus
extern "C" {
#endif
void * safe_malloc(size_t size);
+libzfs_handle_t *g_zfs;
#ifdef __cplusplus
}
diff --git a/usr/src/cmd/zinject/Makefile.com b/usr/src/cmd/zinject/Makefile.com
index 14651a366c..c1ac4ac922 100644
--- a/usr/src/cmd/zinject/Makefile.com
+++ b/usr/src/cmd/zinject/Makefile.com
@@ -34,7 +34,7 @@ include ../../Makefile.cmd
INCS += -I../../../lib/libzpool/common
INCS += -I../../../uts/common/fs/zfs
-LDLIBS += -lzpool -lzfs
+LDLIBS += -lzpool -lzfs -lnvpair
C99MODE= -xc99=%all
C99LMODE= -Xc99=%all
diff --git a/usr/src/cmd/zinject/translate.c b/usr/src/cmd/zinject/translate.c
index 882b230930..b4f6693aa1 100644
--- a/usr/src/cmd/zinject/translate.c
+++ b/usr/src/cmd/zinject/translate.c
@@ -436,22 +436,28 @@ translate_device(const char *pool, const char *device, zinject_record_t *record)
{
char *end;
zpool_handle_t *zhp;
+ nvlist_t *tgt;
+ boolean_t isspare;
/*
* Given a device name or GUID, create an appropriate injection record
* with zi_guid set.
*/
- if ((zhp = zpool_open(pool)) == NULL)
+ if ((zhp = zpool_open(g_zfs, pool)) == NULL)
return (-1);
record->zi_guid = strtoull(device, &end, 16);
- if (record->zi_guid == 0 || *end != '\0')
- record->zi_guid = zpool_vdev_to_guid(zhp, device);
+ if (record->zi_guid == 0 || *end != '\0') {
+ tgt = zpool_find_vdev(zhp, device, &isspare);
- if (record->zi_guid == 0) {
- (void) fprintf(stderr, "cannot find device '%s' in pool '%s'\n",
- device, pool);
- return (-1);
+ if (tgt == NULL) {
+ (void) fprintf(stderr, "cannot find device '%s' in "
+ "pool '%s'\n", device, pool);
+ return (-1);
+ }
+
+ verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
+ &record->zi_guid) == 0);
}
return (0);
diff --git a/usr/src/cmd/zinject/zinject.c b/usr/src/cmd/zinject/zinject.c
index b584fb0de5..02fc6a16ef 100644
--- a/usr/src/cmd/zinject/zinject.c
+++ b/usr/src/cmd/zinject/zinject.c
@@ -151,6 +151,7 @@
#include "zinject.h"
+libzfs_handle_t *g_zfs;
int zfs_fd;
#define ECKSUM EBADE
@@ -479,6 +480,14 @@ main(int argc, char **argv)
int ret;
int flags = 0;
+ if ((g_zfs = libzfs_init()) == NULL) {
+ (void) fprintf(stderr, "internal error: failed to "
+ "initialize ZFS library\n");
+ return (1);
+ }
+
+ libzfs_print_on_error(g_zfs, B_TRUE);
+
if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
(void) fprintf(stderr, "failed to open ZFS device\n");
return (1);
@@ -721,7 +730,7 @@ main(int argc, char **argv)
* time we access the pool.
*/
if (dataset[0] != '\0' && domount) {
- if ((zhp = zfs_open(dataset, ZFS_TYPE_ANY)) == NULL)
+ if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_ANY)) == NULL)
return (1);
if (zfs_unmount(zhp, NULL, 0) != 0)
@@ -735,5 +744,7 @@ main(int argc, char **argv)
if (dataset[0] != '\0' && domount)
ret = (zfs_mount(zhp, NULL, 0) != 0);
+ libzfs_fini(g_zfs);
+
return (ret);
}
diff --git a/usr/src/cmd/zinject/zinject.h b/usr/src/cmd/zinject/zinject.h
index bdbc2454c4..8086c4bc80 100644
--- a/usr/src/cmd/zinject/zinject.h
+++ b/usr/src/cmd/zinject/zinject.h
@@ -57,6 +57,8 @@ int translate_device(const char *pool, const char *device,
zinject_record_t *record);
void usage(void);
+extern libzfs_handle_t *g_zfs;
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/cmd/zoneadm/zfs.c b/usr/src/cmd/zoneadm/zfs.c
index 98fa5a44b5..eb9822781a 100644
--- a/usr/src/cmd/zoneadm/zfs.c
+++ b/usr/src/cmd/zoneadm/zfs.c
@@ -47,7 +47,7 @@
#include "zoneadm.h"
-static const char *current_dataset;
+libzfs_handle_t *g_zfs;
typedef struct zfs_mount_data {
char *match_name;
@@ -61,41 +61,6 @@ typedef struct zfs_snapshot_data {
} zfs_snapshot_data_t;
/*
- * ZFS error handler to do nothing - do not print the libzfs error messages.
- */
-/* ARGSUSED */
-static void
-noop_err_handler(const char *fmt, va_list ap)
-{
-}
-
-/*
- * Custom error handler for errors incurred as part of verifying datasets. We
- * want to trim off the leading 'cannot open ...' to create a better error
- * message. The only other way this can fail is if we fail to set the 'zoned'
- * property. In this case we just pass the error on verbatim.
- */
-static void
-err_handler(const char *fmt, va_list ap)
-{
- char buf[1024];
-
- (void) vsnprintf(buf, sizeof (buf), fmt, ap);
-
- if (strncmp(gettext("cannot open "), buf,
- strlen(gettext("cannot open "))) == 0)
- /*
- * TRANSLATION_NOTE
- * zfs and dataset are literals that should not be translated.
- */
- (void) fprintf(stderr, gettext("could not verify zfs "
- "dataset %s%s\n"), current_dataset, strchr(buf, ':'));
- else
- (void) fprintf(stderr, gettext("could not verify zfs dataset "
- "%s: %s\n"), current_dataset, buf);
-}
-
-/*
* A ZFS file system iterator call-back function which is used to validate
* datasets imported into the zone.
*/
@@ -141,7 +106,7 @@ match_mountpoint(zfs_handle_t *zhp, void *data)
cbp = (zfs_mount_data_t *)data;
if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
- 0, FALSE) == 0 && strcmp(mp, cbp->match_name) == 0) {
+ 0, B_FALSE) == 0 && strcmp(mp, cbp->match_name) == 0) {
cbp->match_handle = zhp;
return (1);
}
@@ -161,7 +126,7 @@ mount2zhandle(char *mountpoint)
cb.match_name = mountpoint;
cb.match_handle = NULL;
- (void) zfs_iter_root(match_mountpoint, &cb);
+ (void) zfs_iter_root(g_zfs, match_mountpoint, &cb);
return (cb.match_handle);
}
@@ -331,7 +296,7 @@ take_snapshot(char *source_zone, zfs_handle_t *zhp, char *snapshot_name,
if (pre_snapshot(source_zone) != Z_OK)
return (Z_ERR);
- res = zfs_snapshot(snapshot_name);
+ res = zfs_snapshot(g_zfs, snapshot_name);
if (post_snapshot(source_zone) != Z_OK)
return (Z_ERR);
@@ -443,7 +408,7 @@ clone_snap(char *snapshot_name, char *zonepath)
zfs_handle_t *zhp;
zfs_handle_t *clone;
- if ((zhp = zfs_open(snapshot_name, ZFS_TYPE_SNAPSHOT)) == NULL)
+ if ((zhp = zfs_open(g_zfs, snapshot_name, ZFS_TYPE_SNAPSHOT)) == NULL)
return (Z_NO_ENTRY);
(void) printf(gettext("Cloning snapshot %s\n"), snapshot_name);
@@ -454,7 +419,7 @@ clone_snap(char *snapshot_name, char *zonepath)
return (Z_ERR);
/* create the mountpoint if necessary */
- if ((clone = zfs_open(zonepath, ZFS_TYPE_ANY)) == NULL)
+ if ((clone = zfs_open(g_zfs, zonepath, ZFS_TYPE_ANY)) == NULL)
return (Z_ERR);
/*
@@ -574,14 +539,14 @@ snap2path(char *snap_name, char *path, int len)
/* Get the file system name from the snap_name. */
*p = '\0';
- zhp = zfs_open(snap_name, ZFS_TYPE_ANY);
+ zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_ANY);
*p = '@';
if (zhp == NULL)
return (Z_ERR);
/* Get the file system mount point. */
if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
- 0, FALSE) != 0) {
+ 0, B_FALSE) != 0) {
zfs_close(zhp);
return (Z_ERR);
}
@@ -739,15 +704,16 @@ create_zfs_zonepath(char *zonepath)
if (path2name(zonepath, zfs_name, sizeof (zfs_name)) != Z_OK)
return;
- zfs_set_error_handler(noop_err_handler);
-
- if (zfs_create(zfs_name, ZFS_TYPE_FILESYSTEM, NULL, NULL) != 0 ||
- (zhp = zfs_open(zfs_name, ZFS_TYPE_ANY)) == NULL) {
- zfs_set_error_handler(NULL);
+ if (zfs_create(g_zfs, zfs_name, ZFS_TYPE_FILESYSTEM, NULL, NULL) != 0 ||
+ (zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_ANY)) == NULL) {
+ (void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
+ "%s\n"), zfs_name, libzfs_error_description(g_zfs));
return;
}
if (zfs_mount(zhp, NULL, 0) != 0) {
+ (void) fprintf(stderr, gettext("cannot mount ZFS dataset %s: "
+ "%s\n"), zfs_name, libzfs_error_description(g_zfs));
(void) zfs_destroy(zhp);
} else if (zfs_prop_set(zhp, ZFS_PROP_SHARENFS, "off") != 0) {
(void) fprintf(stderr, gettext("file system %s successfully "
@@ -765,7 +731,6 @@ create_zfs_zonepath(char *zonepath)
}
}
- zfs_set_error_handler(NULL);
zfs_close(zhp);
}
@@ -782,12 +747,8 @@ destroy_zfs(char *zonepath)
boolean_t is_clone = B_FALSE;
char origin[ZFS_MAXPROPLEN];
- zfs_set_error_handler(noop_err_handler);
-
- if ((zhp = mount2zhandle(zonepath)) == NULL) {
- zfs_set_error_handler(NULL);
+ if ((zhp = mount2zhandle(zonepath)) == NULL)
return (Z_ERR);
- }
/*
* We can't destroy the file system if it has dependents.
@@ -795,7 +756,6 @@ destroy_zfs(char *zonepath)
if (zfs_iter_dependents(zhp, has_dependent, NULL) != 0 ||
zfs_unmount(zhp, NULL, 0) != 0) {
zfs_close(zhp);
- zfs_set_error_handler(NULL);
return (Z_ERR);
}
@@ -804,10 +764,9 @@ destroy_zfs(char *zonepath)
* to destroy that as well.
*/
if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
- NULL, 0, FALSE) == 0)
+ NULL, 0, B_FALSE) == 0)
is_clone = B_TRUE;
- zfs_set_error_handler(NULL);
if (zfs_destroy(zhp) != 0) {
/*
* If the destroy fails for some reason, try to remount
@@ -818,7 +777,6 @@ destroy_zfs(char *zonepath)
zfs_close(zhp);
return (Z_ERR);
}
- zfs_set_error_handler(noop_err_handler);
(void) printf(gettext("The ZFS file system for this zone has been "
"destroyed.\n"));
@@ -829,17 +787,16 @@ destroy_zfs(char *zonepath)
/*
* Try to clean up the snapshot that the clone was taken from.
*/
- if ((ohp = zfs_open(origin, ZFS_TYPE_SNAPSHOT)) != NULL) {
+ if ((ohp = zfs_open(g_zfs, origin,
+ ZFS_TYPE_SNAPSHOT)) != NULL) {
if (zfs_iter_dependents(ohp, has_dependent, NULL)
- == 0 && zfs_unmount(ohp, NULL, 0) == 0) {
+ == 0 && zfs_unmount(ohp, NULL, 0) == 0)
(void) zfs_destroy(ohp);
- }
zfs_close(ohp);
}
}
zfs_close(zhp);
- zfs_set_error_handler(NULL);
return (Z_OK);
}
@@ -889,12 +846,8 @@ move_zfs(char *zonepath, char *new_zonepath)
int ret = Z_ERR;
zfs_handle_t *zhp;
- zfs_set_error_handler(noop_err_handler);
-
- if ((zhp = mount2zhandle(zonepath)) == NULL) {
- zfs_set_error_handler(NULL);
+ if ((zhp = mount2zhandle(zonepath)) == NULL)
return (Z_ERR);
- }
if (zfs_prop_set(zhp, ZFS_PROP_MOUNTPOINT, new_zonepath) == 0) {
/*
@@ -906,7 +859,6 @@ move_zfs(char *zonepath, char *new_zonepath)
}
zfs_close(zhp);
- zfs_set_error_handler(NULL);
return (ret);
}
@@ -940,14 +892,13 @@ verify_datasets(zone_dochandle_t handle)
return (Z_ERR);
}
- zfs_set_error_handler(err_handler);
-
while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
- current_dataset = dstab.zone_dataset_name;
-
- if ((zhp = zfs_open(dstab.zone_dataset_name,
+ if ((zhp = zfs_open(g_zfs, dstab.zone_dataset_name,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
+ (void) fprintf(stderr, gettext("could not verify zfs "
+ "dataset %s: %s\n"), dstab.zone_dataset_name,
+ libzfs_error_description(g_zfs));
return_code = Z_ERR;
continue;
}
@@ -978,7 +929,6 @@ verify_datasets(zone_dochandle_t handle)
zfs_close(zhp);
}
(void) zonecfg_enddsent(handle);
- zfs_set_error_handler(NULL);
return (return_code);
}
@@ -993,13 +943,11 @@ verify_fs_zfs(struct zone_fstab *fstab)
zfs_handle_t *zhp;
char propbuf[ZFS_MAXPROPLEN];
- zfs_set_error_handler(noop_err_handler);
-
- if ((zhp = zfs_open(fstab->zone_fs_special, ZFS_TYPE_ANY)) == NULL) {
+ if ((zhp = zfs_open(g_zfs, fstab->zone_fs_special,
+ ZFS_TYPE_ANY)) == NULL) {
(void) fprintf(stderr, gettext("could not verify fs %s: "
"could not access zfs dataset '%s'\n"),
fstab->zone_fs_dir, fstab->zone_fs_special);
- zfs_set_error_handler(NULL);
return (Z_ERR);
}
@@ -1008,7 +956,6 @@ verify_fs_zfs(struct zone_fstab *fstab)
"'%s' is not a file system\n"),
fstab->zone_fs_dir, fstab->zone_fs_special);
zfs_close(zhp);
- zfs_set_error_handler(NULL);
return (Z_ERR);
}
@@ -1018,11 +965,21 @@ verify_fs_zfs(struct zone_fstab *fstab)
"zfs '%s' mountpoint is not \"legacy\"\n"),
fstab->zone_fs_dir, fstab->zone_fs_special);
zfs_close(zhp);
- zfs_set_error_handler(NULL);
return (Z_ERR);
}
zfs_close(zhp);
- zfs_set_error_handler(NULL);
+ return (Z_OK);
+}
+
+int
+init_zfs(void)
+{
+ if ((g_zfs = libzfs_init()) == NULL) {
+ (void) fprintf(stderr, gettext("failed to initialize ZFS "
+ "library\n"));
+ return (Z_ERR);
+ }
+
return (Z_OK);
}
diff --git a/usr/src/cmd/zoneadm/zoneadm.c b/usr/src/cmd/zoneadm/zoneadm.c
index e25895736c..50c3b1ecd7 100644
--- a/usr/src/cmd/zoneadm/zoneadm.c
+++ b/usr/src/cmd/zoneadm/zoneadm.c
@@ -4433,6 +4433,9 @@ main(int argc, char **argv)
exit(Z_ERR);
}
+ if (init_zfs() != Z_OK)
+ exit(Z_ERR);
+
while ((arg = getopt(argc, argv, "?z:R:")) != EOF) {
switch (arg) {
case '?':
diff --git a/usr/src/cmd/zoneadm/zoneadm.h b/usr/src/cmd/zoneadm/zoneadm.h
index 161d7cee18..d6aa67798d 100644
--- a/usr/src/cmd/zoneadm/zoneadm.h
+++ b/usr/src/cmd/zoneadm/zoneadm.h
@@ -81,6 +81,7 @@ extern boolean_t is_zonepath_zfs(char *zonepath);
extern int move_zfs(char *zonepath, char *new_zonepath);
extern int verify_datasets(zone_dochandle_t handle);
extern int verify_fs_zfs(struct zone_fstab *fstab);
+extern int init_zfs(void);
/*
* sw_cmp.c
diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c
index d629c7a9c4..84b06afb17 100644
--- a/usr/src/cmd/zoneadmd/vplat.c
+++ b/usr/src/cmd/zoneadmd/vplat.c
@@ -2631,21 +2631,13 @@ out:
return (error);
}
-/* ARGSUSED */
-static void
-zfs_error_handler(const char *fmt, va_list ap)
-{
- /*
- * Do nothing - we interpret the failures from each libzfs call below.
- */
-}
-
static int
validate_datasets(zlog_t *zlogp)
{
zone_dochandle_t handle;
struct zone_dstab dstab;
zfs_handle_t *zhp;
+ libzfs_handle_t *hdl;
if ((handle = zonecfg_init_handle()) == NULL) {
zerror(zlogp, B_TRUE, "getting zone configuration handle");
@@ -2663,15 +2655,20 @@ validate_datasets(zlog_t *zlogp)
return (-1);
}
- zfs_set_error_handler(zfs_error_handler);
+ if ((hdl = libzfs_init()) == NULL) {
+ zerror(zlogp, B_FALSE, "opening ZFS library");
+ zonecfg_fini_handle(handle);
+ return (-1);
+ }
while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
- if ((zhp = zfs_open(dstab.zone_dataset_name,
+ if ((zhp = zfs_open(hdl, dstab.zone_dataset_name,
ZFS_TYPE_FILESYSTEM)) == NULL) {
zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'",
dstab.zone_dataset_name);
zonecfg_fini_handle(handle);
+ libzfs_fini(hdl);
return (-1);
}
@@ -2686,6 +2683,7 @@ validate_datasets(zlog_t *zlogp)
dstab.zone_dataset_name);
zonecfg_fini_handle(handle);
zfs_close(zhp);
+ libzfs_fini(hdl);
return (-1);
}
@@ -2694,6 +2692,7 @@ validate_datasets(zlog_t *zlogp)
(void) zonecfg_enddsent(handle);
zonecfg_fini_handle(handle);
+ libzfs_fini(hdl);
return (0);
}
diff --git a/usr/src/cmd/zpool/zpool_dataset.c b/usr/src/cmd/zpool/zpool_dataset.c
index d6cdde87bd..0b4c6a15fe 100644
--- a/usr/src/cmd/zpool/zpool_dataset.c
+++ b/usr/src/cmd/zpool/zpool_dataset.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -59,6 +58,8 @@ do_unmount(zfs_handle_t *zfsp, void *data)
if (zfs_unmount(zfsp, NULL, cbp->cb_force ? MS_FORCE : 0) != 0)
cbp->cb_failed = 1;
+ zfs_close(zfsp);
+
return (0);
}
@@ -78,7 +79,8 @@ unmount_datasets(zpool_handle_t *zhp, int force)
if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL)
return (0);
- if ((zfsp = zfs_open(zpool_get_name(zhp), ZFS_TYPE_FILESYSTEM)) == NULL)
+ if ((zfsp = zfs_open(g_zfs, zpool_get_name(zhp),
+ ZFS_TYPE_FILESYSTEM)) == NULL)
return (-1);
cb.cb_force = force;
@@ -89,12 +91,8 @@ unmount_datasets(zpool_handle_t *zhp, int force)
return (-1);
}
- if (do_unmount(zfsp, &cb) != 0 || cb.cb_failed != 0) {
- zfs_close(zfsp);
+ if (do_unmount(zfsp, &cb) != 0 || cb.cb_failed != 0)
return (-1);
- }
-
- zfs_close(zfsp);
return (0);
}
@@ -108,8 +106,10 @@ do_mount_share(zfs_handle_t *zfsp, void *data)
cbdata_t *cbp = data;
int ret;
- if (zfs_get_type(zfsp) != ZFS_TYPE_FILESYSTEM)
+ if (zfs_get_type(zfsp) != ZFS_TYPE_FILESYSTEM) {
+ zfs_close(zfsp);
return (0);
+ }
if (zfs_mount(zfsp, cbp->cb_mntopts, 0) != 0)
cbp->cb_failed = 1;
@@ -118,6 +118,7 @@ do_mount_share(zfs_handle_t *zfsp, void *data)
ret = zfs_iter_children(zfsp, do_mount_share, data);
+ zfs_close(zfsp);
return (ret);
}
@@ -142,15 +143,12 @@ mount_share_datasets(zpool_handle_t *zhp, const char *options)
if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL)
return (0);
- if ((zfsp = zfs_open(zpool_get_name(zhp), ZFS_TYPE_FILESYSTEM)) == NULL)
+ if ((zfsp = zfs_open(g_zfs, zpool_get_name(zhp),
+ ZFS_TYPE_FILESYSTEM)) == NULL)
return (-1);
- if (do_mount_share(zfsp, &cb) != 0 || cb.cb_failed != 0) {
- zfs_close(zfsp);
+ if (do_mount_share(zfsp, &cb) != 0 || cb.cb_failed != 0)
return (-1);
- }
-
- zfs_close(zfsp);
return (0);
}
diff --git a/usr/src/cmd/zpool/zpool_iter.c b/usr/src/cmd/zpool/zpool_iter.c
index f99396da81..4a0a9ef162 100644
--- a/usr/src/cmd/zpool/zpool_iter.c
+++ b/usr/src/cmd/zpool/zpool_iter.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -50,7 +49,7 @@ typedef struct zpool_node {
} zpool_node_t;
struct zpool_list {
- int zl_findall;
+ boolean_t zl_findall;
uu_avl_t *zl_avl;
uu_avl_pool_t *zl_pool;
};
@@ -114,18 +113,18 @@ pool_list_get(int argc, char **argv, int *err)
no_memory();
if (argc == 0) {
- (void) zpool_iter(add_pool, zlp);
- zlp->zl_findall = TRUE;
+ (void) zpool_iter(g_zfs, add_pool, zlp);
+ zlp->zl_findall = B_TRUE;
} else {
int i;
for (i = 0; i < argc; i++) {
zpool_handle_t *zhp;
- if ((zhp = zpool_open_canfail(argv[i])) != NULL)
+ if ((zhp = zpool_open_canfail(g_zfs, argv[i])) != NULL)
(void) add_pool(zhp, zlp);
else
- *err = TRUE;
+ *err = B_TRUE;
}
}
@@ -141,7 +140,7 @@ void
pool_list_update(zpool_list_t *zlp)
{
if (zlp->zl_findall)
- (void) zpool_iter(add_pool, zlp);
+ (void) zpool_iter(g_zfs, add_pool, zlp);
}
/*
@@ -223,7 +222,7 @@ pool_list_count(zpool_list_t *zlp)
* using the pool_list_* interfaces.
*/
int
-for_each_pool(int argc, char **argv, int unavail, zpool_iter_f func,
+for_each_pool(int argc, char **argv, boolean_t unavail, zpool_iter_f func,
void *data)
{
zpool_list_t *list;
diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c
index e2297b24aa..c963776a9f 100644
--- a/usr/src/cmd/zpool/zpool_main.c
+++ b/usr/src/cmd/zpool/zpool_main.c
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -51,6 +52,7 @@ static int zpool_do_create(int, char **);
static int zpool_do_destroy(int, char **);
static int zpool_do_add(int, char **);
+static int zpool_do_remove(int, char **);
static int zpool_do_list(int, char **);
static int zpool_do_iostat(int, char **);
@@ -76,7 +78,7 @@ static int zpool_do_upgrade(int, char **);
* debugging facilities.
*/
const char *
-_umem_debug_init()
+_umem_debug_init(void)
{
return ("default,verbose"); /* $UMEM_DEBUG setting */
}
@@ -101,6 +103,7 @@ typedef enum {
HELP_OFFLINE,
HELP_ONLINE,
HELP_REPLACE,
+ HELP_REMOVE,
HELP_SCRUB,
HELP_STATUS,
HELP_UPGRADE
@@ -127,6 +130,7 @@ static zpool_command_t command_table[] = {
{ "destroy", zpool_do_destroy, HELP_DESTROY },
{ NULL },
{ "add", zpool_do_add, HELP_ADD },
+ { "remove", zpool_do_remove, HELP_REMOVE },
{ NULL },
{ "list", zpool_do_list, HELP_LIST },
{ "iostat", zpool_do_iostat, HELP_IOSTAT },
@@ -188,6 +192,8 @@ get_usage(zpool_help_t idx) {
case HELP_REPLACE:
return (gettext("\treplace [-f] <pool> <device> "
"[new_device]\n"));
+ case HELP_REMOVE:
+ return (gettext("\tremove <pool> <device>\n"));
case HELP_SCRUB:
return (gettext("\tscrub [-s] <pool> ...\n"));
case HELP_STATUS:
@@ -253,7 +259,7 @@ static char *column_subopts[] = {
* a complete usage message.
*/
void
-usage(int requested)
+usage(boolean_t requested)
{
int i;
FILE *fp = requested ? stdout : stderr;
@@ -324,7 +330,7 @@ print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent)
return;
for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(zhp, child[c]);
+ vname = zpool_vdev_name(g_zfs, zhp, child[c]);
print_vdev_tree(zhp, vname, child[c], indent + 2);
free(vname);
}
@@ -344,8 +350,8 @@ print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent)
int
zpool_do_add(int argc, char **argv)
{
- int force = FALSE;
- int dryrun = FALSE;
+ boolean_t force = B_FALSE;
+ boolean_t dryrun = B_FALSE;
int c;
nvlist_t *nvroot;
char *poolname;
@@ -357,15 +363,15 @@ zpool_do_add(int argc, char **argv)
while ((c = getopt(argc, argv, "fn")) != -1) {
switch (c) {
case 'f':
- force = TRUE;
+ force = B_TRUE;
break;
case 'n':
- dryrun = TRUE;
+ dryrun = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -375,11 +381,11 @@ zpool_do_add(int argc, char **argv)
/* get pool name and check number of arguments */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool name argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc < 2) {
(void) fprintf(stderr, gettext("missing vdev specification\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
poolname = argv[0];
@@ -387,7 +393,7 @@ zpool_do_add(int argc, char **argv)
argc--;
argv++;
- if ((zhp = zpool_open(poolname)) == NULL)
+ if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
return (1);
if ((config = zpool_get_config(zhp, NULL)) == NULL) {
@@ -398,7 +404,7 @@ zpool_do_add(int argc, char **argv)
}
/* pass off to get_vdev_spec for processing */
- nvroot = make_root_vdev(config, force, !force, argc, argv);
+ nvroot = make_root_vdev(config, force, !force, B_FALSE, argc, argv);
if (nvroot == NULL) {
zpool_close(zhp);
return (1);
@@ -421,6 +427,46 @@ zpool_do_add(int argc, char **argv)
ret = (zpool_add(zhp, nvroot) != 0);
}
+ nvlist_free(nvroot);
+ zpool_close(zhp);
+
+ return (ret);
+}
+
+/*
+ * zpool remove <pool> <vdev>
+ *
+ * Removes the given vdev from the pool. Currently, this only supports removing
+ * spares from the pool. Eventually, we'll want to support removing leaf vdevs
+ * (as an alias for 'detach') as well as toplevel vdevs.
+ */
+int
+zpool_do_remove(int argc, char **argv)
+{
+ char *poolname;
+ int ret;
+ zpool_handle_t *zhp;
+
+ argc--;
+ argv++;
+
+ /* get pool name and check number of arguments */
+ if (argc < 1) {
+ (void) fprintf(stderr, gettext("missing pool name argument\n"));
+ usage(B_FALSE);
+ }
+ if (argc < 2) {
+ (void) fprintf(stderr, gettext("missing device\n"));
+ usage(B_FALSE);
+ }
+
+ poolname = argv[0];
+
+ if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
+ return (1);
+
+ ret = (zpool_vdev_remove(zhp, argv[1]) != 0);
+
return (ret);
}
@@ -442,23 +488,25 @@ zpool_do_add(int argc, char **argv)
int
zpool_do_create(int argc, char **argv)
{
- int force = FALSE;
- int dryrun = FALSE;
+ boolean_t force = B_FALSE;
+ boolean_t dryrun = B_FALSE;
int c;
nvlist_t *nvroot;
char *poolname;
int ret;
char *altroot = NULL;
char *mountpoint = NULL;
+ nvlist_t **child;
+ uint_t children;
/* check options */
while ((c = getopt(argc, argv, ":fnR:m:")) != -1) {
switch (c) {
case 'f':
- force = TRUE;
+ force = B_TRUE;
break;
case 'n':
- dryrun = TRUE;
+ dryrun = B_TRUE;
break;
case 'R':
altroot = optarg;
@@ -469,12 +517,12 @@ zpool_do_create(int argc, char **argv)
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
- usage(FALSE);
+ usage(B_FALSE);
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -484,11 +532,11 @@ zpool_do_create(int argc, char **argv)
/* get pool name and check number of arguments */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool name argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc < 2) {
(void) fprintf(stderr, gettext("missing vdev specification\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
poolname = argv[0];
@@ -506,13 +554,26 @@ zpool_do_create(int argc, char **argv)
}
/* pass off to get_vdev_spec for bulk processing */
- nvroot = make_root_vdev(NULL, force, !force, argc - 1, argv + 1);
+ nvroot = make_root_vdev(NULL, force, !force, B_FALSE, argc - 1,
+ argv + 1);
if (nvroot == NULL)
return (1);
+ /* make_root_vdev() allows 0 toplevel children if there are spares */
+ verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) == 0);
+ if (children == 0) {
+ (void) fprintf(stderr, gettext("invalid vdev "
+ "specification: at least one toplevel vdev must be "
+ "specified\n"));
+ return (1);
+ }
+
+
if (altroot != NULL && altroot[0] != '/') {
(void) fprintf(stderr, gettext("invalid alternate root '%s': "
"must be an absolute path\n"));
+ nvlist_free(nvroot);
return (1);
}
@@ -530,6 +591,7 @@ zpool_do_create(int argc, char **argv)
(void) fprintf(stderr, gettext("invalid mountpoint "
"'%s': must be an absolute path, 'legacy', or "
"'none'\n"), mountpoint);
+ nvlist_free(nvroot);
return (1);
}
@@ -560,6 +622,7 @@ zpool_do_create(int argc, char **argv)
"'%s' exists and is not empty\n"), buf);
(void) fprintf(stderr, gettext("use '-m' "
"option to provide a different default\n"));
+ nvlist_free(nvroot);
return (1);
}
}
@@ -570,8 +633,6 @@ zpool_do_create(int argc, char **argv)
* For a dry run invocation, print out a basic message and run
* through all the vdevs in the list and print out in an
* appropriate hierarchy.
- *
- * XXZFS find out of we can create the pool?
*/
(void) printf(gettext("would create '%s' with the "
"following layout:\n\n"), poolname);
@@ -584,8 +645,8 @@ zpool_do_create(int argc, char **argv)
/*
* Hand off to libzfs.
*/
- if (zpool_create(poolname, nvroot, altroot) == 0) {
- zfs_handle_t *pool = zfs_open(poolname,
+ if (zpool_create(g_zfs, poolname, nvroot, altroot) == 0) {
+ zfs_handle_t *pool = zfs_open(g_zfs, poolname,
ZFS_TYPE_FILESYSTEM);
if (pool != NULL) {
if (mountpoint != NULL)
@@ -596,8 +657,10 @@ zpool_do_create(int argc, char **argv)
ret = zfs_share(pool);
zfs_close(pool);
}
+ } else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) {
+ (void) fprintf(stderr, gettext("pool name may have "
+ "been omitted\n"));
}
-
}
nvlist_free(nvroot);
@@ -615,7 +678,7 @@ zpool_do_create(int argc, char **argv)
int
zpool_do_destroy(int argc, char **argv)
{
- int force = FALSE;
+ boolean_t force = B_FALSE;
int c;
char *pool;
zpool_handle_t *zhp;
@@ -625,12 +688,12 @@ zpool_do_destroy(int argc, char **argv)
while ((c = getopt(argc, argv, "f")) != -1) {
switch (c) {
case 'f':
- force = TRUE;
+ force = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -640,16 +703,16 @@ zpool_do_destroy(int argc, char **argv)
/* check arguments */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc > 1) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
pool = argv[0];
- if ((zhp = zpool_open_canfail(pool)) == NULL) {
+ if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) {
/*
* As a special case, check for use of '/' in the name, and
* direct the user to use 'zfs destroy' instead.
@@ -685,7 +748,7 @@ zpool_do_destroy(int argc, char **argv)
int
zpool_do_export(int argc, char **argv)
{
- int force = FALSE;
+ boolean_t force = B_FALSE;
int c;
zpool_handle_t *zhp;
int ret;
@@ -695,12 +758,12 @@ zpool_do_export(int argc, char **argv)
while ((c = getopt(argc, argv, "f")) != -1) {
switch (c) {
case 'f':
- force = TRUE;
+ force = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -710,12 +773,12 @@ zpool_do_export(int argc, char **argv)
/* check arguments */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
ret = 0;
for (i = 0; i < argc; i++) {
- if ((zhp = zpool_open_canfail(argv[i])) == NULL) {
+ if ((zhp = zpool_open_canfail(g_zfs, argv[i])) == NULL) {
ret = 1;
continue;
}
@@ -742,7 +805,7 @@ zpool_do_export(int argc, char **argv)
static int
max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max)
{
- char *name = zpool_vdev_name(zhp, nv);
+ char *name = zpool_vdev_name(g_zfs, zhp, nv);
nvlist_t **child;
uint_t c, children;
int ret;
@@ -752,13 +815,22 @@ max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max)
free(name);
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++)
+ if ((ret = max_width(zhp, child[c], depth + 2,
+ max)) > max)
+ max = ret;
+ }
+
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
- &child, &children) != 0)
- return (max);
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++)
+ if ((ret = max_width(zhp, child[c], depth + 2,
+ max)) > max)
+ max = ret;
+ }
- for (c = 0; c < children; c++)
- if ((ret = max_width(zhp, child[c], depth + 2, max)) > max)
- max = ret;
return (max);
}
@@ -819,11 +891,22 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
return;
for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(NULL, child[c]);
+ vname = zpool_vdev_name(g_zfs, NULL, child[c]);
print_import_config(vname, child[c],
namewidth, depth + 2);
free(vname);
}
+
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+ &child, &children) != 0)
+ return;
+
+ (void) printf(gettext("\tspares\n"));
+ for (c = 0; c < children; c++) {
+ vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+ (void) printf("\t %s\n", vname);
+ free(vname);
+ }
}
/*
@@ -1009,13 +1092,13 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
return (1);
}
- if (zpool_import(config, newname, altroot) != 0)
+ if (zpool_import(g_zfs, config, newname, altroot) != 0)
return (1);
if (newname != NULL)
name = (char *)newname;
- verify((zhp = zpool_open(name)) != NULL);
+ verify((zhp = zpool_open(g_zfs, name)) != NULL);
if (mount_share_datasets(zhp, mntopts) != 0) {
zpool_close(zhp);
@@ -1056,24 +1139,24 @@ zpool_do_import(int argc, char **argv)
int c;
int err;
nvlist_t *pools;
- int do_all = FALSE;
- int do_destroyed = FALSE;
+ boolean_t do_all = B_FALSE;
+ boolean_t do_destroyed = B_FALSE;
char *altroot = NULL;
char *mntopts = NULL;
- int do_force = FALSE;
+ boolean_t do_force = B_FALSE;
nvpair_t *elem;
nvlist_t *config;
uint64_t searchguid;
char *searchname;
nvlist_t *found_config;
- int first;
+ boolean_t first;
uint64_t pool_state;
/* check options */
while ((c = getopt(argc, argv, ":Dfd:R:ao:")) != -1) {
switch (c) {
case 'a':
- do_all = TRUE;
+ do_all = B_TRUE;
break;
case 'd':
if (searchdirs == NULL) {
@@ -1089,10 +1172,10 @@ zpool_do_import(int argc, char **argv)
searchdirs[nsearch++] = optarg;
break;
case 'D':
- do_destroyed = TRUE;
+ do_destroyed = B_TRUE;
break;
case 'f':
- do_force = TRUE;
+ do_force = B_TRUE;
break;
case 'o':
mntopts = optarg;
@@ -1103,12 +1186,12 @@ zpool_do_import(int argc, char **argv)
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
- usage(FALSE);
+ usage(B_FALSE);
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -1125,12 +1208,12 @@ zpool_do_import(int argc, char **argv)
if (do_all) {
if (argc != 0) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
} else {
if (argc > 2) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
/*
@@ -1141,12 +1224,15 @@ zpool_do_import(int argc, char **argv)
if (argc == 0 && !priv_ineffect(PRIV_SYS_CONFIG)) {
(void) fprintf(stderr, gettext("cannot "
"discover pools: permission denied\n"));
+ free(searchdirs);
return (1);
}
}
- if ((pools = zpool_find_import(nsearch, searchdirs)) == NULL)
+ if ((pools = zpool_find_import(g_zfs, nsearch, searchdirs)) == NULL) {
+ free(searchdirs);
return (1);
+ }
/*
* We now have a list of all available pools in the given directories.
@@ -1176,7 +1262,7 @@ zpool_do_import(int argc, char **argv)
err = 0;
elem = NULL;
- first = TRUE;
+ first = B_TRUE;
while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
verify(nvpair_value_nvlist(elem, &config) == 0);
@@ -1190,7 +1276,7 @@ zpool_do_import(int argc, char **argv)
if (argc == 0) {
if (first)
- first = FALSE;
+ first = B_FALSE;
else
(void) printf("\n");
@@ -1215,7 +1301,7 @@ zpool_do_import(int argc, char **argv)
"one matching pool\n"), searchname);
(void) fprintf(stderr, gettext(
"import by numeric ID instead\n"));
- err = TRUE;
+ err = B_TRUE;
}
found_config = config;
}
@@ -1241,7 +1327,7 @@ zpool_do_import(int argc, char **argv)
if (found_config == NULL) {
(void) fprintf(stderr, gettext("cannot import '%s': "
"no such pool available\n"), argv[0]);
- err = TRUE;
+ err = B_TRUE;
} else {
err |= do_import(found_config, argc == 1 ? NULL :
argv[1], mntopts, altroot, do_force);
@@ -1257,6 +1343,7 @@ zpool_do_import(int argc, char **argv)
gettext("no pools available to import\n"));
nvlist_free(pools);
+ free(searchdirs);
return (err ? 1 : 0);
}
@@ -1374,7 +1461,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
return;
for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(zhp, newchild[c]);
+ vname = zpool_vdev_name(g_zfs, zhp, newchild[c]);
print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
newchild[c], cb, depth + 2);
free(vname);
@@ -1476,19 +1563,19 @@ zpool_do_iostat(int argc, char **argv)
int npools;
unsigned long interval = 0, count = 0;
zpool_list_t *list;
- int verbose = FALSE;
+ boolean_t verbose = B_FALSE;
iostat_cbdata_t cb;
/* check options */
while ((c = getopt(argc, argv, "v")) != -1) {
switch (c) {
case 'v':
- verbose = TRUE;
+ verbose = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -1508,7 +1595,7 @@ zpool_do_iostat(int argc, char **argv)
if (interval == 0) {
(void) fprintf(stderr, gettext("interval "
"cannot be zero\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
/*
@@ -1540,7 +1627,7 @@ zpool_do_iostat(int argc, char **argv)
if (interval == 0) {
(void) fprintf(stderr, gettext("interval "
"cannot be zero\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
/*
@@ -1559,10 +1646,13 @@ zpool_do_iostat(int argc, char **argv)
if ((list = pool_list_get(argc, argv, &ret)) == NULL)
return (1);
- if (pool_list_count(list) == 0 && argc != 0)
+ if (pool_list_count(list) == 0 && argc != 0) {
+ pool_list_free(list);
return (1);
+ }
if (pool_list_count(list) == 0 && interval == 0) {
+ pool_list_free(list);
(void) fprintf(stderr, gettext("no pools available\n"));
return (1);
}
@@ -1586,14 +1676,14 @@ zpool_do_iostat(int argc, char **argv)
* before calculating the maximum name width, so that any
* configuration changes are properly accounted for.
*/
- (void) pool_list_iter(list, FALSE, refresh_iostat, &cb);
+ (void) pool_list_iter(list, B_FALSE, refresh_iostat, &cb);
/*
* Iterate over all pools to determine the maximum width
* for the pool / device name column across all pools.
*/
cb.cb_namewidth = 0;
- (void) pool_list_iter(list, FALSE, get_namewidth, &cb);
+ (void) pool_list_iter(list, B_FALSE, get_namewidth, &cb);
/*
* If it's the first time, or verbose mode, print the header.
@@ -1601,7 +1691,7 @@ zpool_do_iostat(int argc, char **argv)
if (++cb.cb_iteration == 1 || verbose)
print_iostat_header(&cb);
- (void) pool_list_iter(list, FALSE, print_iostat, &cb);
+ (void) pool_list_iter(list, B_FALSE, print_iostat, &cb);
/*
* If there's more than one pool, and we're not in verbose mode
@@ -1628,10 +1718,10 @@ zpool_do_iostat(int argc, char **argv)
}
typedef struct list_cbdata {
- int cb_scripted;
- int cb_first;
- int cb_fields[MAX_FIELDS];
- int cb_fieldcount;
+ boolean_t cb_scripted;
+ boolean_t cb_first;
+ int cb_fields[MAX_FIELDS];
+ int cb_fieldcount;
} list_cbdata_t;
/*
@@ -1675,7 +1765,7 @@ list_callback(zpool_handle_t *zhp, void *data)
if (cbp->cb_first) {
if (!cbp->cb_scripted)
print_header(cbp->cb_fields, cbp->cb_fieldcount);
- cbp->cb_first = FALSE;
+ cbp->cb_first = B_FALSE;
}
if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
@@ -1803,7 +1893,7 @@ zpool_do_list(int argc, char **argv)
while ((c = getopt(argc, argv, ":Ho:")) != -1) {
switch (c) {
case 'H':
- cb.cb_scripted = TRUE;
+ cb.cb_scripted = B_TRUE;
break;
case 'o':
fields = optarg;
@@ -1811,12 +1901,12 @@ zpool_do_list(int argc, char **argv)
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
- usage(FALSE);
+ usage(B_FALSE);
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -1827,23 +1917,23 @@ zpool_do_list(int argc, char **argv)
if (cb.cb_fieldcount == MAX_FIELDS) {
(void) fprintf(stderr, gettext("too many "
"properties given to -o option\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if ((cb.cb_fields[cb.cb_fieldcount] = getsubopt(&fields,
column_subopts, &value)) == -1) {
(void) fprintf(stderr, gettext("invalid property "
"'%s'\n"), value);
- usage(FALSE);
+ usage(B_FALSE);
}
cb.cb_fieldcount++;
}
- cb.cb_first = TRUE;
+ cb.cb_first = B_TRUE;
- ret = for_each_pool(argc, argv, TRUE, list_callback, &cb);
+ ret = for_each_pool(argc, argv, B_TRUE, list_callback, &cb);
if (argc == 0 && cb.cb_first) {
(void) printf(gettext("no pools available\n"));
@@ -1883,23 +1973,24 @@ zpool_get_vdev_by_name(nvlist_t *nv, char *name)
static int
zpool_do_attach_or_replace(int argc, char **argv, int replacing)
{
- int force = FALSE;
+ boolean_t force = B_FALSE;
int c;
nvlist_t *nvroot;
char *poolname, *old_disk, *new_disk;
zpool_handle_t *zhp;
nvlist_t *config;
+ int ret;
/* check options */
while ((c = getopt(argc, argv, "f")) != -1) {
switch (c) {
case 'f':
- force = TRUE;
+ force = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -1909,7 +2000,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
/* get pool name and check number of arguments */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool name argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
poolname = argv[0];
@@ -1917,7 +2008,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
if (argc < 2) {
(void) fprintf(stderr,
gettext("missing <device> specification\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
old_disk = argv[1];
@@ -1926,7 +2017,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
if (!replacing) {
(void) fprintf(stderr,
gettext("missing <new_device> specification\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
new_disk = old_disk;
argc -= 1;
@@ -1939,10 +2030,10 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
if (argc > 1) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
- if ((zhp = zpool_open(poolname)) == NULL)
+ if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
return (1);
if ((config = zpool_get_config(zhp, NULL)) == NULL) {
@@ -1952,13 +2043,18 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
return (1);
}
- nvroot = make_root_vdev(config, force, B_FALSE, argc, argv);
+ nvroot = make_root_vdev(config, force, B_FALSE, replacing, argc, argv);
if (nvroot == NULL) {
zpool_close(zhp);
return (1);
}
- return (zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing));
+ ret = zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing);
+
+ nvlist_free(nvroot);
+ zpool_close(zhp);
+
+ return (ret);
}
/*
@@ -2008,6 +2104,7 @@ zpool_do_detach(int argc, char **argv)
int c;
char *poolname, *path;
zpool_handle_t *zhp;
+ int ret;
/* check options */
while ((c = getopt(argc, argv, "f")) != -1) {
@@ -2016,7 +2113,7 @@ zpool_do_detach(int argc, char **argv)
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -2026,22 +2123,26 @@ zpool_do_detach(int argc, char **argv)
/* get pool name and check number of arguments */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool name argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc < 2) {
(void) fprintf(stderr,
gettext("missing <device> specification\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
poolname = argv[0];
path = argv[1];
- if ((zhp = zpool_open(poolname)) == NULL)
+ if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
return (1);
- return (zpool_vdev_detach(zhp, path));
+ ret = zpool_vdev_detach(zhp, path);
+
+ zpool_close(zhp);
+
+ return (ret);
}
/*
@@ -2063,7 +2164,7 @@ zpool_do_online(int argc, char **argv)
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -2073,16 +2174,16 @@ zpool_do_online(int argc, char **argv)
/* get pool name and check number of arguments */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool name\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc < 2) {
(void) fprintf(stderr, gettext("missing device name\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
poolname = argv[0];
- if ((zhp = zpool_open(poolname)) == NULL)
+ if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
return (1);
for (i = 1; i < argc; i++)
@@ -2092,6 +2193,8 @@ zpool_do_online(int argc, char **argv)
else
ret = 1;
+ zpool_close(zhp);
+
return (ret);
}
@@ -2112,19 +2215,20 @@ zpool_do_offline(int argc, char **argv)
int c, i;
char *poolname;
zpool_handle_t *zhp;
- int ret = 0, istmp = FALSE;
+ int ret = 0;
+ boolean_t istmp = B_FALSE;
/* check options */
while ((c = getopt(argc, argv, "ft")) != -1) {
switch (c) {
case 't':
- istmp = TRUE;
+ istmp = B_TRUE;
break;
case 'f':
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -2134,16 +2238,16 @@ zpool_do_offline(int argc, char **argv)
/* get pool name and check number of arguments */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool name\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc < 2) {
(void) fprintf(stderr, gettext("missing device name\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
poolname = argv[0];
- if ((zhp = zpool_open(poolname)) == NULL)
+ if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
return (1);
for (i = 1; i < argc; i++)
@@ -2153,6 +2257,8 @@ zpool_do_offline(int argc, char **argv)
else
ret = 1;
+ zpool_close(zhp);
+
return (ret);
}
@@ -2170,18 +2276,18 @@ zpool_do_clear(int argc, char **argv)
if (argc < 2) {
(void) fprintf(stderr, gettext("missing pool name\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
if (argc > 3) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
pool = argv[1];
device = argc == 3 ? argv[2] : NULL;
- if ((zhp = zpool_open(pool)) == NULL)
+ if ((zhp = zpool_open(g_zfs, pool)) == NULL)
return (1);
if (zpool_clear(zhp, device) != 0)
@@ -2235,7 +2341,7 @@ zpool_do_scrub(int argc, char **argv)
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -2244,17 +2350,17 @@ zpool_do_scrub(int argc, char **argv)
if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool name argument\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
- return (for_each_pool(argc, argv, TRUE, scrub_callback, &cb));
+ return (for_each_pool(argc, argv, B_TRUE, scrub_callback, &cb));
}
typedef struct status_cbdata {
- int cb_verbose;
- int cb_explain;
- int cb_count;
- int cb_first;
+ int cb_count;
+ boolean_t cb_verbose;
+ boolean_t cb_explain;
+ boolean_t cb_first;
} status_cbdata_t;
/*
@@ -2311,12 +2417,57 @@ print_scrub_status(nvlist_t *nvroot)
(u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60));
}
+typedef struct spare_cbdata {
+ uint64_t cb_guid;
+ zpool_handle_t *cb_zhp;
+} spare_cbdata_t;
+
+static boolean_t
+find_vdev(nvlist_t *nv, uint64_t search)
+{
+ uint64_t guid;
+ nvlist_t **child;
+ uint_t c, children;
+
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
+ search == guid)
+ return (B_TRUE);
+
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++)
+ if (find_vdev(child[c], search))
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+static int
+find_spare(zpool_handle_t *zhp, void *data)
+{
+ spare_cbdata_t *cbp = data;
+ nvlist_t *config, *nvroot;
+
+ config = zpool_get_config(zhp, NULL);
+ verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &nvroot) == 0);
+
+ if (find_vdev(nvroot, cbp->cb_guid)) {
+ cbp->cb_zhp = zhp;
+ return (1);
+ }
+
+ zpool_close(zhp);
+ return (0);
+}
+
/*
* Print out configuration state as requested by status_callback.
*/
void
print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
- int namewidth, int depth)
+ int namewidth, int depth, boolean_t isspare)
{
nvlist_t **child;
uint_t c, children;
@@ -2324,6 +2475,8 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
char rbuf[6], wbuf[6], cbuf[6], repaired[7];
char *vname;
uint64_t notpresent;
+ spare_cbdata_t cb;
+ const char *state;
verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
(uint64_t **)&vs, &c) == 0);
@@ -2332,13 +2485,27 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
&child, &children) != 0)
children = 0;
+ state = state_to_name(vs);
+ if (isspare) {
+ /*
+ * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
+ * online drives.
+ */
+ if (vs->vs_aux == VDEV_AUX_SPARED)
+ state = "INUSE";
+ else if (vs->vs_state == VDEV_STATE_HEALTHY)
+ state = "AVAIL";
+ }
+
(void) printf("\t%*s%-*s %-8s", depth, "", namewidth - depth,
- name, state_to_name(vs));
+ name, state);
- zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
- zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
- zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
- (void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
+ if (!isspare) {
+ zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
+ zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
+ zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
+ (void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
+ }
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
&notpresent) == 0) {
@@ -2365,6 +2532,24 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
(void) printf(gettext("newer version"));
break;
+ case VDEV_AUX_SPARED:
+ verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
+ &cb.cb_guid) == 0);
+ if (zpool_iter(g_zfs, find_spare, &cb) == 1) {
+ if (strcmp(zpool_get_name(cb.cb_zhp),
+ zpool_get_name(zhp)) == 0)
+ (void) printf(gettext("currently in "
+ "use"));
+ else
+ (void) printf(gettext("in use by "
+ "pool '%s'"),
+ zpool_get_name(cb.cb_zhp));
+ zpool_close(cb.cb_zhp);
+ } else {
+ (void) printf(gettext("currently in use"));
+ }
+ break;
+
default:
(void) printf(gettext("corrupted data"));
break;
@@ -2382,9 +2567,9 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
(void) printf("\n");
for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(zhp, child[c]);
+ vname = zpool_vdev_name(g_zfs, zhp, child[c]);
print_status_config(zhp, vname, child[c],
- namewidth, depth + 2);
+ namewidth, depth + 2, isspare);
free(vname);
}
}
@@ -2443,6 +2628,26 @@ print_error_log(zpool_handle_t *zhp)
}
}
+static void
+print_spares(zpool_handle_t *zhp, nvlist_t **spares, uint_t nspares,
+ int namewidth)
+{
+ uint_t i;
+ char *name;
+
+ if (nspares == 0)
+ return;
+
+ (void) printf(gettext("\tspares\n"));
+
+ for (i = 0; i < nspares; i++) {
+ name = zpool_vdev_name(g_zfs, zhp, spares[i]);
+ print_status_config(zhp, name, spares[i],
+ namewidth, 2, B_TRUE);
+ free(name);
+ }
+}
+
/*
* Display a summary of pool status. Displays a summary such as:
*
@@ -2480,7 +2685,7 @@ status_callback(zpool_handle_t *zhp, void *data)
return (0);
if (cbp->cb_first)
- cbp->cb_first = FALSE;
+ cbp->cb_first = B_FALSE;
else
(void) printf("\n");
@@ -2603,6 +2808,8 @@ status_callback(zpool_handle_t *zhp, void *data)
int namewidth;
uint64_t nerr;
size_t realerr;
+ nvlist_t **spares;
+ uint_t nspares;
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
@@ -2618,7 +2825,11 @@ status_callback(zpool_handle_t *zhp, void *data)
(void) printf(gettext("\t%-*s %-8s %5s %5s %5s\n"), namewidth,
"NAME", "STATE", "READ", "WRITE", "CKSUM");
print_status_config(zhp, zpool_get_name(zhp), nvroot,
- namewidth, 0);
+ namewidth, 0, B_FALSE);
+
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ &spares, &nspares) == 0)
+ print_spares(zhp, spares, nspares, namewidth);
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
&nerr) == 0) {
@@ -2632,6 +2843,7 @@ status_callback(zpool_handle_t *zhp, void *data)
nerr = realerr;
(void) printf("\n");
+
if (nerr == 0)
(void) printf(gettext("errors: No known data "
"errors\n"));
@@ -2668,24 +2880,24 @@ zpool_do_status(int argc, char **argv)
while ((c = getopt(argc, argv, "vx")) != -1) {
switch (c) {
case 'v':
- cb.cb_verbose = TRUE;
+ cb.cb_verbose = B_TRUE;
break;
case 'x':
- cb.cb_explain = TRUE;
+ cb.cb_explain = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
argc -= optind;
argv += optind;
- cb.cb_first = TRUE;
+ cb.cb_first = B_TRUE;
- ret = for_each_pool(argc, argv, TRUE, status_callback, &cb);
+ ret = for_each_pool(argc, argv, B_TRUE, status_callback, &cb);
if (argc == 0 && cb.cb_count == 0)
(void) printf(gettext("no pools available\n"));
@@ -2731,13 +2943,13 @@ upgrade_cb(zpool_handle_t *zhp, void *arg)
"versions.\n\n"));
(void) printf(gettext("VER POOL\n"));
(void) printf(gettext("--- ------------\n"));
- cbp->cb_first = FALSE;
+ cbp->cb_first = B_FALSE;
}
(void) printf("%2llu %s\n", version,
zpool_get_name(zhp));
} else {
- cbp->cb_first = FALSE;
+ cbp->cb_first = B_FALSE;
ret = zpool_upgrade(zhp);
if (ret == 0)
(void) printf(gettext("Successfully upgraded "
@@ -2752,7 +2964,7 @@ upgrade_cb(zpool_handle_t *zhp, void *arg)
"cannot be accessed on the current system.\n\n"));
(void) printf(gettext("VER POOL\n"));
(void) printf(gettext("--- ------------\n"));
- cbp->cb_first = FALSE;
+ cbp->cb_first = B_FALSE;
}
(void) printf("%2llu %s\n", version,
@@ -2811,7 +3023,7 @@ zpool_do_upgrade(int argc, char **argv)
while ((c = getopt(argc, argv, "av")) != -1) {
switch (c) {
case 'a':
- cb.cb_all = TRUE;
+ cb.cb_all = B_TRUE;
break;
case 'v':
showversions = B_TRUE;
@@ -2819,7 +3031,7 @@ zpool_do_upgrade(int argc, char **argv)
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(FALSE);
+ usage(B_FALSE);
}
}
@@ -2830,28 +3042,30 @@ zpool_do_upgrade(int argc, char **argv)
if (cb.cb_all || argc != 0) {
(void) fprintf(stderr, gettext("-v option is "
"incompatible with other arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
} else if (cb.cb_all) {
if (argc != 0) {
(void) fprintf(stderr, gettext("-a option is "
"incompatible with other arguments\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
}
(void) printf(gettext("This system is currently running ZFS version "
"%llu.\n\n"), ZFS_VERSION);
- cb.cb_first = TRUE;
+ cb.cb_first = B_TRUE;
if (showversions) {
(void) printf(gettext("The following versions are "
"suppored:\n\n"));
(void) printf(gettext("VER DESCRIPTION\n"));
(void) printf("--- -----------------------------------------"
"---------------\n");
- (void) printf(gettext(" 1 Initial ZFS version.\n"));
+ (void) printf(gettext(" 1 Initial ZFS version\n"));
(void) printf(gettext(" 2 Ditto blocks "
"(replicated metadata)\n"));
+ (void) printf(gettext(" 3 Hot spares and double parity "
+ "RAID-Z\n"));
(void) printf(gettext("\nFor more information on a particular "
"version, including supported releases, see:\n\n"));
(void) printf("http://www.opensolaris.org/os/community/zfs/"
@@ -2860,7 +3074,7 @@ zpool_do_upgrade(int argc, char **argv)
} else if (argc == 0) {
int notfound;
- ret = zpool_iter(upgrade_cb, &cb);
+ ret = zpool_iter(g_zfs, upgrade_cb, &cb);
notfound = cb.cb_first;
if (!cb.cb_all && ret == 0) {
@@ -2868,7 +3082,7 @@ zpool_do_upgrade(int argc, char **argv)
(void) printf("\n");
cb.cb_first = B_TRUE;
cb.cb_newer = B_TRUE;
- ret = zpool_iter(upgrade_cb, &cb);
+ ret = zpool_iter(g_zfs, upgrade_cb, &cb);
if (!cb.cb_first) {
notfound = B_FALSE;
(void) printf("\n");
@@ -2885,7 +3099,7 @@ zpool_do_upgrade(int argc, char **argv)
"their associated\nfeatures.\n"));
}
} else {
- ret = for_each_pool(argc, argv, FALSE, upgrade_one, NULL);
+ ret = for_each_pool(argc, argv, B_FALSE, upgrade_one, NULL);
}
return (ret);
@@ -2901,6 +3115,14 @@ main(int argc, char **argv)
(void) setlocale(LC_ALL, "");
(void) textdomain(TEXT_DOMAIN);
+ if ((g_zfs = libzfs_init()) == NULL) {
+ (void) fprintf(stderr, gettext("internal error: failed to "
+ "initialize ZFS library\n"));
+ return (1);
+ }
+
+ libzfs_print_on_error(g_zfs, B_TRUE);
+
opterr = 0;
/*
@@ -2908,7 +3130,7 @@ main(int argc, char **argv)
*/
if (argc < 2) {
(void) fprintf(stderr, gettext("missing command\n"));
- usage(FALSE);
+ usage(B_FALSE);
}
cmdname = argv[1];
@@ -2917,7 +3139,7 @@ main(int argc, char **argv)
* Special case '-?'
*/
if (strcmp(cmdname, "-?") == 0)
- usage(TRUE);
+ usage(B_TRUE);
/*
* Run the appropriate command.
@@ -2946,9 +3168,11 @@ main(int argc, char **argv)
if (i == NCOMMAND) {
(void) fprintf(stderr, gettext("unrecognized "
"command '%s'\n"), cmdname);
- usage(FALSE);
+ usage(B_FALSE);
}
+ libzfs_fini(g_zfs);
+
/*
* The 'ZFS_ABORT' environment variable causes us to dump core on exit
* for the purposes of running ::findleaks.
diff --git a/usr/src/cmd/zpool/zpool_util.h b/usr/src/cmd/zpool/zpool_util.h
index b2243e8f08..3cb91756de 100644
--- a/usr/src/cmd/zpool/zpool_util.h
+++ b/usr/src/cmd/zpool/zpool_util.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -47,12 +46,12 @@ void no_memory(void);
* Virtual device functions
*/
nvlist_t *make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
- int argc, char **argv);
+ boolean_t isreplace, int argc, char **argv);
/*
* Pool list functions
*/
-int for_each_pool(int, char **, int unavail, zpool_iter_f, void *);
+int for_each_pool(int, char **, boolean_t unavail, zpool_iter_f, void *);
typedef struct zpool_list zpool_list_t;
@@ -69,6 +68,8 @@ void pool_list_remove(zpool_list_t *, zpool_handle_t *);
int unmount_datasets(zpool_handle_t *, int);
int mount_share_datasets(zpool_handle_t *, const char *);
+libzfs_handle_t *g_zfs;
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/cmd/zpool/zpool_vdev.c b/usr/src/cmd/zpool/zpool_vdev.c
index 6fba820d10..fa106dffb9 100644
--- a/usr/src/cmd/zpool/zpool_vdev.c
+++ b/usr/src/cmd/zpool/zpool_vdev.c
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -34,14 +35,19 @@
* file=(path=...)
*
* Group vdevs
- * raidz=(...)
+ * raidz[1|2]=(...)
* mirror=(...)
*
+ * Hot spares
+ *
* While the underlying implementation supports it, group vdevs cannot contain
* other group vdevs. All userland verification of devices is contained within
* this file. If successful, the nvlist returned can be passed directly to the
* kernel; we've done as much verification as possible in userland.
*
+ * Hot spares are a special case, and passed down as an array of disk vdevs, at
+ * the same level as the root of the vdev tree.
+ *
* The only function exported by this file is 'get_vdev_spec'. The function
* performs several passes:
*
@@ -84,10 +90,11 @@
* vdev_error() function keeps track of whether we have seen an error yet, and
* prints out a header if its the first error we've seen.
*/
-int error_seen;
-int is_force;
+boolean_t error_seen;
+boolean_t is_force;
-void
+/*PRINTFLIKE1*/
+static void
vdev_error(const char *fmt, ...)
{
va_list ap;
@@ -100,7 +107,7 @@ vdev_error(const char *fmt, ...)
else
(void) fprintf(stderr, gettext("the following errors "
"must be manually repaired:\n"));
- error_seen = TRUE;
+ error_seen = B_TRUE;
}
va_start(ap, fmt);
@@ -112,10 +119,10 @@ static void
libdiskmgt_error(int error)
{
/*
- * ENXIO is a valid error message if the device doesn't live in
+ * ENXIO/ENODEV is a valid error message if the device doesn't live in
* /dev/dsk. Don't bother printing an error message in this case.
*/
- if (error == ENXIO)
+ if (error == ENXIO || error == ENODEV)
return;
(void) fprintf(stderr, gettext("warning: device in use checking "
@@ -126,7 +133,7 @@ libdiskmgt_error(int error)
* Validate a device, passing the bulk of the work off to libdiskmgt.
*/
int
-check_slice(const char *path, int force, int wholedisk)
+check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare)
{
char *msg;
int error = 0;
@@ -137,12 +144,18 @@ check_slice(const char *path, int force, int wholedisk)
if (error != 0) {
libdiskmgt_error(error);
return (0);
- } else {
+ } else if (!isspare ||
+ strstr(msg, gettext("hot spare")) == NULL) {
+ /*
+ * The above check is a rather severe hack. It would
+ * probably make more sense to have DM_WHO_ZPOOL_SPARE
+ * instead.
+ */
vdev_error("%s", msg);
free(msg);
+ ret = -1;
}
- ret = -1;
}
/*
@@ -172,7 +185,7 @@ check_slice(const char *path, int force, int wholedisk)
*/
/* ARGSUSED */
int
-check_disk(const char *name, dm_descriptor_t disk, int force)
+check_disk(const char *name, dm_descriptor_t disk, int force, int isspare)
{
dm_descriptor_t *drive, *media, *slice;
int err = 0;
@@ -227,8 +240,12 @@ check_disk(const char *name, dm_descriptor_t disk, int force)
* overlapping slices because we are using the whole disk.
*/
for (i = 0; slice[i] != NULL; i++) {
- if (check_slice(dm_get_name(slice[i], &err), force, TRUE) != 0)
+ char *name = dm_get_name(slice[i], &err);
+
+ if (check_slice(name, force, B_TRUE, isspare) != 0)
ret = -1;
+
+ dm_free_name(name);
}
dm_free_descriptors(slice);
@@ -239,7 +256,7 @@ check_disk(const char *name, dm_descriptor_t disk, int force)
* Validate a device.
*/
int
-check_device(const char *path, int force)
+check_device(const char *path, boolean_t force, boolean_t isspare)
{
dm_descriptor_t desc;
int err;
@@ -252,12 +269,12 @@ check_device(const char *path, int force)
assert(dev != NULL);
dev++;
if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) {
- err = check_disk(path, desc, force);
+ err = check_disk(path, desc, force, isspare);
dm_free_descriptor(desc);
return (err);
}
- return (check_slice(path, force, FALSE));
+ return (check_slice(path, force, B_FALSE, isspare));
}
/*
@@ -265,17 +282,18 @@ check_device(const char *path, int force)
* not in use by another pool.
*/
int
-check_file(const char *file, int force)
+check_file(const char *file, boolean_t force, boolean_t isspare)
{
char *name;
int fd;
int ret = 0;
pool_state_t state;
+ boolean_t inuse;
if ((fd = open(file, O_RDONLY)) < 0)
return (0);
- if (zpool_in_use(fd, &state, &name)) {
+ if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) {
const char *desc;
switch (state) {
@@ -296,9 +314,24 @@ check_file(const char *file, int force)
break;
}
- if (state == POOL_STATE_ACTIVE || !force) {
- vdev_error(gettext("%s is part of %s pool '%s'\n"),
- file, desc, name);
+ /*
+ * Allow hot spares to be shared between pools.
+ */
+ if (state == POOL_STATE_SPARE && isspare)
+ return (0);
+
+ if (state == POOL_STATE_ACTIVE ||
+ state == POOL_STATE_SPARE || !force) {
+ switch (state) {
+ case POOL_STATE_SPARE:
+ vdev_error(gettext("%s is reserved as a hot "
+ "spare for pool %s\n"), file, name);
+ break;
+ default:
+ vdev_error(gettext("%s is part of %s pool "
+ "'%s'\n"), file, desc, name);
+ break;
+ }
ret = -1;
}
@@ -309,16 +342,16 @@ check_file(const char *file, int force)
return (ret);
}
-static int
+static boolean_t
is_whole_disk(const char *arg, struct stat64 *statbuf)
{
char path[MAXPATHLEN];
(void) snprintf(path, sizeof (path), "%s%s", arg, BACKUP_SLICE);
if (stat64(path, statbuf) == 0)
- return (TRUE);
+ return (B_TRUE);
- return (FALSE);
+ return (B_FALSE);
}
/*
@@ -337,7 +370,7 @@ make_leaf_vdev(const char *arg)
struct stat64 statbuf;
nvlist_t *vdev = NULL;
char *type = NULL;
- int wholedisk = FALSE;
+ boolean_t wholedisk = B_FALSE;
/*
* Determine what type of vdev this is, and put the full path into
@@ -350,7 +383,7 @@ make_leaf_vdev(const char *arg)
* examining the file descriptor afterwards.
*/
if (is_whole_disk(arg, &statbuf)) {
- wholedisk = TRUE;
+ wholedisk = B_TRUE;
} else if (stat64(arg, &statbuf) != 0) {
(void) fprintf(stderr,
gettext("cannot open '%s': %s\n"),
@@ -369,7 +402,7 @@ make_leaf_vdev(const char *arg)
(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT,
arg);
if (is_whole_disk(path, &statbuf)) {
- wholedisk = TRUE;
+ wholedisk = B_TRUE;
} else if (stat64(path, &statbuf) != 0) {
/*
* If we got ENOENT, then the user gave us
@@ -472,8 +505,9 @@ make_leaf_vdev(const char *arg)
* spec have consistent replication levels.
*/
typedef struct replication_level {
- char *type;
- int level;
+ char *zprl_type;
+ uint64_t zprl_children;
+ uint64_t zprl_parity;
} replication_level_t;
/*
@@ -482,7 +516,7 @@ typedef struct replication_level {
* an error message will be displayed for each self-inconsistent vdev.
*/
replication_level_t *
-get_replication(nvlist_t *nvroot, int fatal)
+get_replication(nvlist_t *nvroot, boolean_t fatal)
{
nvlist_t **top;
uint_t t, toplevels;
@@ -491,14 +525,14 @@ get_replication(nvlist_t *nvroot, int fatal)
nvlist_t *nv;
char *type;
replication_level_t lastrep, rep, *ret;
- int dontreport;
+ boolean_t dontreport;
ret = safe_malloc(sizeof (replication_level_t));
verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
&top, &toplevels) == 0);
- lastrep.type = NULL;
+ lastrep.zprl_type = NULL;
for (t = 0; t < toplevels; t++) {
nv = top[t];
@@ -509,8 +543,9 @@ get_replication(nvlist_t *nvroot, int fatal)
/*
* This is a 'file' or 'disk' vdev.
*/
- rep.type = type;
- rep.level = 1;
+ rep.zprl_type = type;
+ rep.zprl_children = 1;
+ rep.zprl_parity = 0;
} else {
uint64_t vdev_size;
@@ -523,8 +558,17 @@ get_replication(nvlist_t *nvroot, int fatal)
* We also check that the size of each vdev (if it can
* be determined) is the same.
*/
- rep.type = type;
- rep.level = 0;
+ rep.zprl_type = type;
+ rep.zprl_children = 0;
+
+ if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
+ verify(nvlist_lookup_uint64(nv,
+ ZPOOL_CONFIG_NPARITY,
+ &rep.zprl_parity) == 0);
+ assert(rep.zprl_parity != 0);
+ } else {
+ rep.zprl_parity = 0;
+ }
/*
* The 'dontreport' variable indicatest that we've
@@ -542,7 +586,7 @@ get_replication(nvlist_t *nvroot, int fatal)
char *childtype;
int fd, err;
- rep.level++;
+ rep.zprl_children++;
verify(nvlist_lookup_string(cnv,
ZPOOL_CONFIG_TYPE, &childtype) == 0);
@@ -563,10 +607,10 @@ get_replication(nvlist_t *nvroot, int fatal)
"mismatched replication "
"level: %s contains both "
"files and devices\n"),
- rep.type);
+ rep.zprl_type);
else
return (NULL);
- dontreport = TRUE;
+ dontreport = B_TRUE;
}
/*
@@ -611,10 +655,10 @@ get_replication(nvlist_t *nvroot, int fatal)
vdev_error(gettext(
"%s contains devices of "
"different sizes\n"),
- rep.type);
+ rep.zprl_type);
else
return (NULL);
- dontreport = TRUE;
+ dontreport = B_TRUE;
}
type = childtype;
@@ -627,30 +671,45 @@ get_replication(nvlist_t *nvroot, int fatal)
* vdev in 'rep'. Compare it to 'lastrep' to see if its
* different.
*/
- if (lastrep.type != NULL) {
- if (strcmp(lastrep.type, rep.type) != 0) {
+ if (lastrep.zprl_type != NULL) {
+ if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) {
if (ret != NULL)
free(ret);
ret = NULL;
if (fatal)
vdev_error(gettext(
- "mismatched replication "
- "level: both %s and %s vdevs are "
+ "mismatched replication level: "
+ "both %s and %s vdevs are "
"present\n"),
- lastrep.type, rep.type);
+ lastrep.zprl_type, rep.zprl_type);
else
return (NULL);
- } else if (lastrep.level != rep.level) {
+ } else if (lastrep.zprl_parity != rep.zprl_parity) {
if (ret)
free(ret);
ret = NULL;
if (fatal)
vdev_error(gettext(
- "mismatched replication "
- "level: %d-way %s and %d-way %s "
+ "mismatched replication level: "
+ "both %llu and %llu device parity "
+ "%s vdevs are present\n"),
+ lastrep.zprl_parity,
+ rep.zprl_parity,
+ rep.zprl_type);
+ else
+ return (NULL);
+ } else if (lastrep.zprl_children != rep.zprl_children) {
+ if (ret)
+ free(ret);
+ ret = NULL;
+ if (fatal)
+ vdev_error(gettext(
+ "mismatched replication level: "
+ "both %llu-way and %llu-way %s "
"vdevs are present\n"),
- lastrep.level, lastrep.type,
- rep.level, rep.type);
+ lastrep.zprl_children,
+ rep.zprl_children,
+ rep.zprl_type);
else
return (NULL);
}
@@ -658,10 +717,8 @@ get_replication(nvlist_t *nvroot, int fatal)
lastrep = rep;
}
- if (ret != NULL) {
- ret->type = rep.type;
- ret->level = rep.level;
- }
+ if (ret != NULL)
+ *ret = rep;
return (ret);
}
@@ -687,7 +744,7 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
- if ((current = get_replication(nvroot, FALSE)) == NULL)
+ if ((current = get_replication(nvroot, B_FALSE)) == NULL)
return (0);
}
@@ -695,7 +752,7 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
* Get the replication level of the new vdev spec, reporting any
* inconsistencies found.
*/
- if ((new = get_replication(newroot, TRUE)) == NULL) {
+ if ((new = get_replication(newroot, B_TRUE)) == NULL) {
free(current);
return (-1);
}
@@ -706,13 +763,24 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
*/
ret = 0;
if (current != NULL) {
- if (strcmp(current->type, new->type) != 0 ||
- current->level != new->level) {
+ if (strcmp(current->zprl_type, new->zprl_type) != 0) {
vdev_error(gettext(
- "mismatched replication level: pool uses %d-way %s "
- "and new vdev uses %d-way %s\n"),
- current->level, current->type, new->level,
- new->type);
+ "mismatched replication level: pool uses %s "
+ "and new vdev is %s\n"),
+ current->zprl_type, new->zprl_type);
+ ret = -1;
+ } else if (current->zprl_parity != new->zprl_parity) {
+ vdev_error(gettext(
+ "mismatched replication level: pool uses %llu "
+ "device parity and new vdev uses %llu\n"),
+ current->zprl_parity, new->zprl_parity);
+ ret = -1;
+ } else if (current->zprl_children != new->zprl_children) {
+ vdev_error(gettext(
+ "mismatched replication level: pool uses %llu-way "
+ "%s and new vdev uses %llu-way %s\n"),
+ current->zprl_children, current->zprl_type,
+ new->zprl_children, new->zprl_type);
ret = -1;
}
}
@@ -795,10 +863,12 @@ label_disk(char *name)
(void) fprintf(stderr, gettext("use fdisk(1M) to partition "
"the disk, and provide a specific slice\n"));
(void) close(fd);
+ efi_free(vtoc);
return (-1);
}
(void) close(fd);
+ efi_free(vtoc);
return (0);
}
@@ -892,20 +962,75 @@ make_disks(nvlist_t *nv)
if ((ret = make_disks(child[c])) != 0)
return (ret);
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+ &child, &children) == 0)
+ for (c = 0; c < children; c++)
+ if ((ret = make_disks(child[c])) != 0)
+ return (ret);
+
return (0);
}
/*
+ * Determine if the given path is a hot spare within the given configuration.
+ */
+static boolean_t
+is_spare(nvlist_t *config, const char *path)
+{
+ int fd;
+ pool_state_t state;
+ char *name;
+ nvlist_t *label;
+ uint64_t guid, spareguid;
+ nvlist_t *nvroot;
+ nvlist_t **spares;
+ uint_t i, nspares;
+ boolean_t inuse;
+
+ if ((fd = open(path, O_RDONLY)) < 0)
+ return (B_FALSE);
+
+ if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
+ !inuse ||
+ state != POOL_STATE_SPARE ||
+ zpool_read_label(fd, &label) != 0) {
+ (void) close(fd);
+ return (B_FALSE);
+ }
+
+ (void) close(fd);
+ verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
+ nvlist_free(label);
+
+ verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &nvroot) == 0);
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ &spares, &nspares) == 0) {
+ for (i = 0; i < nspares; i++) {
+ verify(nvlist_lookup_uint64(spares[i],
+ ZPOOL_CONFIG_GUID, &spareguid) == 0);
+ if (spareguid == guid)
+ return (B_TRUE);
+ }
+ }
+
+ return (B_FALSE);
+}
+
+/*
* Go through and find any devices that are in use. We rely on libdiskmgt for
* the majority of this task.
*/
int
-check_in_use(nvlist_t *nv, int force)
+check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
+ int isspare)
{
nvlist_t **child;
uint_t c, children;
char *type, *path;
int ret;
+ char buf[MAXPATHLEN];
+ uint64_t wholedisk;
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
@@ -914,22 +1039,76 @@ check_in_use(nvlist_t *nv, int force)
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
+ /*
+ * As a generic check, we look to see if this is a replace of a
+ * hot spare within the same pool. If so, we allow it
+ * regardless of what libdiskmgt or zpool_in_use() says.
+ */
+ if (isreplacing) {
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+ &wholedisk) == 0 && wholedisk)
+ (void) snprintf(buf, sizeof (buf), "%ss0",
+ path);
+ else
+ (void) strlcpy(buf, path, sizeof (buf));
+ if (is_spare(config, buf))
+ return (0);
+ }
+
if (strcmp(type, VDEV_TYPE_DISK) == 0)
- ret = check_device(path, force);
+ ret = check_device(path, force, isspare);
if (strcmp(type, VDEV_TYPE_FILE) == 0)
- ret = check_file(path, force);
+ ret = check_file(path, force, isspare);
return (ret);
}
for (c = 0; c < children; c++)
- if ((ret = check_in_use(child[c], force)) != 0)
+ if ((ret = check_in_use(config, child[c], force,
+ isreplacing, B_FALSE)) != 0)
return (ret);
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+ &child, &children) == 0)
+ for (c = 0; c < children; c++)
+ if ((ret = check_in_use(config, child[c], force,
+ isreplacing, B_TRUE)) != 0)
+ return (ret);
+
return (0);
}
+const char *
+is_grouping(const char *type, int *mindev)
+{
+ if (strcmp(type, "raidz") == 0 || strcmp(type, "raidz1") == 0) {
+ if (mindev != NULL)
+ *mindev = 2;
+ return (VDEV_TYPE_RAIDZ);
+ }
+
+ if (strcmp(type, "raidz2") == 0) {
+ if (mindev != NULL)
+ *mindev = 3;
+ return (VDEV_TYPE_RAIDZ);
+ }
+
+ if (strcmp(type, "mirror") == 0) {
+ if (mindev != NULL)
+ *mindev = 2;
+ return (VDEV_TYPE_MIRROR);
+ }
+
+ if (strcmp(type, "spare") == 0) {
+ if (mindev != NULL)
+ *mindev = 1;
+ return (VDEV_TYPE_SPARE);
+ }
+
+ return (NULL);
+}
+
/*
* Construct a syntactically valid vdev specification,
* and ensure that all devices and files exist and can be opened.
@@ -939,11 +1118,14 @@ check_in_use(nvlist_t *nv, int force)
nvlist_t *
construct_spec(int argc, char **argv)
{
- nvlist_t *nvroot, *nv, **top;
- int t, toplevels;
+ nvlist_t *nvroot, *nv, **top, **spares;
+ int t, toplevels, mindev, nspares;
+ const char *type;
top = NULL;
toplevels = 0;
+ spares = NULL;
+ nspares = 0;
while (argc > 0) {
nv = NULL;
@@ -952,17 +1134,20 @@ construct_spec(int argc, char **argv)
* If it's a mirror or raidz, the subsequent arguments are
* its leaves -- until we encounter the next mirror or raidz.
*/
- if (strcmp(argv[0], VDEV_TYPE_MIRROR) == 0 ||
- strcmp(argv[0], VDEV_TYPE_RAIDZ) == 0) {
-
- char *type = argv[0];
+ if ((type = is_grouping(argv[0], &mindev)) != NULL) {
nvlist_t **child = NULL;
- int children = 0;
- int c;
+ int c, children = 0;
+
+ if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
+ spares != NULL) {
+ (void) fprintf(stderr, gettext("invalid vdev "
+ "specification: 'spare' can be "
+ "specified only once\n"));
+ return (NULL);
+ }
for (c = 1; c < argc; c++) {
- if (strcmp(argv[c], VDEV_TYPE_MIRROR) == 0 ||
- strcmp(argv[c], VDEV_TYPE_RAIDZ) == 0)
+ if (is_grouping(argv[c], NULL) != NULL)
break;
children++;
child = realloc(child,
@@ -974,29 +1159,38 @@ construct_spec(int argc, char **argv)
child[children - 1] = nv;
}
- argc -= c;
- argv += c;
-
- /*
- * Mirrors and RAID-Z devices require at least
- * two components.
- */
- if (children < 2) {
- (void) fprintf(stderr,
- gettext("invalid vdev specification: "
- "%s requires at least 2 devices\n"), type);
+ if (children < mindev) {
+ (void) fprintf(stderr, gettext("invalid vdev "
+ "specification: %s requires at least %d "
+ "devices\n"), argv[0], mindev);
return (NULL);
}
- verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) == 0);
- verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
- type) == 0);
- verify(nvlist_add_nvlist_array(nv,
- ZPOOL_CONFIG_CHILDREN, child, children) == 0);
+ argc -= c;
+ argv += c;
+
+ if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
+ spares = child;
+ nspares = children;
+ continue;
+ } else {
+ verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
+ 0) == 0);
+ verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
+ type) == 0);
+ if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
+ verify(nvlist_add_uint64(nv,
+ ZPOOL_CONFIG_NPARITY,
+ mindev - 1) == 0);
+ }
+ verify(nvlist_add_nvlist_array(nv,
+ ZPOOL_CONFIG_CHILDREN, child,
+ children) == 0);
- for (c = 0; c < children; c++)
- nvlist_free(child[c]);
- free(child);
+ for (c = 0; c < children; c++)
+ nvlist_free(child[c]);
+ free(child);
+ }
} else {
/*
* We have a device. Pass off to make_leaf_vdev() to
@@ -1015,6 +1209,13 @@ construct_spec(int argc, char **argv)
top[toplevels - 1] = nv;
}
+ if (toplevels == 0 && nspares == 0) {
+ (void) fprintf(stderr, gettext("invalid vdev "
+ "specification: at least one toplevel vdev must be "
+ "specified\n"));
+ return (NULL);
+ }
+
/*
* Finally, create nvroot and add all top-level vdevs to it.
*/
@@ -1023,9 +1224,16 @@ construct_spec(int argc, char **argv)
VDEV_TYPE_ROOT) == 0);
verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
top, toplevels) == 0);
+ if (nspares != 0)
+ verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ spares, nspares) == 0);
for (t = 0; t < toplevels; t++)
nvlist_free(top[t]);
+ for (t = 0; t < nspares; t++)
+ nvlist_free(spares[t]);
+ if (spares)
+ free(spares);
free(top);
return (nvroot);
@@ -1043,7 +1251,7 @@ construct_spec(int argc, char **argv)
*/
nvlist_t *
make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
- int argc, char **argv)
+ boolean_t isreplacing, int argc, char **argv)
{
nvlist_t *newroot;
@@ -1063,7 +1271,8 @@ make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
* uses (such as a dedicated dump device) that even '-f' cannot
* override.
*/
- if (check_in_use(newroot, force) != 0) {
+ if (check_in_use(poolconfig, newroot, force, isreplacing,
+ B_FALSE) != 0) {
nvlist_free(newroot);
return (NULL);
}
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index c74f227bed..e8065c74f5 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -114,6 +114,7 @@ static uint64_t zopt_vdevtime;
static int zopt_ashift = SPA_MINBLOCKSHIFT;
static int zopt_mirrors = 2;
static int zopt_raidz = 4;
+static int zopt_raidz_parity = 1;
static size_t zopt_vdev_size = SPA_MINDEVSIZE;
static int zopt_datasets = 7;
static int zopt_threads = 23;
@@ -346,6 +347,7 @@ usage(void)
"\t[-a alignment_shift (default: %d) (use 0 for random)]\n"
"\t[-m mirror_copies (default: %d)]\n"
"\t[-r raidz_disks (default: %d)]\n"
+ "\t[-R raidz_parity (default: %d)]\n"
"\t[-d datasets (default: %d)]\n"
"\t[-t threads (default: %d)]\n"
"\t[-g gang_block_threshold (default: %s)]\n"
@@ -364,6 +366,7 @@ usage(void)
zopt_ashift, /* -a */
zopt_mirrors, /* -m */
zopt_raidz, /* -r */
+ zopt_raidz_parity, /* -R */
zopt_datasets, /* -d */
zopt_threads, /* -t */
nice_gang_bang, /* -g */
@@ -407,7 +410,7 @@ process_options(int argc, char **argv)
zio_gang_bang = 32 << 10;
while ((opt = getopt(argc, argv,
- "v:s:a:m:r:d:t:g:i:k:p:f:VET:P:")) != EOF) {
+ "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:")) != EOF) {
value = 0;
switch (opt) {
case 'v':
@@ -415,6 +418,7 @@ process_options(int argc, char **argv)
case 'a':
case 'm':
case 'r':
+ case 'R':
case 'd':
case 't':
case 'g':
@@ -440,6 +444,9 @@ process_options(int argc, char **argv)
case 'r':
zopt_raidz = MAX(1, value);
break;
+ case 'R':
+ zopt_raidz_parity = MIN(MAX(value, 1), 2);
+ break;
case 'd':
zopt_datasets = MAX(1, value);
break;
@@ -480,8 +487,10 @@ process_options(int argc, char **argv)
}
}
+ zopt_raidz_parity = MIN(zopt_raidz_parity, zopt_raidz - 1);
+
zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time / zopt_vdevs : UINT64_MAX);
- zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz >= 2 ? 2 : 1) - 1;
+ zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz_parity + 1) - 1;
}
static uint64_t
@@ -542,6 +551,8 @@ make_vdev_raidz(size_t size, int r)
VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
VDEV_TYPE_RAIDZ) == 0);
+ VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY,
+ zopt_raidz_parity) == 0);
VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN,
child, r) == 0);
@@ -671,7 +682,7 @@ ztest_replay_create(ztest_replay_t *zr, lr_create_t *lr, boolean_t byteswap)
error = dmu_object_claim(os, lr->lr_doid, lr->lr_mode, 0,
DMU_OT_NONE, 0, tx);
- ASSERT(error == 0);
+ ASSERT3U(error, ==, 0);
dmu_tx_commit(tx);
if (zopt_verbose >= 5) {
diff --git a/usr/src/lib/libdiskmgt/common/entry.c b/usr/src/lib/libdiskmgt/common/entry.c
index 860801b41d..61bc9d60d4 100644
--- a/usr/src/lib/libdiskmgt/common/entry.c
+++ b/usr/src/lib/libdiskmgt/common/entry.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -966,6 +965,10 @@ dm_get_usage_string(char *what, char *how, char **usage_string)
*usage_string = dgettext(TEXT_DOMAIN,
"%s is part of active ZFS pool %s. Please see zpool(1M)."
"\n");
+ } else if (strcmp(what, DM_USE_SPARE_ZPOOL) == 0) {
+ *usage_string = dgettext(TEXT_DOMAIN,
+ "%s is reserved as a hot spare for ZFS pool %s. Please "
+ "see zpool(1M).\n");
}
}
void
diff --git a/usr/src/lib/libdiskmgt/common/inuse_zpool.c b/usr/src/lib/libdiskmgt/common/inuse_zpool.c
index 1637ace92d..a7cf203a2f 100644
--- a/usr/src/lib/libdiskmgt/common/inuse_zpool.c
+++ b/usr/src/lib/libdiskmgt/common/inuse_zpool.c
@@ -46,17 +46,21 @@
#include <ctype.h>
#include <sys/fs/zfs.h>
+#include <libzfs.h>
#include "libdiskmgt.h"
#include "disks_private.h"
/*
* Pointers to libzfs.so functions that we dynamically resolve.
*/
-static int (*zfsdl_zpool_in_use)(int fd, pool_state_t *state, char **name);
+static int (*zfsdl_zpool_in_use)(libzfs_handle_t *hdl, int fd,
+ pool_state_t *state, char **name, boolean_t *);
+static libzfs_handle_t *(*zfsdl_libzfs_init)(boolean_t);
static mutex_t init_lock = DEFAULTMUTEX;
static rwlock_t zpool_lock = DEFAULTRWLOCK;
-static int initialized = 0;
+static boolean_t initialized;
+static libzfs_handle_t *zfs_hdl;
static void *init_zpool();
@@ -67,6 +71,7 @@ inuse_zpool_common(char *slice, nvlist_t *attrs, int *errp, char *type)
char *name;
int fd;
pool_state_t state;
+ boolean_t used;
*errp = 0;
if (slice == NULL) {
@@ -83,15 +88,21 @@ inuse_zpool_common(char *slice, nvlist_t *attrs, int *errp, char *type)
(void) mutex_unlock(&init_lock);
return (found);
}
- initialized = 1;
+ initialized = B_TRUE;
}
(void) mutex_unlock(&init_lock);
(void) rw_rdlock(&zpool_lock);
if ((fd = open(slice, O_RDONLY)) > 0) {
- if (zfsdl_zpool_in_use(fd, &state, &name)) {
+ name = NULL;
+ if (zfsdl_zpool_in_use(zfs_hdl, fd, &state,
+ &name, &used) == 0 && used) {
if (strcmp(type, DM_USE_ACTIVE_ZPOOL) == 0) {
- if (state == POOL_STATE_ACTIVE)
+ if (state == POOL_STATE_ACTIVE) {
found = 1;
+ } else if (state == POOL_STATE_SPARE) {
+ found = 1;
+ type = DM_USE_SPARE_ZPOOL;
+ }
} else {
found = 1;
}
@@ -100,9 +111,11 @@ inuse_zpool_common(char *slice, nvlist_t *attrs, int *errp, char *type)
libdiskmgt_add_str(attrs, DM_USED_BY,
type, errp);
libdiskmgt_add_str(attrs, DM_USED_NAME,
- name, errp);
+ name, errp);
}
}
+ if (name)
+ free(name);
(void) close(fd);
}
(void) rw_unlock(&zpool_lock);
@@ -133,15 +146,24 @@ init_zpool()
if ((lh = dlopen("libzfs.so", RTLD_NOW)) == NULL) {
return (lh);
}
+
/*
* Instantiate the functions needed to get zpool configuration
* data
*/
- if ((zfsdl_zpool_in_use = (int (*)(int, pool_state_t *, char **))
+ if ((zfsdl_libzfs_init = (libzfs_handle_t *(*)(boolean_t))
+ dlsym(lh, "libzfs_init")) == NULL ||
+ (zfsdl_zpool_in_use = (int (*)(libzfs_handle_t *, int,
+ pool_state_t *, char **, boolean_t *))
dlsym(lh, "zpool_in_use")) == NULL) {
(void) dlclose(lh);
return (NULL);
}
+ if ((zfs_hdl = (*zfsdl_libzfs_init)(B_FALSE)) == NULL) {
+ (void) dlclose(lh);
+ return (NULL);
+ }
+
return (lh);
}
diff --git a/usr/src/lib/libdiskmgt/common/libdiskmgt.h b/usr/src/lib/libdiskmgt/common/libdiskmgt.h
index aa6df0967e..7d6fef46d4 100644
--- a/usr/src/lib/libdiskmgt/common/libdiskmgt.h
+++ b/usr/src/lib/libdiskmgt/common/libdiskmgt.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -215,6 +214,7 @@ typedef enum {
#define DM_USE_VFSTAB "vfstab"
#define DM_USE_EXPORTED_ZPOOL "exported_zpool"
#define DM_USE_ACTIVE_ZPOOL "active_zpool"
+#define DM_USE_SPARE_ZPOOL "spare_zpool"
/* event */
#define DM_EV_NAME "name"
diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h
index 0044ccd7c9..bf4b2874ad 100644
--- a/usr/src/lib/libzfs/common/libzfs.h
+++ b/usr/src/lib/libzfs/common/libzfs.h
@@ -47,16 +47,78 @@ extern "C" {
#define ZFS_MAXPROPLEN MAXPATHLEN
/*
+ * libzfs errors
+ */
+enum {
+ EZFS_NOMEM = 2000, /* out of memory */
+ EZFS_BADPROP, /* invalid property value */
+ EZFS_PROPREADONLY, /* cannot set readonly property */
+ EZFS_PROPTYPE, /* property does not apply to dataset type */
+ EZFS_PROPNONINHERIT, /* property is not inheritable */
+ EZFS_PROPSPACE, /* bad quota or reservation */
+ EZFS_BADTYPE, /* dataset is not of appropriate type */
+ EZFS_BUSY, /* pool or dataset is busy */
+ EZFS_EXISTS, /* pool or dataset already exists */
+ EZFS_NOENT, /* no such pool or dataset */
+ EZFS_BADSTREAM, /* bad backup stream */
+ EZFS_DSREADONLY, /* dataset is readonly */
+ EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */
+ EZFS_VOLHASDATA, /* volume already contains data */
+ EZFS_INVALIDNAME, /* invalid dataset name */
+ EZFS_BADRESTORE, /* unable to restore to destination */
+ EZFS_BADBACKUP, /* backup failed */
+ EZFS_BADTARGET, /* bad attach/detach/replace target */
+ EZFS_NODEVICE, /* no such device in pool */
+ EZFS_BADDEV, /* invalid device to add */
+ EZFS_NOREPLICAS, /* no valid replicas */
+ EZFS_RESILVERING, /* currently resilvering */
+ EZFS_BADVERSION, /* unsupported version */
+ EZFS_POOLUNAVAIL, /* pool is currently unavailable */
+ EZFS_DEVOVERFLOW, /* too many devices in one vdev */
+ EZFS_BADPATH, /* must be an absolute path */
+ EZFS_CROSSTARGET, /* rename or clone across pool or dataset */
+ EZFS_ZONED, /* used improperly in local zone */
+ EZFS_MOUNTFAILED, /* failed to mount dataset */
+ EZFS_UMOUNTFAILED, /* failed to unmount dataset */
+ EZFS_UNSHAREFAILED, /* unshare(1M) failed */
+ EZFS_SHAREFAILED, /* share(1M) failed */
+ EZFS_DEVLINKS, /* failed to create zvol links */
+ EZFS_PERM, /* permission denied */
+ EZFS_NOSPC, /* out of space */
+ EZFS_IO, /* I/O error */
+ EZFS_INTR, /* signal received */
+ EZFS_ISSPARE, /* device is a hot spare */
+ EZFS_INVALCONFIG, /* invalid vdev configuration */
+ EZFS_UNKNOWN /* unknown error */
+};
+
+/*
* Basic handle types
*/
typedef struct zfs_handle zfs_handle_t;
typedef struct zpool_handle zpool_handle_t;
+typedef struct libzfs_handle libzfs_handle_t;
+
+/*
+ * Library initialization
+ */
+extern libzfs_handle_t *libzfs_init(void);
+extern void libzfs_fini(libzfs_handle_t *);
+
+extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *);
+extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *);
+
+extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t);
+
+extern int libzfs_errno(libzfs_handle_t *);
+extern const char *libzfs_error_action(libzfs_handle_t *);
+extern const char *libzfs_error_description(libzfs_handle_t *);
/*
* Basic handle functions
*/
-extern zpool_handle_t *zpool_open(const char *);
-extern zpool_handle_t *zpool_open_canfail(const char *);
+extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *);
+extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *);
extern void zpool_close(zpool_handle_t *);
extern const char *zpool_get_name(zpool_handle_t *);
extern uint64_t zpool_get_guid(zpool_handle_t *);
@@ -64,17 +126,19 @@ extern uint64_t zpool_get_space_used(zpool_handle_t *);
extern uint64_t zpool_get_space_total(zpool_handle_t *);
extern int zpool_get_root(zpool_handle_t *, char *, size_t);
extern int zpool_get_state(zpool_handle_t *);
+extern uint64_t zpool_get_version(zpool_handle_t *);
/*
* Iterate over all active pools in the system.
*/
typedef int (*zpool_iter_f)(zpool_handle_t *, void *);
-extern int zpool_iter(zpool_iter_f, void *);
+extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *);
/*
* Functions to create and destroy pools
*/
-extern int zpool_create(const char *, nvlist_t *, const char *);
+extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
+ const char *);
extern int zpool_destroy(zpool_handle_t *);
extern int zpool_add(zpool_handle_t *, nvlist_t *);
@@ -88,8 +152,9 @@ extern int zpool_vdev_offline(zpool_handle_t *, const char *, int);
extern int zpool_vdev_attach(zpool_handle_t *, const char *, const char *,
nvlist_t *, int);
extern int zpool_vdev_detach(zpool_handle_t *, const char *);
+extern int zpool_vdev_remove(zpool_handle_t *, const char *);
extern int zpool_clear(zpool_handle_t *, const char *);
-extern uint64_t zpool_vdev_to_guid(zpool_handle_t *, const char *);
+extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *);
/*
* Pool health statistics.
@@ -143,24 +208,25 @@ extern int zpool_get_errlog(zpool_handle_t *, nvlist_t ***, size_t *);
* Import and export functions
*/
extern int zpool_export(zpool_handle_t *);
-extern int zpool_import(nvlist_t *, const char *, const char *);
+extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *,
+ const char *);
/*
* Search for pools to import
*/
-extern nvlist_t *zpool_find_import(int, char **);
+extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
/*
* Miscellaneous pool functions
*/
-extern char *zpool_vdev_name(zpool_handle_t *, nvlist_t *);
+extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *);
extern int zpool_upgrade(zpool_handle_t *);
/*
* Basic handle manipulations. These functions do not create or destroy the
* underlying datasets, only the references to them.
*/
-extern zfs_handle_t *zfs_open(const char *, int);
+extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int);
extern void zfs_close(zfs_handle_t *);
extern zfs_type_t zfs_get_type(const zfs_handle_t *);
extern const char *zfs_get_name(const zfs_handle_t *);
@@ -182,11 +248,11 @@ typedef enum {
const char *zfs_prop_to_name(zfs_prop_t);
int zfs_prop_set(zfs_handle_t *, zfs_prop_t, const char *);
int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t, zfs_source_t *,
- char *, size_t, int);
+ char *, size_t, boolean_t);
int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *, zfs_source_t *,
char *, size_t);
uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
-int zfs_prop_validate(zfs_prop_t, const char *, uint64_t *);
+int zfs_prop_validate(libzfs_handle_t *, zfs_prop_t, const char *, uint64_t *);
int zfs_prop_inheritable(zfs_prop_t);
int zfs_prop_inherit(zfs_handle_t *, zfs_prop_t);
const char *zfs_prop_values(zfs_prop_t);
@@ -206,7 +272,7 @@ int zfs_get_proplist(char *fields, zfs_prop_t *proplist, int max, int *count,
* Iterator functions.
*/
typedef int (*zfs_iter_f)(zfs_handle_t *, void *);
-extern int zfs_iter_root(zfs_iter_f, void *);
+extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *);
extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *);
extern int zfs_iter_dependents(zfs_handle_t *, zfs_iter_f, void *);
extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *);
@@ -215,14 +281,16 @@ extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *);
/*
* Functions to create and destroy datasets.
*/
-extern int zfs_create(const char *, zfs_type_t, const char *, const char *);
+extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t,
+ const char *, const char *);
extern int zfs_destroy(zfs_handle_t *);
extern int zfs_clone(zfs_handle_t *, const char *);
-extern int zfs_snapshot(const char *);
+extern int zfs_snapshot(libzfs_handle_t *, const char *);
extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, int);
extern int zfs_rename(zfs_handle_t *, const char *);
extern int zfs_send(zfs_handle_t *, zfs_handle_t *);
-extern int zfs_receive(const char *, int, int, int);
+extern int zfs_receive(libzfs_handle_t *, const char *, int, int, int);
+extern int zfs_promote(zfs_handle_t *);
/*
* Miscellaneous functions.
@@ -234,7 +302,7 @@ extern int zfs_name_valid(const char *, zfs_type_t);
/*
* Mount support functions.
*/
-extern int zfs_is_mounted(zfs_handle_t *, char **);
+extern boolean_t zfs_is_mounted(zfs_handle_t *, char **);
extern int zfs_mount(zfs_handle_t *, const char *, int);
extern int zfs_unmount(zfs_handle_t *, const char *, int);
extern int zfs_unmountall(zfs_handle_t *, int);
@@ -242,17 +310,12 @@ extern int zfs_unmountall(zfs_handle_t *, int);
/*
* Share support functions.
*/
-extern int zfs_is_shared(zfs_handle_t *, char **);
+extern boolean_t zfs_is_shared(zfs_handle_t *, char **);
extern int zfs_share(zfs_handle_t *);
extern int zfs_unshare(zfs_handle_t *, const char *);
extern int zfs_unshareall(zfs_handle_t *);
/*
- * For clients that need to capture error output.
- */
-extern void zfs_set_error_handler(void (*)(const char *, va_list));
-
-/*
* When dealing with nvlists, verify() is extremely useful
*/
#ifdef NDEBUG
@@ -276,12 +339,13 @@ extern int zfs_remove_link(zfs_handle_t *);
/*
* Given a device or file, determine if it is part of a pool.
*/
-extern int zpool_in_use(int fd, pool_state_t *state, char **name);
+extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
+ boolean_t *);
/*
* ftyp special. Read the label from a given device.
*/
-extern nvlist_t *zpool_read_label(int fd);
+extern int zpool_read_label(int, nvlist_t **);
/*
* Create and remove zvol /dev links
@@ -289,21 +353,6 @@ extern nvlist_t *zpool_read_label(int fd);
extern int zpool_create_zvol_links(zpool_handle_t *);
extern int zpool_remove_zvol_links(zpool_handle_t *);
-/*
- * zoneadmd hack
- */
-extern void zfs_init(void);
-
-/*
- * Useful defines
- */
-#ifndef TRUE
-#define TRUE 1
-#endif
-#ifndef FALSE
-#define FALSE 0
-#endif
-
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/lib/libzfs/common/libzfs_changelist.c b/usr/src/lib/libzfs/common/libzfs_changelist.c
index 57fcc1497c..04270dfe51 100644
--- a/usr/src/lib/libzfs/common/libzfs_changelist.c
+++ b/usr/src/lib/libzfs/common/libzfs_changelist.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -73,11 +72,11 @@ struct prop_changelist {
zfs_prop_t cl_realprop;
uu_list_pool_t *cl_pool;
uu_list_t *cl_list;
- int cl_waslegacy;
- int cl_allchildren;
- int cl_alldependents;
+ boolean_t cl_waslegacy;
+ boolean_t cl_allchildren;
+ boolean_t cl_alldependents;
int cl_flags;
- int cl_haszonedchild;
+ boolean_t cl_haszonedchild;
};
/*
@@ -109,7 +108,8 @@ changelist_prefix(prop_changelist_t *clp)
*/
if (cn->cn_handle->zfs_volblocksize &&
clp->cl_realprop == ZFS_PROP_NAME) {
- if (zvol_remove_link(cn->cn_handle->zfs_name) != 0)
+ if (zvol_remove_link(cn->cn_handle->zfs_hdl,
+ cn->cn_handle->zfs_name) != 0)
ret = -1;
} else if (zfs_unmount(cn->cn_handle, NULL, clp->cl_flags) != 0)
ret = -1;
@@ -167,7 +167,8 @@ changelist_postfix(prop_changelist_t *clp)
*/
if (cn->cn_handle->zfs_volblocksize &&
clp->cl_realprop == ZFS_PROP_NAME) {
- if (zvol_create_link(cn->cn_handle->zfs_name) != 0)
+ if (zvol_create_link(cn->cn_handle->zfs_hdl,
+ cn->cn_handle->zfs_name) != 0)
ret = -1;
continue;
}
@@ -186,7 +187,7 @@ changelist_postfix(prop_changelist_t *clp)
char shareopts[ZFS_MAXPROPLEN];
if (zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARENFS,
shareopts, sizeof (shareopts), NULL, NULL, 0,
- FALSE) == 0 && strcmp(shareopts, "off") == 0)
+ B_FALSE) == 0 && strcmp(shareopts, "off") == 0)
ret = zfs_unshare(cn->cn_handle, NULL);
else
ret = zfs_share(cn->cn_handle);
@@ -199,22 +200,22 @@ changelist_postfix(prop_changelist_t *clp)
/*
* Is this "dataset" a child of "parent"?
*/
-static int
+static boolean_t
isa_child_of(char *dataset, const char *parent)
{
int len;
/* snapshot does not have a child */
if (strchr(parent, '@'))
- return (FALSE);
+ return (B_FALSE);
len = strlen(parent);
if (strncmp(dataset, parent, len) == 0 &&
(dataset[len] == '/' || dataset[len] == '\0'))
- return (TRUE);
+ return (B_TRUE);
else
- return (FALSE);
+ return (B_FALSE);
}
@@ -326,6 +327,9 @@ changelist_free(prop_changelist_t *clp)
free(cn);
}
+ uu_list_walk_end(walk);
+
+ uu_list_destroy(clp->cl_list);
uu_list_pool_destroy(clp->cl_pool);
free(clp);
@@ -353,12 +357,18 @@ change_one(zfs_handle_t *zhp, void *data)
if (!(zhp->zfs_volblocksize && clp->cl_realprop == ZFS_PROP_NAME) &&
zfs_prop_get(zhp, clp->cl_prop, property,
sizeof (property), &sourcetype, where, sizeof (where),
- FALSE) != 0)
+ B_FALSE) != 0) {
+ zfs_close(zhp);
return (0);
+ }
if (clp->cl_alldependents || clp->cl_allchildren ||
sourcetype == ZFS_SRC_DEFAULT || sourcetype == ZFS_SRC_INHERITED) {
- cn = zfs_malloc(sizeof (prop_changenode_t));
+ if ((cn = zfs_alloc(zfs_get_handle(zhp),
+ sizeof (prop_changenode_t))) == NULL) {
+ zfs_close(zhp);
+ return (-1);
+ }
cn->cn_handle = zhp;
cn->cn_mounted = zfs_is_mounted(zhp, NULL);
@@ -367,7 +377,7 @@ change_one(zfs_handle_t *zhp, void *data)
/* indicate if any child is exported to a local zone */
if ((getzoneid() == GLOBAL_ZONEID) && cn->cn_zoned)
- clp->cl_haszonedchild = TRUE;
+ clp->cl_haszonedchild = B_TRUE;
uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool);
@@ -399,11 +409,14 @@ change_one(zfs_handle_t *zhp, void *data)
prop_changelist_t *
changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
{
- prop_changelist_t *clp = zfs_malloc(sizeof (prop_changelist_t));
+ prop_changelist_t *clp;
prop_changenode_t *cn;
zfs_handle_t *temp;
char property[ZFS_MAXPROPLEN];
+ if ((clp = zfs_alloc(zhp->zfs_hdl, sizeof (prop_changelist_t))) == NULL)
+ return (NULL);
+
clp->cl_pool = uu_list_pool_create("changelist_pool",
sizeof (prop_changenode_t),
offsetof(prop_changenode_t, cn_listnode),
@@ -423,10 +436,10 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
*/
if (prop == ZFS_PROP_NAME) {
clp->cl_prop = ZFS_PROP_MOUNTPOINT;
- clp->cl_alldependents = TRUE;
+ clp->cl_alldependents = B_TRUE;
} else if (prop == ZFS_PROP_ZONED) {
clp->cl_prop = ZFS_PROP_MOUNTPOINT;
- clp->cl_allchildren = TRUE;
+ clp->cl_allchildren = B_TRUE;
} else {
clp->cl_prop = prop;
}
@@ -450,8 +463,9 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
* We have to re-open ourselves because we auto-close all the handles
* and can't tell the difference.
*/
- if ((temp = zfs_open(zfs_get_name(zhp), ZFS_TYPE_ANY)) == NULL) {
- free(clp);
+ if ((temp = zfs_open(zhp->zfs_hdl, zfs_get_name(zhp),
+ ZFS_TYPE_ANY)) == NULL) {
+ changelist_free(clp);
return (NULL);
}
@@ -459,7 +473,13 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
* Always add ourself to the list. We add ourselves to the end so that
* we're the last to be unmounted.
*/
- cn = zfs_malloc(sizeof (prop_changenode_t));
+ if ((cn = zfs_alloc(zhp->zfs_hdl,
+ sizeof (prop_changenode_t))) == NULL) {
+ zfs_close(temp);
+ changelist_free(clp);
+ return (NULL);
+ }
+
cn->cn_handle = temp;
cn->cn_mounted = zfs_is_mounted(temp, NULL);
cn->cn_shared = zfs_is_shared(temp, NULL);
@@ -474,10 +494,10 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
* as the behavior of changelist_postfix() will be different.
*/
if (zfs_prop_get(zhp, prop, property, sizeof (property),
- NULL, NULL, 0, FALSE) == 0 &&
+ NULL, NULL, 0, B_FALSE) == 0 &&
(strcmp(property, "legacy") == 0 || strcmp(property, "none") == 0 ||
strcmp(property, "off") == 0))
- clp->cl_waslegacy = TRUE;
+ clp->cl_waslegacy = B_TRUE;
return (clp);
}
diff --git a/usr/src/lib/libzfs/common/libzfs_config.c b/usr/src/lib/libzfs/common/libzfs_config.c
index 71801d5cba..be691f0ced 100644
--- a/usr/src/lib/libzfs/common/libzfs_config.c
+++ b/usr/src/lib/libzfs/common/libzfs_config.c
@@ -45,9 +45,6 @@
#include "libzfs_impl.h"
-static uu_avl_t *namespace_avl;
-static uint64_t namespace_generation;
-
typedef struct config_node {
char *cn_name;
nvlist_t *cn_config;
@@ -73,11 +70,41 @@ config_node_compare(const void *a, const void *b, void *unused)
return (0);
}
+void
+namespace_clear(libzfs_handle_t *hdl)
+{
+ if (hdl->libzfs_ns_avl) {
+ uu_avl_walk_t *walk;
+ config_node_t *cn;
+
+ if ((walk = uu_avl_walk_start(hdl->libzfs_ns_avl,
+ UU_WALK_ROBUST)) == NULL)
+ return;
+
+ while ((cn = uu_avl_walk_next(walk)) != NULL) {
+ uu_avl_remove(hdl->libzfs_ns_avl, cn);
+ nvlist_free(cn->cn_config);
+ free(cn->cn_name);
+ free(cn);
+ }
+
+ uu_avl_walk_end(walk);
+
+ uu_avl_destroy(hdl->libzfs_ns_avl);
+ hdl->libzfs_ns_avl = NULL;
+ }
+
+ if (hdl->libzfs_ns_avlpool) {
+ uu_avl_pool_destroy(hdl->libzfs_ns_avlpool);
+ hdl->libzfs_ns_avlpool = NULL;
+ }
+}
+
/*
* Loads the pool namespace, or re-loads it if the cache has changed.
*/
-static void
-namespace_reload()
+static int
+namespace_reload(libzfs_handle_t *hdl)
{
nvlist_t *config;
config_node_t *cn;
@@ -85,23 +112,21 @@ namespace_reload()
zfs_cmd_t zc = { 0 };
uu_avl_walk_t *walk;
- if (namespace_generation == 0) {
+ if (hdl->libzfs_ns_gen == 0) {
/*
* This is the first time we've accessed the configuration
* cache. Initialize the AVL tree and then fall through to the
* common code.
*/
- uu_avl_pool_t *pool;
-
- if ((pool = uu_avl_pool_create("config_pool",
+ if ((hdl->libzfs_ns_avlpool = uu_avl_pool_create("config_pool",
sizeof (config_node_t),
offsetof(config_node_t, cn_avl),
config_node_compare, UU_DEFAULT)) == NULL)
- no_memory();
+ return (no_memory(hdl));
- if ((namespace_avl = uu_avl_create(pool, NULL,
- UU_DEFAULT)) == NULL)
- no_memory();
+ if ((hdl->libzfs_ns_avl = uu_avl_create(hdl->libzfs_ns_avlpool,
+ NULL, UU_DEFAULT)) == NULL)
+ return (no_memory(hdl));
}
/*
@@ -114,68 +139,92 @@ namespace_reload()
* been modified to tell us how much to allocate.
*/
zc.zc_config_dst_size = 1024;
- zc.zc_config_dst = (uint64_t)(uintptr_t)
- zfs_malloc(zc.zc_config_dst_size);
+ if ((zc.zc_config_dst = (uint64_t)(uintptr_t)
+ zfs_alloc(hdl, zc.zc_config_dst_size)) == NULL)
+ return (-1);
for (;;) {
- zc.zc_cookie = namespace_generation;
- if (zfs_ioctl(ZFS_IOC_POOL_CONFIGS, &zc) != 0) {
+ zc.zc_cookie = hdl->libzfs_ns_gen;
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CONFIGS, &zc) != 0) {
switch (errno) {
case EEXIST:
/*
* The namespace hasn't changed.
*/
free((void *)(uintptr_t)zc.zc_config_dst);
- return;
+ return (0);
case ENOMEM:
free((void *)(uintptr_t)zc.zc_config_dst);
- zc.zc_config_dst = (uint64_t)(uintptr_t)
- zfs_malloc(zc.zc_config_dst_size);
+ if ((zc.zc_config_dst = (uint64_t)(uintptr_t)
+ zfs_alloc(hdl, zc.zc_config_dst_size))
+ == NULL)
+ return (-1);
break;
default:
- zfs_baderror(errno);
+ return (zfs_standard_error(hdl, errno,
+ dgettext(TEXT_DOMAIN, "failed to read "
+ "pool configuration")));
}
} else {
- namespace_generation = zc.zc_cookie;
+ hdl->libzfs_ns_gen = zc.zc_cookie;
break;
}
}
- verify(nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst,
- zc.zc_config_dst_size, &config, 0) == 0);
+ if (nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst,
+ zc.zc_config_dst_size, &config, 0) != 0)
+ return (no_memory(hdl));
free((void *)(uintptr_t)zc.zc_config_dst);
/*
* Clear out any existing configuration information.
*/
- if ((walk = uu_avl_walk_start(namespace_avl, UU_WALK_ROBUST)) == NULL)
- no_memory();
+ if ((walk = uu_avl_walk_start(hdl->libzfs_ns_avl,
+ UU_WALK_ROBUST)) == NULL) {
+ nvlist_free(config);
+ return (no_memory(hdl));
+ }
while ((cn = uu_avl_walk_next(walk)) != NULL) {
- uu_avl_remove(namespace_avl, cn);
+ uu_avl_remove(hdl->libzfs_ns_avl, cn);
nvlist_free(cn->cn_config);
free(cn->cn_name);
free(cn);
}
+ uu_avl_walk_end(walk);
+
elem = NULL;
while ((elem = nvlist_next_nvpair(config, elem)) != NULL) {
nvlist_t *child;
uu_avl_index_t where;
- cn = zfs_malloc(sizeof (config_node_t));
- cn->cn_name = zfs_strdup(nvpair_name(elem));
+ if ((cn = zfs_alloc(hdl, sizeof (config_node_t))) == NULL) {
+ nvlist_free(config);
+ return (-1);
+ }
+
+ if ((cn->cn_name = zfs_strdup(hdl,
+ nvpair_name(elem))) == NULL) {
+ free(cn);
+ return (-1);
+ }
verify(nvpair_value_nvlist(elem, &child) == 0);
- verify(nvlist_dup(child, &cn->cn_config, 0) == 0);
- verify(uu_avl_find(namespace_avl, cn, NULL, &where) == NULL);
+ if (nvlist_dup(child, &cn->cn_config, 0) != 0) {
+ nvlist_free(config);
+ return (no_memory(hdl));
+ }
+ verify(uu_avl_find(hdl->libzfs_ns_avl, cn, NULL, &where)
+ == NULL);
- uu_avl_insert(namespace_avl, cn, where);
+ uu_avl_insert(hdl->libzfs_ns_avl, cn, where);
}
nvlist_free(config);
+ return (0);
}
/*
@@ -209,35 +258,43 @@ zpool_refresh_stats(zpool_handle_t *zhp)
zhp->zpool_config_size = 1 << 16;
zc.zc_config_dst_size = zhp->zpool_config_size;
- zc.zc_config_dst = (uint64_t)(uintptr_t)
- zfs_malloc(zc.zc_config_dst_size);
+ if ((zc.zc_config_dst = (uint64_t)(uintptr_t)
+ zfs_alloc(zhp->zpool_hdl, zc.zc_config_dst_size)) == NULL)
+ return (-1);
for (;;) {
- if (zfs_ioctl(ZFS_IOC_POOL_STATS, &zc) == 0) {
+ if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_STATS,
+ &zc) == 0) {
/*
* The real error is returned in the zc_cookie field.
*/
- error = zc.zc_cookie;
+ error = errno = zc.zc_cookie;
break;
}
if (errno == ENOMEM) {
free((void *)(uintptr_t)zc.zc_config_dst);
- zc.zc_config_dst = (uint64_t)(uintptr_t)
- zfs_malloc(zc.zc_config_dst_size);
+ if ((zc.zc_config_dst = (uint64_t)(uintptr_t)
+ zfs_alloc(zhp->zpool_hdl,
+ zc.zc_config_dst_size)) == NULL)
+ return (-1);
} else {
free((void *)(uintptr_t)zc.zc_config_dst);
- return (errno);
+ return (-1);
}
}
- verify(nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst,
- zc.zc_config_dst_size, &config, 0) == 0);
+ if (nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst,
+ zc.zc_config_dst_size, &config, 0) != 0) {
+ free((void *)(uintptr_t)zc.zc_config_dst);
+ return (no_memory(zhp->zpool_hdl));
+ }
zhp->zpool_config_size = zc.zc_config_dst_size;
free((void *)(uintptr_t)zc.zc_config_dst);
- set_pool_health(config);
+ if (set_pool_health(config) != 0)
+ return (no_memory(zhp->zpool_hdl));
if (zhp->zpool_config != NULL) {
uint64_t oldtxg, newtxg;
@@ -260,25 +317,26 @@ zpool_refresh_stats(zpool_handle_t *zhp)
zhp->zpool_config = config;
- return (error);
+ return (error ? -1 : 0);
}
/*
* Iterate over all pools in the system.
*/
int
-zpool_iter(zpool_iter_f func, void *data)
+zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data)
{
config_node_t *cn;
zpool_handle_t *zhp;
int ret;
- namespace_reload();
+ if (namespace_reload(hdl) != 0)
+ return (-1);
- for (cn = uu_avl_first(namespace_avl); cn != NULL;
- cn = uu_avl_next(namespace_avl, cn)) {
+ for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL;
+ cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) {
- if ((zhp = zpool_open_silent(cn->cn_name)) == NULL)
+ if ((zhp = zpool_open_silent(hdl, cn->cn_name)) == NULL)
continue;
if ((ret = func(zhp, data)) != 0)
@@ -293,18 +351,19 @@ zpool_iter(zpool_iter_f func, void *data)
* handle passed each time must be explicitly closed by the callback.
*/
int
-zfs_iter_root(zfs_iter_f func, void *data)
+zfs_iter_root(libzfs_handle_t *hdl, zfs_iter_f func, void *data)
{
config_node_t *cn;
zfs_handle_t *zhp;
int ret;
- namespace_reload();
+ if (namespace_reload(hdl) != 0)
+ return (-1);
- for (cn = uu_avl_first(namespace_avl); cn != NULL;
- cn = uu_avl_next(namespace_avl, cn)) {
+ for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL;
+ cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) {
- if ((zhp = make_dataset_handle(cn->cn_name)) == NULL)
+ if ((zhp = make_dataset_handle(hdl, cn->cn_name)) == NULL)
continue;
if ((ret = func(zhp, data)) != 0)
diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c
index f23136c8aa..14ba6112ed 100644
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c
@@ -36,6 +36,7 @@
#include <strings.h>
#include <unistd.h>
#include <zone.h>
+#include <fcntl.h>
#include <sys/mntent.h>
#include <sys/mnttab.h>
#include <sys/mount.h>
@@ -64,7 +65,6 @@ zfs_type_to_name(zfs_type_t type)
return (dgettext(TEXT_DOMAIN, "volume"));
}
- zfs_baderror(type);
return (NULL);
}
@@ -118,43 +118,43 @@ path_to_str(const char *path, int types)
* 'buf' detailing exactly why the name was not valid.
*/
static int
-zfs_validate_name(const char *path, int type, char *buf, size_t buflen)
+zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type)
{
namecheck_err_t why;
char what;
if (dataset_namecheck(path, &why, &what) != 0) {
- if (buf != NULL) {
+ if (hdl != NULL) {
switch (why) {
case NAME_ERR_TOOLONG:
- (void) strlcpy(buf, dgettext(TEXT_DOMAIN,
- "name is too long"), buflen);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "name is too long"));
break;
case NAME_ERR_LEADING_SLASH:
- (void) strlcpy(buf, dgettext(TEXT_DOMAIN,
- "leading slash"), buflen);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "leading slash in name"));
break;
case NAME_ERR_EMPTY_COMPONENT:
- (void) strlcpy(buf, dgettext(TEXT_DOMAIN,
- "empty component"), buflen);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "empty component in name"));
break;
case NAME_ERR_TRAILING_SLASH:
- (void) strlcpy(buf, dgettext(TEXT_DOMAIN,
- "trailing slash"), buflen);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "trailing slash in name"));
break;
case NAME_ERR_INVALCHAR:
- (void) snprintf(buf, buflen,
+ zfs_error_aux(hdl,
dgettext(TEXT_DOMAIN, "invalid character "
- "'%c'"), what);
+ "'%c' in name"), what);
break;
case NAME_ERR_MULTIPLE_AT:
- (void) strlcpy(buf, dgettext(TEXT_DOMAIN,
- "multiple '@' delimiters"), buflen);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "multiple '@' delimiters in name"));
break;
}
}
@@ -163,20 +163,19 @@ zfs_validate_name(const char *path, int type, char *buf, size_t buflen)
}
if (!(type & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL) {
- if (buf != NULL)
- (void) strlcpy(buf,
- dgettext(TEXT_DOMAIN,
- "snapshot delimiter '@'"), buflen);
+ if (hdl != NULL)
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "snapshot delimiter '@' in filesystem name"));
return (0);
}
- return (1);
+ return (-1);
}
int
zfs_name_valid(const char *name, zfs_type_t type)
{
- return (zfs_validate_name(name, type, NULL, NULL));
+ return (zfs_validate_name(NULL, name, type));
}
/*
@@ -189,13 +188,16 @@ get_stats(zfs_handle_t *zhp)
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
- zc.zc_config_src = (uint64_t)(uintptr_t)zfs_malloc(1024);
+ if ((zc.zc_config_src = (uint64_t)(uintptr_t)malloc(1024)) == NULL)
+ return (-1);
zc.zc_config_src_size = 1024;
- while (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0) {
+ while (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
if (errno == ENOMEM) {
- zc.zc_config_src = (uint64_t)(uintptr_t)
- zfs_malloc(zc.zc_config_src_size);
+ free((void *)(uintptr_t)zc.zc_config_src);
+ if ((zc.zc_config_src = (uint64_t)(uintptr_t)
+ malloc(zc.zc_config_src_size)) == NULL)
+ return (-1);
} else {
free((void *)(uintptr_t)zc.zc_config_src);
return (-1);
@@ -207,12 +209,22 @@ get_stats(zfs_handle_t *zhp)
(void) strcpy(zhp->zfs_root, zc.zc_root);
- verify(nvlist_unpack((void *)(uintptr_t)zc.zc_config_src,
- zc.zc_config_src_size, &zhp->zfs_props, 0) == 0);
+ if (zhp->zfs_props) {
+ nvlist_free(zhp->zfs_props);
+ zhp->zfs_props = NULL;
+ }
+
+ if (nvlist_unpack((void *)(uintptr_t)zc.zc_config_src,
+ zc.zc_config_src_size, &zhp->zfs_props, 0) != 0) {
+ free((void *)(uintptr_t)zc.zc_config_src);
+ return (-1);
+ }
zhp->zfs_volsize = zc.zc_volsize;
zhp->zfs_volblocksize = zc.zc_volblocksize;
+ free((void *)(uintptr_t)zc.zc_config_src);
+
return (0);
}
@@ -230,9 +242,14 @@ zfs_refresh_properties(zfs_handle_t *zhp)
* zfs_iter_* to create child handles on the fly.
*/
zfs_handle_t *
-make_dataset_handle(const char *path)
+make_dataset_handle(libzfs_handle_t *hdl, const char *path)
{
- zfs_handle_t *zhp = zfs_malloc(sizeof (zfs_handle_t));
+ zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1);
+
+ if (zhp == NULL)
+ return (NULL);
+
+ zhp->zfs_hdl = hdl;
top:
(void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name));
@@ -263,20 +280,20 @@ top:
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
- (void) zvol_remove_link(zhp->zfs_name);
+ (void) zvol_remove_link(hdl, zhp->zfs_name);
zc.zc_objset_type = DMU_OST_ZVOL;
} else {
zc.zc_objset_type = DMU_OST_ZFS;
}
/* If we can successfully roll it back, reget the stats */
- if (zfs_ioctl(ZFS_IOC_ROLLBACK, &zc) == 0)
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc) == 0)
goto top;
/*
* If we can sucessfully destroy it, pretend that it
* never existed.
*/
- if (zfs_ioctl(ZFS_IOC_DESTROY, &zc) == 0) {
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc) == 0) {
free(zhp);
errno = ENOENT;
return (NULL);
@@ -294,8 +311,7 @@ top:
else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
zhp->zfs_type = ZFS_TYPE_FILESYSTEM;
else
- /* we should never see any other dataset types */
- zfs_baderror(zhp->zfs_dmustats.dds_type);
+ abort(); /* we should never see any other types */
return (zhp);
}
@@ -306,18 +322,21 @@ top:
* appropriate error message and return NULL if it can't be opened.
*/
zfs_handle_t *
-zfs_open(const char *path, int types)
+zfs_open(libzfs_handle_t *hdl, const char *path, int types)
{
zfs_handle_t *zhp;
+ char errbuf[1024];
+
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot open '%s'"), path);
/*
- * Validate the name before we even try to open it. We don't care about
- * the verbose invalid messages here; just report a generic error.
+ * Validate the name before we even try to open it.
*/
- if (!zfs_validate_name(path, types, NULL, 0)) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot open '%s': invalid %s name"), path,
- path_to_str(path, types));
+ if (!zfs_validate_name(hdl, path, ZFS_TYPE_ANY)) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "invalid dataset name"));
+ (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
return (NULL);
}
@@ -325,48 +344,13 @@ zfs_open(const char *path, int types)
* Try to get stats for the dataset, which will tell us if it exists.
*/
errno = 0;
- if ((zhp = make_dataset_handle(path)) == NULL) {
- switch (errno) {
- case ENOENT:
- /*
- * The dataset doesn't exist.
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot open '%s': no such %s"), path,
- path_to_str(path, types));
- break;
-
- case EBUSY:
- /*
- * We were able to open the dataset but couldn't
- * get the stats.
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot open '%s': %s is busy"), path,
- path_to_str(path, types));
- break;
-
- case ENXIO:
- case EIO:
- /*
- * I/O error from the underlying pool.
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot open '%s': I/O error"), path,
- path_to_str(path, types));
- break;
-
- default:
- zfs_baderror(errno);
-
- }
+ if ((zhp = make_dataset_handle(hdl, path)) == NULL) {
+ (void) zfs_standard_error(hdl, errno, errbuf, path);
return (NULL);
}
if (!(types & zhp->zfs_type)) {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': operation "
- "not supported for %ss"), path,
- zfs_type_to_name(zhp->zfs_type));
+ (void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
free(zhp);
return (NULL);
}
@@ -382,6 +366,8 @@ zfs_close(zfs_handle_t *zhp)
{
if (zhp->zfs_mntopts)
free(zhp->zfs_mntopts);
+ if (zhp->zfs_props)
+ nvlist_free(zhp->zfs_props);
free(zhp);
}
@@ -443,7 +429,7 @@ struct {
* resulting value must be shifted.
*/
static int
-str2shift(const char *buf, char *reason, size_t len)
+str2shift(libzfs_handle_t *hdl, const char *buf)
{
const char *ends = "BKMGTPEZ";
int i;
@@ -455,8 +441,8 @@ str2shift(const char *buf, char *reason, size_t len)
break;
}
if (i == strlen(ends)) {
- (void) snprintf(reason, len, dgettext(TEXT_DOMAIN, "invalid "
- "numeric suffix '%s'"), buf);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "invalid numeric suffix '%s'"), buf);
return (-1);
}
@@ -465,12 +451,11 @@ str2shift(const char *buf, char *reason, size_t len)
* allow 'BB' - that's just weird.
*/
if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0' &&
- toupper(buf[0]) != 'B')) {
+ toupper(buf[0]) != 'B'))
return (10*i);
- }
- (void) snprintf(reason, len, dgettext(TEXT_DOMAIN, "invalid numeric "
- "suffix '%s'"), buf);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "invalid numeric suffix '%s'"), buf);
return (-1);
}
@@ -480,7 +465,7 @@ str2shift(const char *buf, char *reason, size_t len)
* message for the caller to use.
*/
static int
-nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen)
+nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num)
{
char *end;
int shift;
@@ -489,8 +474,9 @@ nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen)
/* Check to see if this looks like a number. */
if ((value[0] < '0' || value[0] > '9') && value[0] != '.') {
- (void) strlcpy(buf, dgettext(TEXT_DOMAIN,
- "must be a numeric value"), buflen);
+ if (hdl)
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "bad numeric value '%s'"), value);
return (-1);
}
@@ -503,8 +489,9 @@ nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen)
* in a 64-bit value.
*/
if (errno == ERANGE) {
- (void) strlcpy(buf, dgettext(TEXT_DOMAIN,
- "value is too large"), buflen);
+ if (hdl)
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "numeric value is too large"));
return (-1);
}
@@ -515,26 +502,28 @@ nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen)
if (*end == '.') {
double fval = strtod(value, &end);
- if ((shift = str2shift(end, buf, buflen)) == -1)
+ if ((shift = str2shift(hdl, end)) == -1)
return (-1);
fval *= pow(2, shift);
if (fval > UINT64_MAX) {
- (void) strlcpy(buf, dgettext(TEXT_DOMAIN,
- "value is too large"), buflen);
+ if (hdl)
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "numeric value is too large"));
return (-1);
}
*num = (uint64_t)fval;
} else {
- if ((shift = str2shift(end, buf, buflen)) == -1)
+ if ((shift = str2shift(hdl, end)) == -1)
return (-1);
/* Check for overflow */
if (shift >= 64 || (*num << shift) >> shift != *num) {
- (void) strlcpy(buf, dgettext(TEXT_DOMAIN,
- "value is too large"), buflen);
+ if (hdl)
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "numeric value is too large"));
return (-1);
}
@@ -547,9 +536,7 @@ nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen)
int
zfs_nicestrtonum(const char *str, uint64_t *val)
{
- char buf[1];
-
- return (nicestrtonum(str, val, buf, sizeof (buf)));
+ return (nicestrtonum(NULL, str, val));
}
/*
@@ -557,28 +544,28 @@ zfs_nicestrtonum(const char *str, uint64_t *val)
* by zfs_prop_set() and some libzfs consumers.
*/
int
-zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
+zfs_prop_validate(libzfs_handle_t *hdl, zfs_prop_t prop, const char *value,
+ uint64_t *intval)
{
const char *propname = zfs_prop_to_name(prop);
uint64_t number;
- char reason[64];
+ char errbuf[1024];
int i;
/*
* Check to see if this a read-only property.
*/
- if (zfs_prop_readonly(prop)) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot set %s property: read-only property"), propname);
- return (-1);
- }
+ if (zfs_prop_readonly(prop))
+ return (zfs_error(hdl, EZFS_PROPREADONLY,
+ dgettext(TEXT_DOMAIN, "cannot set %s property"), propname));
+
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "bad %s value '%s'"), propname, value);
/* See if the property value is too long */
if (strlen(value) >= ZFS_MAXPROPLEN) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad %s value '%s': value is too long"), propname,
- value);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "value is too long"));
+ return (zfs_error(hdl, EZFS_BADPROP, errbuf));
}
/* Perform basic checking based on property type */
@@ -589,10 +576,9 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
} else if (strcmp(value, "off") == 0) {
number = 0;
} else {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad %s value '%s': must be 'on' or 'off'"),
- propname, value);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "must be 'on' or 'off'"));
+ return (zfs_error(hdl, EZFS_BADPROP, errbuf));
}
break;
@@ -603,21 +589,15 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
break;
}
- if (nicestrtonum(value, &number, reason,
- sizeof (reason)) != 0) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad %s value '%s': %s"), propname, value,
- reason);
- return (-1);
- }
+ if (nicestrtonum(hdl, value, &number) != 0)
+ return (zfs_error(hdl, EZFS_BADPROP, errbuf));
/* don't allow 0 for quota, use 'none' instead */
if (prop == ZFS_PROP_QUOTA && number == 0 &&
strcmp(value, "none") != 0) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad %s value '%s': use '%s=none' to disable"),
- propname, value, propname);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "use 'quota=none' to disable"));
+ return (zfs_error(hdl, EZFS_BADPROP, errbuf));
}
/* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */
@@ -625,13 +605,11 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
prop == ZFS_PROP_VOLBLOCKSIZE) {
if (number < SPA_MINBLOCKSIZE ||
number > SPA_MAXBLOCKSIZE || !ISP2(number)) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad %s value '%s': "
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"must be power of 2 from %u to %uk"),
- propname, value,
(uint_t)SPA_MINBLOCKSIZE,
(uint_t)SPA_MAXBLOCKSIZE >> 10);
- return (-1);
+ return (zfs_error(hdl, EZFS_BADPROP, errbuf));
}
}
@@ -652,11 +630,10 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
break;
if (value[0] != '/') {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad %s value '%s': must be an absolute "
- "path, 'none', or 'legacy'"),
- propname, value);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "must be an absolute path, 'none', or "
+ "'legacy'"));
+ return (zfs_error(hdl, EZFS_BADPROP, errbuf));
}
break;
@@ -670,11 +647,10 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
}
if (checksum_table[i].name == NULL) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad %s value '%s': must be 'on', 'off', "
- "'fletcher2', 'fletcher4', or 'sha256'"),
- propname, value);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "must be 'on', 'off', 'fletcher2', "
+ "'fletcher4', or 'sha256'"));
+ return (zfs_error(hdl, EZFS_BADPROP, errbuf));
}
break;
@@ -688,11 +664,9 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
}
if (compress_table[i].name == NULL) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad %s value '%s': must be 'on', 'off', "
- "or 'lzjb'"),
- propname, value);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "must be 'on', 'off', or 'lzjb'"));
+ return (zfs_error(hdl, EZFS_BADPROP, errbuf));
}
break;
@@ -705,11 +679,9 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
}
if (snapdir_table[i].name == NULL) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad %s value '%s': must be 'hidden' "
- "or 'visible'"),
- propname, value);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "must be 'hidden' or 'visible'"));
+ return (zfs_error(hdl, EZFS_BADPROP, errbuf));
}
break;
@@ -723,11 +695,10 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
}
if (acl_mode_table[i].name == NULL) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad %s value '%s': must be 'discard', "
- "'groupmask' or 'passthrough'"),
- propname, value);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "must be 'disacard', 'groupmask', or "
+ "'passthrough'"));
+ return (zfs_error(hdl, EZFS_BADPROP, errbuf));
}
break;
@@ -741,11 +712,10 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
}
if (acl_inherit_table[i].name == NULL) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad %s value '%s': must be 'discard', "
- "'noallow', 'secure' or 'passthrough'"),
- propname, value);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "must be 'discard, 'noallow', 'secure', "
+ "or 'passthrough'"));
+ return (zfs_error(hdl, EZFS_BADPROP, errbuf));
}
break;
@@ -775,19 +745,22 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
zfs_cmd_t zc = { 0 };
int ret;
prop_changelist_t *cl;
+ char errbuf[1024];
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
- if (zfs_prop_validate(prop, propval, &number) != 0)
+ if (zfs_prop_validate(zhp->zfs_hdl, prop, propval, &number) != 0)
return (-1);
+
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot set %s for '%s'"), propname,
+ zhp->zfs_name);
+
/*
* Check to see if the value applies to this type
*/
- if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot set %s for '%s': property does not apply to %ss"),
- propname, zhp->zfs_name, zfs_type_to_name(zhp->zfs_type));
- return (-1);
- }
+ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
+ return (zfs_error(hdl, EZFS_PROPTYPE, errbuf));
/*
* For the mountpoint and sharenfs properties, check if it can be set
@@ -804,29 +777,24 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
if (prop == ZFS_PROP_MOUNTPOINT || prop == ZFS_PROP_SHARENFS) {
if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
if (getzoneid() == GLOBAL_ZONEID) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot set %s for '%s': "
- "dataset is used in a non-global zone"),
- propname, zhp->zfs_name);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "dataset is used in a non-global zone"));
+ return (zfs_error(hdl, EZFS_ZONED, errbuf));
} else if (prop == ZFS_PROP_SHARENFS) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot set %s for '%s': filesystems "
- "cannot be shared in a non-global zone"),
- propname, zhp->zfs_name);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "filesystems cannot be shared in a "
+ "non-global zone"));
+ return (zfs_error(hdl, EZFS_ZONED, errbuf));
}
} else if (getzoneid() != GLOBAL_ZONEID) {
/*
* If zoned property is 'off', this must be in
* a globle zone. If not, something is wrong.
*/
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot set %s for '%s': dataset is "
- "used in a non-global zone, but 'zoned' "
- "property is not set"),
- propname, zhp->zfs_name);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "dataset is used in a non-global zone, but "
+ "'zoned' property is not set"));
+ return (zfs_error(hdl, EZFS_ZONED, errbuf));
}
}
@@ -834,11 +802,10 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
return (-1);
if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot set %s for '%s', "
- "child dataset with inherited mountpoint is used "
- "in a non-global zone"),
- propname, zhp->zfs_name);
- ret = -1;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "child dataset with inherited mountpoint is used "
+ "in a non-global zone"));
+ ret = zfs_error(hdl, EZFS_ZONED, errbuf);
goto error;
}
@@ -853,11 +820,12 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
switch (prop) {
case ZFS_PROP_QUOTA:
zc.zc_cookie = number;
- ret = zfs_ioctl(ZFS_IOC_SET_QUOTA, &zc);
+ ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_QUOTA, &zc);
break;
case ZFS_PROP_RESERVATION:
zc.zc_cookie = number;
- ret = zfs_ioctl(ZFS_IOC_SET_RESERVATION, &zc);
+ ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_RESERVATION,
+ &zc);
break;
case ZFS_PROP_MOUNTPOINT:
case ZFS_PROP_SHARENFS:
@@ -870,15 +838,16 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
sizeof (zc.zc_prop_value));
zc.zc_intsz = 1;
zc.zc_numints = strlen(propval) + 1;
- ret = zfs_ioctl(ZFS_IOC_SET_PROP, &zc);
+ ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_PROP, &zc);
break;
case ZFS_PROP_VOLSIZE:
zc.zc_volsize = number;
- ret = zfs_ioctl(ZFS_IOC_SET_VOLSIZE, &zc);
+ ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_VOLSIZE, &zc);
break;
case ZFS_PROP_VOLBLOCKSIZE:
zc.zc_volblocksize = number;
- ret = zfs_ioctl(ZFS_IOC_SET_VOLBLOCKSIZE, &zc);
+ ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_VOLBLOCKSIZE,
+ &zc);
break;
default:
(void) strlcpy(zc.zc_prop_name, propname,
@@ -887,25 +856,13 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
*(uint64_t *)zc.zc_prop_value = number;
zc.zc_intsz = 8;
zc.zc_numints = 1;
- ret = zfs_ioctl(ZFS_IOC_SET_PROP, &zc);
+ ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_PROP, &zc);
break;
}
if (ret != 0) {
switch (errno) {
- case EPERM:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot set %s for '%s': permission "
- "denied"), propname, zhp->zfs_name);
- break;
-
- case ENOENT:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot open '%s': no such %s"), zhp->zfs_name,
- zfs_type_to_name(zhp->zfs_type));
- break;
-
case ENOSPC:
/*
* For quotas and reservations, ENOSPC indicates
@@ -914,41 +871,33 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
*/
switch (prop) {
case ZFS_PROP_QUOTA:
- zfs_error(dgettext(TEXT_DOMAIN, "cannot set %s "
- "for '%s': size is less than current "
- "used or reserved space"), propname,
- zhp->zfs_name);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "size is less than current used or "
+ "reserved space"));
+ (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
break;
case ZFS_PROP_RESERVATION:
- zfs_error(dgettext(TEXT_DOMAIN, "cannot set %s "
- "for '%s': size is greater than available "
- "space"), propname, zhp->zfs_name);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "size is greater than available space"));
+ (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
break;
default:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot set %s for '%s': out of space"),
- propname, zhp->zfs_name);
+ (void) zfs_standard_error(hdl, errno, errbuf);
break;
}
break;
case EBUSY:
- if (prop == ZFS_PROP_VOLBLOCKSIZE) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot set %s for '%s': "
- "volume already contains data"),
- propname, zhp->zfs_name);
- } else {
- zfs_baderror(errno);
- }
+ if (prop == ZFS_PROP_VOLBLOCKSIZE)
+ (void) zfs_error(hdl, EZFS_VOLHASDATA, errbuf);
+ else
+ return (zfs_standard_error(hdl, EBUSY, errbuf));
break;
case EROFS:
- zfs_error(dgettext(TEXT_DOMAIN, "cannot set %s for "
- "'%s': read only %s"), propname, zhp->zfs_name,
- zfs_type_to_name(zhp->zfs_type));
+ (void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
break;
case EOVERFLOW:
@@ -957,16 +906,13 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
*/
#ifdef _ILP32
if (prop == ZFS_PROP_VOLSIZE) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot set %s for '%s': "
- "max volume size is 1TB on 32-bit systems"),
- propname, zhp->zfs_name);
+ (void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
break;
}
#endif
- zfs_baderror(errno);
+ /* FALLTHROUGH */
default:
- zfs_baderror(errno);
+ (void) zfs_standard_error(hdl, errno, errbuf);
}
} else {
/*
@@ -994,44 +940,35 @@ zfs_prop_inherit(zfs_handle_t *zhp, zfs_prop_t prop)
zfs_cmd_t zc = { 0 };
int ret;
prop_changelist_t *cl;
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
+ char errbuf[1024];
+
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot inherit %s for '%s'"), propname, zhp->zfs_name);
/*
* Verify that this property is inheritable.
*/
- if (zfs_prop_readonly(prop)) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot inherit %s for '%s': property is read-only"),
- propname, zhp->zfs_name);
- return (-1);
- }
+ if (zfs_prop_readonly(prop))
+ return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf));
- if (!zfs_prop_inheritable(prop)) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot inherit %s for '%s': property is not inheritable"),
- propname, zhp->zfs_name);
- return (-1);
- }
+ if (!zfs_prop_inheritable(prop))
+ return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf));
/*
* Check to see if the value applies to this type
*/
- if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot inherit %s for '%s': property does "
- "not apply to %ss"), propname, zhp->zfs_name,
- zfs_type_to_name(zhp->zfs_type));
- return (-1);
- }
+ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
+ return (zfs_error(hdl, EZFS_PROPTYPE, errbuf));
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
(void) strlcpy(zc.zc_prop_name, propname, sizeof (zc.zc_prop_name));
if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID &&
zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot inherit %s for '%s', "
- "dataset is used in a non-global zone"), propname,
- zhp->zfs_name);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "dataset is used in a non-global zone"));
+ return (zfs_error(hdl, EZFS_ZONED, errbuf));
}
/*
@@ -1041,11 +978,10 @@ zfs_prop_inherit(zfs_handle_t *zhp, zfs_prop_t prop)
return (-1);
if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot inherit %s for '%s', "
- "child dataset with inherited mountpoint is "
- "used in a non-global zone"),
- propname, zhp->zfs_name);
- ret = -1;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "child dataset with inherited mountpoint is used "
+ "in a non-global zone"));
+ ret = zfs_error(hdl, EZFS_ZONED, errbuf);
goto error;
}
@@ -1054,27 +990,9 @@ zfs_prop_inherit(zfs_handle_t *zhp, zfs_prop_t prop)
zc.zc_numints = 0;
- if ((ret = zfs_ioctl(ZFS_IOC_SET_PROP, &zc)) != 0) {
- switch (errno) {
- case EPERM:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot inherit %s for '%s': permission "
- "denied"), propname, zhp->zfs_name);
- break;
- case ENOENT:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot open '%s': no such %s"), zhp->zfs_name,
- zfs_type_to_name(zhp->zfs_type));
- break;
- case ENOSPC:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot inherit %s for '%s': "
- "out of space"), propname, zhp->zfs_name);
- break;
- default:
- zfs_baderror(errno);
- }
-
+ if ((ret = ioctl(zhp->zfs_hdl->libzfs_fd,
+ ZFS_IOC_SET_PROP, &zc)) != 0) {
+ return (zfs_standard_error(hdl, errno, errbuf));
} else {
if ((ret = changelist_postfix(cl)) != 0)
@@ -1151,11 +1069,10 @@ getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
* If they differ from the on-disk values, report the current values and mark
* the source "temporary".
*/
-static uint64_t
+static int
get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zfs_source_t *src,
- char **source)
+ char **source, uint64_t *val)
{
- uint64_t val;
struct mnttab mnt;
*source = NULL;
@@ -1167,86 +1084,90 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zfs_source_t *src,
switch (prop) {
case ZFS_PROP_ATIME:
- val = getprop_uint64(zhp, prop, source);
+ *val = getprop_uint64(zhp, prop, source);
- if (hasmntopt(&mnt, MNTOPT_ATIME) && !val) {
- val = TRUE;
+ if (hasmntopt(&mnt, MNTOPT_ATIME) && !*val) {
+ *val = B_TRUE;
if (src)
*src = ZFS_SRC_TEMPORARY;
- } else if (hasmntopt(&mnt, MNTOPT_NOATIME) && val) {
- val = FALSE;
+ } else if (hasmntopt(&mnt, MNTOPT_NOATIME) && *val) {
+ *val = B_FALSE;
if (src)
*src = ZFS_SRC_TEMPORARY;
}
- return (val);
+ break;
case ZFS_PROP_AVAILABLE:
- return (zhp->zfs_dmustats.dds_available);
+ *val = zhp->zfs_dmustats.dds_available;
+ break;
case ZFS_PROP_DEVICES:
- val = getprop_uint64(zhp, prop, source);
+ *val = getprop_uint64(zhp, prop, source);
- if (hasmntopt(&mnt, MNTOPT_DEVICES) && !val) {
- val = TRUE;
+ if (hasmntopt(&mnt, MNTOPT_DEVICES) && !*val) {
+ *val = B_TRUE;
if (src)
*src = ZFS_SRC_TEMPORARY;
- } else if (hasmntopt(&mnt, MNTOPT_NODEVICES) && val) {
- val = FALSE;
+ } else if (hasmntopt(&mnt, MNTOPT_NODEVICES) && *val) {
+ *val = B_FALSE;
if (src)
*src = ZFS_SRC_TEMPORARY;
}
- return (val);
+ break;
case ZFS_PROP_EXEC:
- val = getprop_uint64(zhp, prop, source);
+ *val = getprop_uint64(zhp, prop, source);
- if (hasmntopt(&mnt, MNTOPT_EXEC) && !val) {
- val = TRUE;
+ if (hasmntopt(&mnt, MNTOPT_EXEC) && !*val) {
+ *val = B_TRUE;
if (src)
*src = ZFS_SRC_TEMPORARY;
- } else if (hasmntopt(&mnt, MNTOPT_NOEXEC) && val) {
- val = FALSE;
+ } else if (hasmntopt(&mnt, MNTOPT_NOEXEC) && *val) {
+ *val = B_FALSE;
if (src)
*src = ZFS_SRC_TEMPORARY;
}
- return (val);
+ break;
case ZFS_PROP_RECORDSIZE:
case ZFS_PROP_COMPRESSION:
case ZFS_PROP_ZONED:
- val = getprop_uint64(zhp, prop, source);
- return (val);
+ *val = getprop_uint64(zhp, prop, source);
+ break;
case ZFS_PROP_READONLY:
- val = getprop_uint64(zhp, prop, source);
+ *val = getprop_uint64(zhp, prop, source);
- if (hasmntopt(&mnt, MNTOPT_RO) && !val) {
- val = TRUE;
+ if (hasmntopt(&mnt, MNTOPT_RO) && !*val) {
+ *val = B_TRUE;
if (src)
*src = ZFS_SRC_TEMPORARY;
- } else if (hasmntopt(&mnt, MNTOPT_RW) && val) {
- val = FALSE;
+ } else if (hasmntopt(&mnt, MNTOPT_RW) && *val) {
+ *val = B_FALSE;
if (src)
*src = ZFS_SRC_TEMPORARY;
}
- return (val);
+ break;
case ZFS_PROP_CREATION:
- return (zhp->zfs_dmustats.dds_creation_time);
+ *val = zhp->zfs_dmustats.dds_creation_time;
+ break;
case ZFS_PROP_QUOTA:
if (zhp->zfs_dmustats.dds_quota == 0)
*source = ""; /* default */
else
*source = zhp->zfs_name;
- return (zhp->zfs_dmustats.dds_quota);
+ *val = zhp->zfs_dmustats.dds_quota;
+ break;
case ZFS_PROP_RESERVATION:
if (zhp->zfs_dmustats.dds_reserved == 0)
*source = ""; /* default */
else
*source = zhp->zfs_name;
- return (zhp->zfs_dmustats.dds_reserved);
+ *val = zhp->zfs_dmustats.dds_reserved;
+ break;
case ZFS_PROP_COMPRESSRATIO:
/*
@@ -1255,43 +1176,50 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zfs_source_t *src,
* 100, so '2.5x' would be returned as 250.
*/
if (zhp->zfs_dmustats.dds_compressed_bytes == 0)
- return (100ULL);
+ *val = 100ULL;
else
- return (zhp->zfs_dmustats.dds_uncompressed_bytes * 100 /
+ *val =
+ (zhp->zfs_dmustats.dds_uncompressed_bytes * 100 /
zhp->zfs_dmustats.dds_compressed_bytes);
+ break;
case ZFS_PROP_REFERENCED:
/*
* 'referenced' refers to the amount of physical space
* referenced (possibly shared) by this object.
*/
- return (zhp->zfs_dmustats.dds_space_refd);
+ *val = zhp->zfs_dmustats.dds_space_refd;
+ break;
case ZFS_PROP_SETUID:
- val = getprop_uint64(zhp, prop, source);
+ *val = getprop_uint64(zhp, prop, source);
- if (hasmntopt(&mnt, MNTOPT_SETUID) && !val) {
- val = TRUE;
+ if (hasmntopt(&mnt, MNTOPT_SETUID) && !*val) {
+ *val = B_TRUE;
if (src)
*src = ZFS_SRC_TEMPORARY;
- } else if (hasmntopt(&mnt, MNTOPT_NOSETUID) && val) {
- val = FALSE;
+ } else if (hasmntopt(&mnt, MNTOPT_NOSETUID) && *val) {
+ *val = B_FALSE;
if (src)
*src = ZFS_SRC_TEMPORARY;
}
- return (val);
+ break;
case ZFS_PROP_VOLSIZE:
- return (zhp->zfs_volsize);
+ *val = zhp->zfs_volsize;
+ break;
case ZFS_PROP_VOLBLOCKSIZE:
- return (zhp->zfs_volblocksize);
+ *val = zhp->zfs_volblocksize;
+ break;
case ZFS_PROP_USED:
- return (zhp->zfs_dmustats.dds_space_used);
+ *val = zhp->zfs_dmustats.dds_space_used;
+ break;
case ZFS_PROP_CREATETXG:
- return (zhp->zfs_dmustats.dds_creation_txg);
+ *val = zhp->zfs_dmustats.dds_creation_txg;
+ break;
case ZFS_PROP_MOUNTED:
/*
@@ -1306,16 +1234,22 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zfs_source_t *src,
search.mnt_special = (char *)zhp->zfs_name;
search.mnt_fstype = MNTTYPE_ZFS;
- rewind(zfs_mnttab());
+ rewind(zhp->zfs_hdl->libzfs_mnttab);
- if (getmntany(zfs_mnttab(), &entry, &search) == 0)
- zhp->zfs_mntopts =
- zfs_strdup(entry.mnt_mntopts);
+ if (getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry,
+ &search) == 0 && (zhp->zfs_mntopts =
+ zfs_strdup(zhp->zfs_hdl,
+ entry.mnt_mntopts)) == NULL)
+ return (-1);
}
- return (zhp->zfs_mntopts != NULL);
+ *val = (zhp->zfs_mntopts != NULL);
+ break;
default:
- zfs_baderror(EINVAL);
+ zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+ "cannot get non-numeric property"));
+ return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP,
+ dgettext(TEXT_DOMAIN, "internal error")));
}
return (0);
@@ -1355,7 +1289,7 @@ get_source(zfs_handle_t *zhp, zfs_source_t *srctype, char *source,
*/
int
zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
- zfs_source_t *src, char *statbuf, size_t statlen, int literal)
+ zfs_source_t *src, char *statbuf, size_t statlen, boolean_t literal)
{
char *source = NULL;
uint64_t val;
@@ -1383,8 +1317,9 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
* Basic boolean values are built on top of
* get_numeric_property().
*/
- nicebool(get_numeric_property(zhp, prop, src, &source),
- propbuf, proplen);
+ if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+ return (-1);
+ nicebool(val, propbuf, proplen);
break;
@@ -1399,7 +1334,8 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
* Basic numeric values are built on top of
* get_numeric_property().
*/
- val = get_numeric_property(zhp, prop, src, &source);
+ if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+ return (-1);
if (literal)
(void) snprintf(propbuf, proplen, "%llu", val);
else
@@ -1533,7 +1469,8 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
case ZFS_PROP_QUOTA:
case ZFS_PROP_RESERVATION:
- val = get_numeric_property(zhp, prop, src, &source);
+ if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+ return (-1);
/*
* If quota or reservation is 0, we translate this into 'none'
@@ -1555,7 +1492,8 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
break;
case ZFS_PROP_COMPRESSRATIO:
- val = get_numeric_property(zhp, prop, src, &source);
+ if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+ return (-1);
(void) snprintf(propbuf, proplen, "%lld.%02lldx", val / 100,
val % 100);
break;
@@ -1572,7 +1510,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
str = "snapshot";
break;
default:
- zfs_baderror(zhp->zfs_type);
+ abort();
}
(void) snprintf(propbuf, proplen, "%s", str);
break;
@@ -1584,7 +1522,10 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
* it's a boolean value, the typical values of "on" and "off"
* don't make sense, so we translate to "yes" and "no".
*/
- if (get_numeric_property(zhp, ZFS_PROP_MOUNTED, src, &source))
+ if (get_numeric_property(zhp, ZFS_PROP_MOUNTED,
+ src, &source, &val) != 0)
+ return (-1);
+ if (val)
(void) strlcpy(propbuf, "yes", proplen);
else
(void) strlcpy(propbuf, "no", proplen);
@@ -1600,7 +1541,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
break;
default:
- zfs_baderror(EINVAL);
+ abort();
}
get_source(zhp, src, source, statbuf, statlen);
@@ -1618,8 +1559,11 @@ zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop)
{
char *source;
zfs_source_t sourcetype = ZFS_SRC_NONE;
+ uint64_t val;
+
+ (void) get_numeric_property(zhp, prop, &sourcetype, &source, &val);
- return (get_numeric_property(zhp, prop, &sourcetype, &source));
+ return (val);
}
/*
@@ -1635,12 +1579,15 @@ zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value,
* Check to see if this property applies to our object
*/
if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
- return (-1);
+ return (zfs_error(zhp->zfs_hdl, EZFS_PROPTYPE,
+ dgettext(TEXT_DOMAIN, "cannot get property '%s'"),
+ zfs_prop_to_name(prop)));
if (src)
*src = ZFS_SRC_NONE;
- *value = get_numeric_property(zhp, prop, src, &source);
+ if (get_numeric_property(zhp, prop, src, &source, value) != 0)
+ return (-1);
get_source(zhp, src, source, statbuf, statlen);
@@ -1676,7 +1623,7 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
int ret;
for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
- zfs_ioctl(ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
+ ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) {
/*
* Ignore private dataset names.
@@ -1688,7 +1635,8 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
* Silently ignore errors, as the only plausible explanation is
* that the pool has since been removed.
*/
- if ((nzhp = make_dataset_handle(zc.zc_name)) == NULL)
+ if ((nzhp = make_dataset_handle(zhp->zfs_hdl,
+ zc.zc_name)) == NULL)
continue;
if ((ret = func(nzhp, data)) != 0)
@@ -1701,7 +1649,8 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
* obtained the handle.
*/
if (errno != ESRCH && errno != ENOENT)
- zfs_baderror(errno);
+ return (zfs_standard_error(zhp->zfs_hdl, errno,
+ dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
return (0);
}
@@ -1717,10 +1666,12 @@ zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data)
int ret;
for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
- zfs_ioctl(ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
+ ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
+ &zc) == 0;
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) {
- if ((nzhp = make_dataset_handle(zc.zc_name)) == NULL)
+ if ((nzhp = make_dataset_handle(zhp->zfs_hdl,
+ zc.zc_name)) == NULL)
continue;
if ((ret = func(nzhp, data)) != 0)
@@ -1733,7 +1684,8 @@ zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data)
* obtained the handle. Silently ignore this case, and return success.
*/
if (errno != ESRCH && errno != ENOENT)
- zfs_baderror(errno);
+ return (zfs_standard_error(zhp->zfs_hdl, errno,
+ dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
return (0);
}
@@ -1774,21 +1726,22 @@ parent_name(const char *path, char *buf, size_t buflen)
* Checks to make sure that the given path has a parent, and that it exists.
*/
static int
-check_parents(const char *path, zfs_type_t type)
+check_parents(libzfs_handle_t *hdl, const char *path)
{
zfs_cmd_t zc = { 0 };
char parent[ZFS_MAXNAMELEN];
char *slash;
zfs_handle_t *zhp;
+ char errbuf[1024];
+
+ (void) snprintf(errbuf, sizeof (errbuf), "cannot create '%s'",
+ path);
/* get parent, and check to see if this is just a pool */
if (parent_name(path, parent, sizeof (parent)) != 0) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': missing dataset name"),
- path, zfs_type_to_name(type));
- zfs_error(dgettext(TEXT_DOMAIN,
- "use 'zpool create' to create a storage pool"));
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "missing dataset name"));
+ return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
}
/* check to see if the pool exists */
@@ -1796,40 +1749,39 @@ check_parents(const char *path, zfs_type_t type)
slash = parent + strlen(parent);
(void) strncpy(zc.zc_name, parent, slash - parent);
zc.zc_name[slash - parent] = '\0';
- if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0 &&
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 &&
errno == ENOENT) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': no such pool '%s'"), path, zc.zc_name);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "no such pool '%s'"), zc.zc_name);
+ return (zfs_error(hdl, EZFS_NOENT, errbuf));
}
/* check to see if the parent dataset exists */
- if ((zhp = make_dataset_handle(parent)) == NULL) {
+ if ((zhp = make_dataset_handle(hdl, parent)) == NULL) {
switch (errno) {
case ENOENT:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': parent does not exist"), path);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "parent does not exist"));
+ return (zfs_error(hdl, EZFS_NOENT, errbuf));
default:
- zfs_baderror(errno);
+ return (zfs_standard_error(hdl, errno, errbuf));
}
}
/* we are in a non-global zone, but parent is in the global zone */
if (getzoneid() != GLOBAL_ZONEID &&
!zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': permission denied"), path);
+ (void) zfs_standard_error(hdl, EPERM, errbuf);
zfs_close(zhp);
return (-1);
}
/* make sure parent is a filesystem */
if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': parent is not a filesystem"),
- path);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "parent is not a filesystem"));
+ (void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
zfs_close(zhp);
return (-1);
}
@@ -1843,44 +1795,35 @@ check_parents(const char *path, zfs_type_t type)
* only for volumes, and indicate the size and blocksize of the volume.
*/
int
-zfs_create(const char *path, zfs_type_t type,
+zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
const char *sizestr, const char *blocksizestr)
{
- char reason[64];
zfs_cmd_t zc = { 0 };
int ret;
uint64_t size = 0;
uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
+ char errbuf[1024];
/* convert sizestr into integer size */
- if (sizestr != NULL && nicestrtonum(sizestr, &size,
- reason, sizeof (reason)) != 0) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad volume size '%s': %s"), sizestr, reason);
- return (-1);
- }
+ if (sizestr != NULL && nicestrtonum(hdl, sizestr, &size) != 0)
+ return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN,
+ "bad volume size '%s'"), sizestr));
/* convert blocksizestr into integer blocksize */
- if (blocksizestr != NULL && nicestrtonum(blocksizestr, &blocksize,
- reason, sizeof (reason)) != 0) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad volume blocksize '%s': %s"), blocksizestr, reason);
- return (-1);
- }
+ if (blocksizestr != NULL && nicestrtonum(hdl, blocksizestr,
+ &blocksize) != 0)
+ return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN,
+ "bad volume blocksize '%s'"), blocksizestr));
+
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot create '%s'"), path);
/* validate the path, taking care to note the extended error message */
- if (!zfs_validate_name(path, type, reason, sizeof (reason))) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': %s in %s name"), path, reason,
- zfs_type_to_name(type));
- if (strstr(reason, "snapshot") != NULL)
- zfs_error(dgettext(TEXT_DOMAIN,
- "use 'zfs snapshot' to create a snapshot"));
- return (-1);
- }
+ if (!zfs_validate_name(hdl, path, type))
+ return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
/* validate parents exist */
- if (check_parents(path, type) != 0)
+ if (check_parents(hdl, path) != 0)
return (-1);
/*
@@ -1891,10 +1834,10 @@ zfs_create(const char *path, zfs_type_t type,
* first try to see if the dataset exists.
*/
(void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name));
- if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) == 0) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': dataset exists"), path);
- return (-1);
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "dataset already exists"));
+ return (zfs_error(hdl, EZFS_EXISTS, errbuf));
}
if (type == ZFS_TYPE_VOLUME)
@@ -1911,30 +1854,30 @@ zfs_create(const char *path, zfs_type_t type,
* zero.
*/
if (size == 0) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad volume size '%s': cannot be zero"), sizestr);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "cannot be zero"));
+ return (zfs_error(hdl, EZFS_BADPROP,
+ dgettext(TEXT_DOMAIN, "bad volume size '%s'"),
+ sizestr));
}
if (blocksize < SPA_MINBLOCKSIZE ||
blocksize > SPA_MAXBLOCKSIZE || !ISP2(blocksize)) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad volume block size '%s': "
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"must be power of 2 from %u to %uk"),
- blocksizestr,
(uint_t)SPA_MINBLOCKSIZE,
(uint_t)SPA_MAXBLOCKSIZE >> 10);
- return (-1);
+ return (zfs_error(hdl, EZFS_BADPROP,
+ dgettext(TEXT_DOMAIN,
+ "bad volume block size '%s'"), blocksizestr));
}
if (size % blocksize != 0) {
- char buf[64];
- zfs_nicenum(blocksize, buf, sizeof (buf));
- zfs_error(dgettext(TEXT_DOMAIN,
- "bad volume size '%s': "
- "must be multiple of volume block size (%s)"),
- sizestr, buf);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "must be a multiple of volume block size"));
+ return (zfs_error(hdl, EZFS_BADPROP,
+ dgettext(TEXT_DOMAIN, "bad volume size '%s'"),
+ sizestr));
}
zc.zc_volsize = size;
@@ -1942,10 +1885,10 @@ zfs_create(const char *path, zfs_type_t type,
}
/* create the dataset */
- ret = zfs_ioctl(ZFS_IOC_CREATE, &zc);
+ ret = ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE, &zc);
if (ret == 0 && type == ZFS_TYPE_VOLUME)
- ret = zvol_create_link(path);
+ ret = zvol_create_link(hdl, path);
/* check for failure */
if (ret != 0) {
@@ -1954,81 +1897,38 @@ zfs_create(const char *path, zfs_type_t type,
switch (errno) {
case ENOENT:
- /*
- * The parent dataset has been deleted since our
- * previous check.
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': no such parent '%s'"),
- path, parent);
- break;
-
- case EPERM:
- /*
- * The user doesn't have permission to create a new
- * dataset here.
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': permission denied"), path);
- break;
-
- case EDQUOT:
- case ENOSPC:
- /*
- * The parent dataset does not have enough free space
- * to create a new dataset.
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': not enough space in '%s'"),
- path, parent);
- break;
-
- case EEXIST:
- /*
- * The target dataset already exists. We should have
- * caught this above, but there may be some unexplained
- * race condition.
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': dataset exists"), path);
- break;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "no such parent '%s'"), parent);
+ return (zfs_error(hdl, EZFS_NOENT, errbuf));
case EINVAL:
- /*
- * The target dataset does not support children.
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': children unsupported in '%s'"),
- path, parent);
- break;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "parent '%s' is not a filesystem"), parent);
+ return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
case EDOM:
- zfs_error(dgettext(TEXT_DOMAIN, "bad %s value '%s': "
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"must be power of 2 from %u to %uk"),
- zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
- blocksizestr ? blocksizestr : "<unknown>",
(uint_t)SPA_MINBLOCKSIZE,
(uint_t)SPA_MAXBLOCKSIZE >> 10);
- break;
+
+ return (zfs_error(hdl, EZFS_BADPROP,
+ dgettext(TEXT_DOMAIN, "bad block size '%s'"),
+ blocksizestr ? blocksizestr : "<unknown>"));
+
#ifdef _ILP32
case EOVERFLOW:
/*
* This platform can't address a volume this big.
*/
- if (type == ZFS_TYPE_VOLUME) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': "
- "max volume size is 1TB on 32-bit systems"),
- path);
- break;
- }
+ if (type == ZFS_TYPE_VOLUME)
+ return (zfs_error(hdl, EZFS_VOLTOOBIG,
+ errbuf));
#endif
-
+ /* FALLTHROUGH */
default:
- zfs_baderror(errno);
+ return (zfs_standard_error(hdl, errno, errbuf));
}
-
- return (-1);
}
return (0);
@@ -2043,6 +1943,7 @@ zfs_destroy(zfs_handle_t *zhp)
{
zfs_cmd_t zc = { 0 };
int ret;
+ char errbuf[1024];
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
@@ -2051,7 +1952,7 @@ zfs_destroy(zfs_handle_t *zhp)
* so that we do the right thing for snapshots of volumes.
*/
if (zhp->zfs_volblocksize != 0) {
- if (zvol_remove_link(zhp->zfs_name) != 0)
+ if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
return (-1);
zc.zc_objset_type = DMU_OST_ZVOL;
@@ -2059,63 +1960,15 @@ zfs_destroy(zfs_handle_t *zhp)
zc.zc_objset_type = DMU_OST_ZFS;
}
- ret = zfs_ioctl(ZFS_IOC_DESTROY, &zc);
+ ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
- if (ret != 0) {
- switch (errno) {
-
- case EPERM:
- /*
- * We don't have permission to destroy this dataset.
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot destroy '%s': permission denied"),
- zhp->zfs_name);
- break;
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot destroy '%s'"), zhp->zfs_name);
- case EIO:
- /*
- * I/O error.
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot destroy '%s': I/O error"),
- zhp->zfs_name);
- break;
-
- case ENOENT:
- /*
- * We've hit a race condition where the dataset has been
- * destroyed since we opened it.
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot destroy '%s': no such %s"),
- zhp->zfs_name, zfs_type_to_name(zhp->zfs_type));
- break;
-
- case EBUSY:
- /*
- * Even if we destroy all children, there is a chance we
- * can hit this case if:
- *
- * - A child dataset has since been created
- * - A filesystem is mounted
- *
- * This error message is awful, but hopefully we've
- * already caught the common cases (and aborted more
- * appropriately) before calling this function. There's
- * nothing else we can do at this point.
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot destroy '%s': %s is busy"),
- zhp->zfs_name, zfs_type_to_name(zhp->zfs_type));
- break;
-
- default:
- zfs_baderror(errno);
- }
-
- return (-1);
- }
+ if (ret != 0)
+ return (zfs_standard_error(zhp->zfs_hdl, errno,
+ dgettext(TEXT_DOMAIN, "cannot destroy '%s'"),
+ zhp->zfs_name));
remove_mountpoint(zhp);
@@ -2128,24 +1981,23 @@ zfs_destroy(zfs_handle_t *zhp)
int
zfs_clone(zfs_handle_t *zhp, const char *target)
{
- char reason[64];
zfs_cmd_t zc = { 0 };
char parent[ZFS_MAXNAMELEN];
int ret;
+ char errbuf[1024];
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot create '%s'"), target);
+
/* validate the target name */
- if (!zfs_validate_name(target, ZFS_TYPE_FILESYSTEM, reason,
- sizeof (reason))) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': %s in filesystem name"), target,
- reason, zfs_type_to_name(ZFS_TYPE_FILESYSTEM));
- return (-1);
- }
+ if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM))
+ return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
/* validate parents exist */
- if (check_parents(target, zhp->zfs_type) != 0)
+ if (check_parents(zhp->zfs_hdl, target) != 0)
return (-1);
(void) parent_name(target, parent, sizeof (parent));
@@ -2158,18 +2010,10 @@ zfs_clone(zfs_handle_t *zhp, const char *target)
(void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name));
(void) strlcpy(zc.zc_filename, zhp->zfs_name, sizeof (zc.zc_filename));
- ret = zfs_ioctl(ZFS_IOC_CREATE, &zc);
+ ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_CREATE, &zc);
if (ret != 0) {
switch (errno) {
- case EPERM:
- /*
- * The user doesn't have permission to create the clone.
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': permission denied"),
- target);
- break;
case ENOENT:
/*
@@ -2181,42 +2025,147 @@ zfs_clone(zfs_handle_t *zhp, const char *target)
* that doesn't exist anymore, or whether the target
* dataset doesn't exist.
*/
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': no such parent '%s'"),
- target, parent);
- break;
+ zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+ "no such parent '%s'"), parent);
+ return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
- case EDQUOT:
- case ENOSPC:
- /*
- * There is not enough space in the target dataset
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': not enough space in '%s'"),
- target, parent);
- break;
+ case EXDEV:
+ zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+ "source and target pools differ"));
+ return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET,
+ errbuf));
- case EEXIST:
- /*
- * The target already exists.
- */
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': dataset exists"), target);
- break;
+ default:
+ return (zfs_standard_error(zhp->zfs_hdl, errno,
+ errbuf));
+ }
+ } else if (zhp->zfs_volblocksize != 0) {
+ ret = zvol_create_link(zhp->zfs_hdl, target);
+ }
- case EXDEV:
+ return (ret);
+}
+
+typedef struct promote_data {
+ char cb_mountpoint[MAXPATHLEN];
+ const char *cb_target;
+ const char *cb_errbuf;
+ uint64_t cb_pivot_txg;
+} promote_data_t;
+
+static int
+promote_snap_cb(zfs_handle_t *zhp, void *data)
+{
+ promote_data_t *pd = data;
+ zfs_handle_t *szhp;
+ int err;
+ char snapname[MAXPATHLEN];
+ char *cp;
+
+ /* We don't care about snapshots after the pivot point */
+ if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg)
+ return (0);
+
+ /*
+ * Unmount it. We actually need to open it to provoke it to be
+ * mounted first, because if it is not mounted, umount2 will
+ * mount it!
+ */
+ (void) strcpy(snapname, pd->cb_mountpoint);
+ (void) strcat(snapname, "/.zfs/snapshot/");
+ cp = strchr(zhp->zfs_name, '@');
+ (void) strcat(snapname, cp+1);
+ err = open(snapname, O_RDONLY);
+ if (err != -1)
+ (void) close(err);
+ (void) umount2(snapname, MS_FORCE);
+
+ /* Check for conflicting names */
+ (void) strcpy(snapname, pd->cb_target);
+ (void) strcat(snapname, cp);
+ szhp = make_dataset_handle(zhp->zfs_hdl, snapname);
+ if (szhp != NULL) {
+ zfs_close(szhp);
+ zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+ "snapshot name '%s' from origin \n"
+ "conflicts with '%s' from target"),
+ zhp->zfs_name, snapname);
+ return (zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf));
+ }
+ return (0);
+}
+
+/*
+ * Promotes the given clone fs to be the clone parent.
+ */
+int
+zfs_promote(zfs_handle_t *zhp)
+{
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
+ zfs_cmd_t zc = { 0 };
+ char parent[MAXPATHLEN];
+ char *cp;
+ int ret;
+ zfs_handle_t *pzhp;
+ promote_data_t pd;
+ char errbuf[1024];
+
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot promote '%s'"), zhp->zfs_name);
+
+ if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "snapshots cannot be promoted"));
+ return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+ }
+
+ (void) strcpy(parent, zhp->zfs_dmustats.dds_clone_of);
+ if (parent[0] == '\0') {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "not a cloned filesystem"));
+ return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+ }
+ cp = strchr(parent, '@');
+ *cp = '\0';
+
+ /* Walk the snapshots we will be moving */
+ pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_clone_of, ZFS_TYPE_SNAPSHOT);
+ if (pzhp == NULL)
+ return (-1);
+ pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG);
+ zfs_close(pzhp);
+ pd.cb_target = zhp->zfs_name;
+ pd.cb_errbuf = errbuf;
+ pzhp = zfs_open(hdl, parent, ZFS_TYPE_ANY);
+ if (pzhp == NULL)
+ return (-1);
+ (void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint,
+ sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE);
+ ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd);
+ if (ret != 0)
+ return (-1);
+
+ /* issue the ioctl */
+ (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+ ret = ioctl(hdl->libzfs_fd, ZFS_IOC_PROMOTE, &zc);
+
+ if (ret != 0) {
+ switch (errno) {
+
+ case EEXIST:
/*
- * The source and target pools differ.
+ * There is a conflicting snapshot name. We
+ * should have caught this above, but they could
+ * have renamed something in the mean time.
*/
- zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
- "source and target pools differ"), target);
- break;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "conflicting snapshot name from parent '%s'"),
+ parent);
+ return (zfs_error(hdl, EZFS_EXISTS, errbuf));
default:
- zfs_baderror(errno);
+ return (zfs_standard_error(hdl, errno, errbuf));
}
- } else if (zhp->zfs_volblocksize != 0) {
- ret = zvol_create_link(target);
}
return (ret);
@@ -2226,40 +2175,36 @@ zfs_clone(zfs_handle_t *zhp, const char *target)
* Takes a snapshot of the given dataset
*/
int
-zfs_snapshot(const char *path)
+zfs_snapshot(libzfs_handle_t *hdl, const char *path)
{
- char reason[64];
const char *delim;
char *parent;
zfs_handle_t *zhp;
zfs_cmd_t zc = { 0 };
int ret;
+ char errbuf[1024];
- /* validate the snapshot name */
- if (!zfs_validate_name(path, ZFS_TYPE_SNAPSHOT, reason,
- sizeof (reason))) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot snapshot '%s': %s in snapshot name"), path,
- reason);
- return (-1);
- }
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot snapshot '%s'"), path);
+
+ /* validate the target name */
+ if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT))
+ return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
/* make sure we have a snapshot */
if ((delim = strchr(path, '@')) == NULL) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot snapshot '%s': missing '@' delim in snapshot "
- "name"), path);
- zfs_error(dgettext(TEXT_DOMAIN,
- "use 'zfs create' to create a filesystem"));
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "missing '@' delimiter in snapshot name"));
+ return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
}
/* make sure the parent exists and is of the appropriate type */
- parent = zfs_malloc(delim - path + 1);
+ if ((parent = zfs_alloc(hdl, delim - path + 1)) == NULL)
+ return (-1);
(void) strncpy(parent, path, delim - path);
parent[delim - path] = '\0';
- if ((zhp = zfs_open(parent, ZFS_TYPE_FILESYSTEM |
+ if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM |
ZFS_TYPE_VOLUME)) == NULL) {
free(parent);
return (-1);
@@ -2272,56 +2217,17 @@ zfs_snapshot(const char *path)
else
zc.zc_objset_type = DMU_OST_ZFS;
- ret = zfs_ioctl(ZFS_IOC_CREATE, &zc);
+ ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_CREATE, &zc);
if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) {
- ret = zvol_create_link(path);
+ ret = zvol_create_link(zhp->zfs_hdl, path);
if (ret != 0)
- (void) zfs_ioctl(ZFS_IOC_DESTROY, &zc);
+ (void) ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DESTROY,
+ &zc);
}
- if (ret != 0) {
- switch (errno) {
- case EPERM:
- /*
- * User doesn't have permission to create a snapshot
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
- "permission denied"), path);
- break;
-
- case EDQUOT:
- case ENOSPC:
- /*
- * Out of space in parent.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
- "not enough space in '%s'"), path, parent);
- break;
-
- case EEXIST:
- /*
- * Snapshot already exists.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
- "snapshot exists"), path);
- break;
-
- case ENOENT:
- /*
- * Shouldn't happen because we verified the parent
- * above. But there may be a race condition where it
- * has since been removed.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': "
- "no such %s"), parent,
- zfs_type_to_name(zhp->zfs_type));
- break;
-
- default:
- zfs_baderror(errno);
- }
- }
+ if (ret != 0)
+ (void) zfs_standard_error(hdl, errno, errbuf);
free(parent);
zfs_close(zhp);
@@ -2337,6 +2243,11 @@ zfs_send(zfs_handle_t *zhp_to, zfs_handle_t *zhp_from)
{
zfs_cmd_t zc = { 0 };
int ret;
+ char errbuf[1024];
+ libzfs_handle_t *hdl = zhp_to->zfs_hdl;
+
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot send '%s'"), zhp_to->zfs_name);
/* do the ioctl() */
(void) strlcpy(zc.zc_name, zhp_to->zfs_name, sizeof (zc.zc_name));
@@ -2348,34 +2259,14 @@ zfs_send(zfs_handle_t *zhp_to, zfs_handle_t *zhp_from)
}
zc.zc_cookie = STDOUT_FILENO;
- ret = zfs_ioctl(ZFS_IOC_SENDBACKUP, &zc);
+ ret = ioctl(zhp_to->zfs_hdl->libzfs_fd, ZFS_IOC_SENDBACKUP, &zc);
if (ret != 0) {
switch (errno) {
- case EPERM:
- /*
- * User doesn't have permission to do a send
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot send '%s': "
- "permission denied"), zhp_to->zfs_name);
- break;
case EXDEV:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot send incremental from %s:\n"
- "it is not an earlier snapshot from the "
- "same fs as %s"),
- zhp_from->zfs_name, zhp_to->zfs_name);
- break;
-
- case ENOENT:
- /*
- * Shouldn't happen because we verified the parent
- * above. But there may be a race condition where it
- * has since been removed.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot open: "
- "no such snapshot"));
- break;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "not an earlier snapshot from the same fs"));
+ return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
case EDQUOT:
case EFBIG:
@@ -2388,18 +2279,11 @@ zfs_send(zfs_handle_t *zhp_to, zfs_handle_t *zhp_from)
case ERANGE:
case EFAULT:
case EROFS:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot write stream: %s"),
- strerror(errno));
- break;
-
- case EINTR:
- zfs_error(dgettext(TEXT_DOMAIN,
- "send failed: signal received"));
- break;
+ zfs_error_aux(hdl, strerror(errno));
+ return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
default:
- zfs_baderror(errno);
+ return (zfs_standard_error(hdl, errno, errbuf));
}
}
@@ -2410,7 +2294,8 @@ zfs_send(zfs_handle_t *zhp_to, zfs_handle_t *zhp_from)
* Restores a backup of tosnap from stdin.
*/
int
-zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
+zfs_receive(libzfs_handle_t *hdl, const char *tosnap, int isprefix,
+ int verbose, int dryrun)
{
zfs_cmd_t zc = { 0 };
time_t begin_time;
@@ -2418,9 +2303,13 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
char *cp;
dmu_replay_record_t drr;
struct drr_begin *drrb = &zc.zc_begin_record;
+ char errbuf[1024];
begin_time = time(NULL);
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot receive"));
+
/* trim off snapname, if any */
(void) strcpy(zc.zc_name, tosnap);
cp = strchr(zc.zc_name, '@');
@@ -2437,31 +2326,26 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
} while (size > 0);
if (size < 0 || bytes != sizeof (drr)) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: invalid stream "
- "(couldn't read first record)"));
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+ "stream (failed to read first record)"));
+ return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
}
zc.zc_begin_record = drr.drr_u.drr_begin;
if (drrb->drr_magic != DMU_BACKUP_MAGIC &&
drrb->drr_magic != BSWAP_64(DMU_BACKUP_MAGIC)) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: invalid stream "
- "(invalid magic number)"));
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+ "stream (bad magic number)"));
+ return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
}
if (drrb->drr_version != DMU_BACKUP_VERSION &&
drrb->drr_version != BSWAP_64(DMU_BACKUP_VERSION)) {
- if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
- drrb->drr_version = BSWAP_64(drrb->drr_version);
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: only stream version 0x%llx is supported, "
- "stream is version %llx."),
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only version "
+ "0x%llx is supported (stream is version 0x%llx)"),
DMU_BACKUP_VERSION, drrb->drr_version);
- return (-1);
+ return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
}
/*
@@ -2470,10 +2354,9 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
(void) strcpy(zc.zc_filename, tosnap);
if (isprefix) {
if (strchr(tosnap, '@') != NULL) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: "
- "argument to -d must be a filesystem"));
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "destination must be a filesystem"));
+ return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
}
cp = strchr(drr.drr_u.drr_begin.drr_toname, '/');
@@ -2490,11 +2373,8 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
* snapname from the backup.
*/
cp = strchr(drr.drr_u.drr_begin.drr_toname, '@');
- if (cp == NULL || strlen(tosnap) + strlen(cp) >= MAXNAMELEN) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: invalid snapshot name"));
- return (-1);
- }
+ if (cp == NULL || strlen(tosnap) + strlen(cp) >= MAXNAMELEN)
+ return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
(void) strcat(zc.zc_filename, cp);
}
@@ -2508,20 +2388,16 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
*cp = '\0';
/* make sure destination fs exists */
- h = zfs_open(zc.zc_name, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
- if (h == NULL) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive incrememtal stream: destination\n"
- "filesystem %s does not exist"),
- zc.zc_name);
+ h = zfs_open(hdl, zc.zc_name,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+ if (h == NULL)
return (-1);
- }
if (!dryrun) {
/* unmount destination fs or remove device link. */
if (h->zfs_type == ZFS_TYPE_FILESYSTEM) {
(void) zfs_unmount(h, NULL, 0);
} else {
- (void) zvol_remove_link(h->zfs_name);
+ (void) zvol_remove_link(hdl, h->zfs_name);
}
}
zfs_close(h);
@@ -2535,24 +2411,18 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
cp = strchr(zc.zc_name, '@');
if (cp)
*cp = '\0';
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: destination fs %s already exists"),
- zc.zc_name);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "destination '%s' already exists"), zc.zc_name);
+ return (zfs_error(hdl, EZFS_EXISTS, errbuf));
}
if (isprefix) {
zfs_handle_t *h;
/* make sure prefix exists */
- h = zfs_open(tosnap, ZFS_TYPE_FILESYSTEM);
- if (h == NULL) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: "
- "%s is an invalid destination"),
- tosnap);
+ h = zfs_open(hdl, tosnap, ZFS_TYPE_FILESYSTEM);
+ if (h == NULL)
return (-1);
- }
zfs_close(h);
/* create any necessary ancestors up to prefix */
@@ -2569,24 +2439,25 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
const char *opname;
*cp = '\0';
- opname = "create";
- if (zfs_create(zc.zc_name, ZFS_TYPE_FILESYSTEM,
- NULL, NULL) != 0) {
+ opname = dgettext(TEXT_DOMAIN, "create");
+ if (zfs_create(hdl, zc.zc_name,
+ ZFS_TYPE_FILESYSTEM, NULL, NULL) != 0) {
if (errno == EEXIST)
continue;
goto ancestorerr;
}
- opname = "open";
- h = zfs_open(zc.zc_name, ZFS_TYPE_FILESYSTEM);
+ opname = dgettext(TEXT_DOMAIN, "open");
+ h = zfs_open(hdl, zc.zc_name,
+ ZFS_TYPE_FILESYSTEM);
if (h == NULL)
goto ancestorerr;
- opname = "mount";
+ opname = dgettext(TEXT_DOMAIN, "mount");
if (zfs_mount(h, NULL, 0) != 0)
goto ancestorerr;
- opname = "share";
+ opname = dgettext(TEXT_DOMAIN, "share");
if (zfs_share(h) != 0)
goto ancestorerr;
@@ -2594,22 +2465,21 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
continue;
ancestorerr:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: couldn't %s ancestor %s"),
- opname, zc.zc_name);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "failed to %s ancestor '%s'"), opname,
+ zc.zc_name);
+ return (zfs_error(hdl, EZFS_BADRESTORE,
+ errbuf));
}
}
/* Make sure destination fs does not exist */
cp = strchr(zc.zc_name, '@');
*cp = '\0';
- if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) == 0) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive full stream: "
- "destination filesystem %s already exists"),
- zc.zc_name);
- return (-1);
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "destination '%s' exists"), zc.zc_name);
+ return (zfs_error(hdl, EZFS_EXISTS, errbuf));
}
/* Do the recvbackup ioctl to the fs's parent. */
@@ -2630,21 +2500,20 @@ ancestorerr:
}
if (dryrun)
return (0);
- err = ioctl_err = zfs_ioctl(ZFS_IOC_RECVBACKUP, &zc);
+ err = ioctl_err = ioctl(hdl->libzfs_fd, ZFS_IOC_RECVBACKUP, &zc);
if (ioctl_err != 0) {
switch (errno) {
case ENODEV:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: "
- "most recent snapshot does not "
- "match incremental source"));
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "most recent snapshot does not match incremental "
+ "source"));
+ (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
break;
case ETXTBSY:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: "
- "destination has been modified since "
- "most recent snapshot --\n"
- "use 'zfs rollback' to discard changes"));
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "destination has been modified since most recent "
+ "snapshot"));
+ (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
break;
case EEXIST:
if (drrb->drr_fromguid == 0) {
@@ -2652,45 +2521,21 @@ ancestorerr:
cp = strchr(zc.zc_filename, '@');
*cp = '\0';
}
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive to %s: destination already exists"),
- zc.zc_filename);
- break;
- case ENOENT:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: destination does not exist"));
- break;
- case EBUSY:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: destination is in use"));
- break;
- case ENOSPC:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: out of space"));
- break;
- case EDQUOT:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: quota exceeded"));
- break;
- case EINTR:
- zfs_error(dgettext(TEXT_DOMAIN,
- "receive failed: signal received"));
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "destination already exists"));
+ (void) zfs_error(hdl, EZFS_EXISTS, dgettext(TEXT_DOMAIN,
+ "cannot restore to %s"), zc.zc_filename);
break;
case EINVAL:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: invalid stream"));
+ (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
break;
case ECKSUM:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: invalid stream "
- "(checksum mismatch)"));
- break;
- case EPERM:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot receive: permission denied"));
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "invalid stream (checksum mismatch)"));
+ (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
break;
default:
- zfs_baderror(errno);
+ (void) zfs_standard_error(hdl, errno, errbuf);
}
}
@@ -2705,16 +2550,17 @@ ancestorerr:
zfs_handle_t *h;
*cp = '\0';
- h = zfs_open(zc.zc_filename,
+ h = zfs_open(hdl, zc.zc_filename,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
*cp = '@';
if (h) {
if (h->zfs_type == ZFS_TYPE_FILESYSTEM) {
err = zfs_mount(h, NULL, 0);
} else {
- err = zvol_create_link(h->zfs_name);
+ err = zvol_create_link(hdl, h->zfs_name);
if (err == 0 && ioctl_err == 0)
- err = zvol_create_link(zc.zc_filename);
+ err = zvol_create_link(hdl,
+ zc.zc_filename);
}
zfs_close(h);
}
@@ -2750,7 +2596,7 @@ typedef struct rollback_data {
uint64_t cb_create; /* creation time reference */
prop_changelist_t *cb_clp; /* changelist pointer */
int cb_error;
- int cb_dependent;
+ boolean_t cb_dependent;
} rollback_data_t;
static int
@@ -2764,9 +2610,9 @@ rollback_destroy(zfs_handle_t *zhp, void *data)
zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) >
cbp->cb_create) {
- cbp->cb_dependent = TRUE;
+ cbp->cb_dependent = B_TRUE;
(void) zfs_iter_dependents(zhp, rollback_destroy, cbp);
- cbp->cb_dependent = FALSE;
+ cbp->cb_dependent = B_FALSE;
if (zfs_destroy(zhp) != 0)
cbp->cb_error = 1;
@@ -2797,7 +2643,7 @@ do_rollback(zfs_handle_t *zhp)
zhp->zfs_type == ZFS_TYPE_VOLUME);
if (zhp->zfs_type == ZFS_TYPE_VOLUME &&
- zvol_remove_link(zhp->zfs_name) != 0)
+ zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
return (-1);
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
@@ -2814,58 +2660,13 @@ do_rollback(zfs_handle_t *zhp)
* condition where the user has taken a snapshot since we verified that
* this was the most recent.
*/
- if ((ret = zfs_ioctl(ZFS_IOC_ROLLBACK, &zc)) != 0) {
- switch (errno) {
- case EPERM:
- /*
- * The user doesn't have permission to rollback the
- * given dataset.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot rollback '%s': "
- "permission denied"), zhp->zfs_name);
- break;
-
- case EDQUOT:
- case ENOSPC:
- /*
- * The parent dataset doesn't have enough space to
- * rollback to the last snapshot.
- */
- {
- char parent[ZFS_MAXNAMELEN];
- (void) parent_name(zhp->zfs_name, parent,
- sizeof (parent));
- zfs_error(dgettext(TEXT_DOMAIN, "cannot "
- "rollback '%s': out of space"), parent);
- }
- break;
-
- case ENOENT:
- /*
- * The dataset doesn't exist. This shouldn't happen
- * except in race conditions.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot rollback '%s': "
- "no such %s"), zhp->zfs_name,
- zfs_type_to_name(zhp->zfs_type));
- break;
-
- case EBUSY:
- /*
- * The filesystem is busy. This should have been caught
- * by the caller before getting here, but there may be
- * an unexpected problem.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot rollback '%s': "
- "%s is busy"), zhp->zfs_name,
- zfs_type_to_name(zhp->zfs_type));
- break;
-
- default:
- zfs_baderror(errno);
- }
+ if ((ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_ROLLBACK,
+ &zc)) != 0) {
+ (void) zfs_standard_error(zhp->zfs_hdl, errno,
+ dgettext(TEXT_DOMAIN, "cannot rollback '%s'"),
+ zhp->zfs_name);
} else if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
- ret = zvol_create_link(zhp->zfs_name);
+ ret = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
}
return (ret);
@@ -2946,9 +2747,10 @@ zfs_iter_dependents(zfs_handle_t *zhp, zfs_iter_f func, void *data)
zfs_handle_t *child;
int ret = 0;
- dependents = get_dependents(zhp->zfs_name, &count);
+ dependents = get_dependents(zhp->zfs_hdl, zhp->zfs_name, &count);
for (i = 0; i < count; i++) {
- if ((child = make_dataset_handle(dependents[i])) == NULL)
+ if ((child = make_dataset_handle(zhp->zfs_hdl,
+ dependents[i])) == NULL)
continue;
if ((ret = func(child, data)) != 0)
@@ -2970,10 +2772,11 @@ zfs_rename(zfs_handle_t *zhp, const char *target)
{
int ret;
zfs_cmd_t zc = { 0 };
- char reason[64];
char *delim;
prop_changelist_t *cl;
char parent[ZFS_MAXNAMELEN];
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
+ char errbuf[1024];
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
(void) strlcpy(zc.zc_prop_value, target, sizeof (zc.zc_prop_value));
@@ -2982,22 +2785,21 @@ zfs_rename(zfs_handle_t *zhp, const char *target)
if (strcmp(zhp->zfs_name, target) == 0)
return (0);
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot rename to '%s'"), target);
+
/*
* Make sure the target name is valid
*/
- if (!zfs_validate_name(target, zhp->zfs_type, reason,
- sizeof (reason))) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create '%s': %s in %s name"), target, reason,
- zfs_type_to_name(zhp->zfs_type));
- return (-1);
- }
+ if (!zfs_validate_name(hdl, target, zhp->zfs_type))
+ return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
+
if ((delim = strchr(target, '@')) == NULL) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot rename to '%s': not a snapshot"), target);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "not a snapshot"));
+ return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
}
/*
@@ -3005,17 +2807,16 @@ zfs_rename(zfs_handle_t *zhp, const char *target)
*/
if (strncmp(zhp->zfs_name, target, delim - target) != 0 ||
zhp->zfs_name[delim - target] != '@') {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot rename to '%s': snapshots must be part "
- "of same dataset"), target);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "snapshots must be part of same dataset"));
+ return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
}
(void) strncpy(parent, target, delim - target);
parent[delim - target] = '\0';
} else {
/* validate parents */
- if (check_parents(target, zhp->zfs_type) != 0)
+ if (check_parents(hdl, target) != 0)
return (-1);
(void) parent_name(target, parent, sizeof (parent));
@@ -3024,28 +2825,30 @@ zfs_rename(zfs_handle_t *zhp, const char *target)
verify((delim = strchr(target, '/')) != NULL);
if (strncmp(zhp->zfs_name, target, delim - target) != 0 ||
zhp->zfs_name[delim - target] != '/') {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot rename to '%s': "
- "datasets must be within same pool"), target);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "datasets must be within same pool"));
+ return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
}
}
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name);
+
if (getzoneid() == GLOBAL_ZONEID &&
zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot rename %s, "
- "dataset is used in a non-global zone"), zhp->zfs_name);
- return (-1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "dataset is used in a non-global zone"));
+ return (zfs_error(hdl, EZFS_ZONED, errbuf));
}
if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0)) == NULL)
- return (1);
+ return (-1);
if (changelist_haszonedchild(cl)) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot rename '%s': child dataset with inherited "
- "mountpoint is used in a non-global zone"), zhp->zfs_name);
- ret = -1;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "child dataset with inherited mountpoint is used "
+ "in a non-global zone"));
+ ret = zfs_error(hdl, EZFS_ZONED, errbuf);
goto error;
}
@@ -3057,59 +2860,8 @@ zfs_rename(zfs_handle_t *zhp, const char *target)
else
zc.zc_objset_type = DMU_OST_ZFS;
- if ((ret = zfs_ioctl(ZFS_IOC_RENAME, &zc)) != 0) {
- switch (errno) {
- case EPERM:
- /*
- * The user doesn't have permission to rename the
- * given dataset.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot rename '%s': "
- "permission denied"), zhp->zfs_name);
- break;
-
- case EDQUOT:
- case ENOSPC:
- /*
- * Not enough space in the parent dataset.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot "
- "rename '%s': not enough space in '%s'"),
- zhp->zfs_name, parent);
- break;
-
- case ENOENT:
- /*
- * The destination doesn't exist.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot rename '%s' "
- "to '%s': destination doesn't exist"),
- zhp->zfs_name, target);
- break;
-
- case EEXIST:
- /*
- * The destination already exists.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot rename '%s' "
- "to '%s': destination already exists"),
- zhp->zfs_name, target);
- break;
-
- case EBUSY:
- /*
- * The filesystem is busy. This should have been caught
- * by the caller before getting here, but there may be
- * an unexpected problem.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot rename '%s': "
- "%s is busy"), zhp->zfs_name,
- zfs_type_to_name(zhp->zfs_type));
- break;
-
- default:
- zfs_baderror(errno);
- }
+ if ((ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_RENAME, &zc)) != 0) {
+ (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf);
/*
* On failure, we still want to remount any filesystems that
@@ -3132,24 +2884,18 @@ error:
* poke devfsadm to create the /dev link, and then wait for the link to appear.
*/
int
-zvol_create_link(const char *dataset)
+zvol_create_link(libzfs_handle_t *hdl, const char *dataset)
{
zfs_cmd_t zc = { 0 };
- di_devlink_handle_t hdl;
+ di_devlink_handle_t dhdl;
(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
/*
* Issue the appropriate ioctl.
*/
- if (zfs_ioctl(ZFS_IOC_CREATE_MINOR, &zc) != 0) {
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) {
switch (errno) {
- case EPERM:
- zfs_error(dgettext(TEXT_DOMAIN, "cannot create "
- "device links for '%s': permission denied"),
- dataset);
- break;
-
case EEXIST:
/*
* Silently ignore the case where the link already
@@ -3159,22 +2905,24 @@ zvol_create_link(const char *dataset)
return (0);
default:
- zfs_baderror(errno);
+ return (zfs_standard_error(hdl, errno,
+ dgettext(TEXT_DOMAIN, "cannot create device links "
+ "for '%s'"), dataset));
}
-
- return (-1);
}
/*
* Call devfsadm and wait for the links to magically appear.
*/
- if ((hdl = di_devlink_init(ZFS_DRIVER, DI_MAKE_LINK)) == NULL) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot create device links for '%s'"), dataset);
- (void) zfs_ioctl(ZFS_IOC_REMOVE_MINOR, &zc);
+ if ((dhdl = di_devlink_init(ZFS_DRIVER, DI_MAKE_LINK)) == NULL) {
+ zfs_error_aux(hdl, strerror(errno));
+ (void) zfs_error(hdl, EZFS_DEVLINKS,
+ dgettext(TEXT_DOMAIN, "cannot create device links "
+ "for '%s'"), dataset);
+ (void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc);
return (-1);
} else {
- (void) di_devlink_fini(&hdl);
+ (void) di_devlink_fini(&dhdl);
}
return (0);
@@ -3184,26 +2932,14 @@ zvol_create_link(const char *dataset)
* Remove a minor node for the given zvol and the associated /dev links.
*/
int
-zvol_remove_link(const char *dataset)
+zvol_remove_link(libzfs_handle_t *hdl, const char *dataset)
{
zfs_cmd_t zc = { 0 };
(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
- if (zfs_ioctl(ZFS_IOC_REMOVE_MINOR, &zc) != 0) {
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) != 0) {
switch (errno) {
- case EPERM:
- zfs_error(dgettext(TEXT_DOMAIN, "cannot remove "
- "device links for '%s': permission denied"),
- dataset);
- break;
-
- case EBUSY:
- zfs_error(dgettext(TEXT_DOMAIN, "cannot remove "
- "device links for '%s': volume is in use"),
- dataset);
- break;
-
case ENXIO:
/*
* Silently ignore the case where the link no longer
@@ -3213,10 +2949,10 @@ zvol_remove_link(const char *dataset)
return (0);
default:
- zfs_baderror(errno);
+ return (zfs_standard_error(hdl, errno,
+ dgettext(TEXT_DOMAIN, "cannot remove device "
+ "links for '%s'"), dataset));
}
-
- return (-1);
}
return (0);
diff --git a/usr/src/lib/libzfs/common/libzfs_graph.c b/usr/src/lib/libzfs/common/libzfs_graph.c
index 4c7bb547ee..e86a6c9377 100644
--- a/usr/src/lib/libzfs/common/libzfs_graph.c
+++ b/usr/src/lib/libzfs/common/libzfs_graph.c
@@ -121,9 +121,12 @@ typedef struct zfs_graph {
* Allocate a new edge pointing to the target vertex.
*/
static zfs_edge_t *
-zfs_edge_create(zfs_vertex_t *dest)
+zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest)
{
- zfs_edge_t *zep = zfs_malloc(sizeof (zfs_edge_t));
+ zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t));
+
+ if (zep == NULL)
+ return (NULL);
zep->ze_dest = dest;
@@ -143,15 +146,23 @@ zfs_edge_destroy(zfs_edge_t *zep)
* Allocate a new vertex with the given name.
*/
static zfs_vertex_t *
-zfs_vertex_create(const char *dataset)
+zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset)
{
- zfs_vertex_t *zvp = zfs_malloc(sizeof (zfs_vertex_t));
+ zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t));
+
+ if (zvp == NULL)
+ return (NULL);
assert(strlen(dataset) < ZFS_MAXNAMELEN);
(void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset));
- zvp->zv_edges = zfs_malloc(MIN_EDGECOUNT * sizeof (void *));
+ if ((zvp->zv_edges = zfs_alloc(hdl,
+ MIN_EDGECOUNT * sizeof (void *))) == NULL) {
+ free(zvp);
+ return (NULL);
+ }
+
zvp->zv_edgealloc = MIN_EDGECOUNT;
return (zvp);
@@ -175,15 +186,22 @@ zfs_vertex_destroy(zfs_vertex_t *zvp)
/*
* Given a vertex, add an edge to the destination vertex.
*/
-static void
-zfs_vertex_add_edge(zfs_vertex_t *zvp, zfs_vertex_t *dest)
+static int
+zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp,
+ zfs_vertex_t *dest)
{
- zfs_edge_t *zep = zfs_edge_create(dest);
+ zfs_edge_t *zep = zfs_edge_create(hdl, dest);
+
+ if (zep == NULL)
+ return (-1);
if (zvp->zv_edgecount == zvp->zv_edgealloc) {
- zfs_edge_t **newedges = zfs_malloc(zvp->zv_edgealloc * 2 *
+ zfs_edge_t **newedges = zfs_alloc(hdl, zvp->zv_edgealloc * 2 *
sizeof (void *));
+ if (newedges == NULL)
+ return (-1);
+
bcopy(zvp->zv_edges, newedges,
zvp->zv_edgealloc * sizeof (void *));
@@ -193,6 +211,8 @@ zfs_vertex_add_edge(zfs_vertex_t *zvp, zfs_vertex_t *dest)
}
zvp->zv_edges[zvp->zv_edgecount++] = zep;
+
+ return (0);
}
static int
@@ -227,12 +247,19 @@ zfs_vertex_sort_edges(zfs_vertex_t *zvp)
* datasets in the pool.
*/
static zfs_graph_t *
-zfs_graph_create(size_t size)
+zfs_graph_create(libzfs_handle_t *hdl, size_t size)
{
- zfs_graph_t *zgp = zfs_malloc(sizeof (zfs_graph_t));
+ zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t));
+
+ if (zgp == NULL)
+ return (NULL);
zgp->zg_size = size;
- zgp->zg_hash = zfs_malloc(size * sizeof (zfs_vertex_t *));
+ if ((zgp->zg_hash = zfs_alloc(hdl,
+ size * sizeof (zfs_vertex_t *))) == NULL) {
+ free(zgp);
+ return (NULL);
+ }
return (zgp);
}
@@ -280,7 +307,8 @@ zfs_graph_hash(zfs_graph_t *zgp, const char *str)
* Given a dataset name, finds the associated vertex, creating it if necessary.
*/
static zfs_vertex_t *
-zfs_graph_lookup(zfs_graph_t *zgp, const char *dataset, uint64_t txg)
+zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset,
+ uint64_t txg)
{
size_t idx = zfs_graph_hash(zgp, dataset);
zfs_vertex_t *zvp;
@@ -293,7 +321,9 @@ zfs_graph_lookup(zfs_graph_t *zgp, const char *dataset, uint64_t txg)
}
}
- zvp = zfs_vertex_create(dataset);
+ if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL)
+ return (NULL);
+
zvp->zv_next = zgp->zg_hash[idx];
zvp->zv_txg = txg;
zgp->zg_hash[idx] = zvp;
@@ -308,43 +338,52 @@ zfs_graph_lookup(zfs_graph_t *zgp, const char *dataset, uint64_t txg)
* created it as a destination of another edge. If 'dest' is NULL, then this
* is an individual vertex (i.e. the starting vertex), so don't add an edge.
*/
-static void
-zfs_graph_add(zfs_graph_t *zgp, const char *source, const char *dest,
- uint64_t txg)
+static int
+zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source,
+ const char *dest, uint64_t txg)
{
zfs_vertex_t *svp, *dvp;
- svp = zfs_graph_lookup(zgp, source, 0);
+ if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL)
+ return (-1);
svp->zv_visited = 1;
if (dest != NULL) {
- dvp = zfs_graph_lookup(zgp, dest, txg);
- zfs_vertex_add_edge(svp, dvp);
+ dvp = zfs_graph_lookup(hdl, zgp, dest, txg);
+ if (dvp == NULL)
+ return (-1);
+ if (zfs_vertex_add_edge(hdl, svp, dvp) != 0)
+ return (-1);
}
+
+ return (0);
}
/*
* Iterate over all children of the given dataset, adding any vertices as
- * necessary. Returns 0 if no cloned snapshots were seen, 1 otherwise. This is
+ * necessary. Returns 0 if no cloned snapshots were seen, -1 if there was an
+ * error, or 1 otherwise. This is
* a simple recursive algorithm - the ZFS namespace typically is very flat. We
* manually invoke the necessary ioctl() calls to avoid the overhead and
* additional semantics of zfs_open().
*/
static int
-iterate_children(zfs_graph_t *zgp, const char *dataset)
+iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
{
zfs_cmd_t zc = { 0 };
- int ret = 0;
+ int ret = 0, err;
zfs_vertex_t *zvp;
/*
* Look up the source vertex, and avoid it if we've seen it before.
*/
- zvp = zfs_graph_lookup(zgp, dataset, 0);
+ zvp = zfs_graph_lookup(hdl, zgp, dataset, 0);
+ if (zvp == NULL)
+ return (-1);
if (zvp->zv_visited)
return (0);
for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
- zfs_ioctl(ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
+ ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
/*
@@ -358,32 +397,38 @@ iterate_children(zfs_graph_t *zgp, const char *dataset)
* dataset and clone statistics. If this fails, the dataset has
* since been removed, and we're pretty much screwed anyway.
*/
- if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0)
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
continue;
/*
* Add an edge between the parent and the child.
*/
- zfs_graph_add(zgp, dataset, zc.zc_name,
- zc.zc_objset_stats.dds_creation_txg);
+ if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
+ zc.zc_objset_stats.dds_creation_txg) != 0)
+ return (-1);
/*
* If this dataset has a clone parent, add an appropriate edge.
*/
- if (zc.zc_objset_stats.dds_clone_of[0] != '\0')
- zfs_graph_add(zgp, zc.zc_objset_stats.dds_clone_of,
- zc.zc_name, zc.zc_objset_stats.dds_creation_txg);
+ if (zc.zc_objset_stats.dds_clone_of[0] != '\0' &&
+ zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_clone_of,
+ zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0)
+ return (-1);
/*
* Iterate over all children
*/
- ret |= iterate_children(zgp, zc.zc_name);
+ err = iterate_children(hdl, zgp, zc.zc_name);
+ if (err == -1)
+ return (-1);
+ else if (err == 1)
+ ret = 1;
/*
* Indicate if we found a dataset with a non-zero clone count.
*/
if (zc.zc_objset_stats.dds_num_clones != 0)
- ret |= 1;
+ ret = 1;
}
/*
@@ -392,7 +437,7 @@ iterate_children(zfs_graph_t *zgp, const char *dataset)
bzero(&zc, sizeof (zc));
for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
- zfs_ioctl(ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
+ ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
/*
@@ -400,20 +445,21 @@ iterate_children(zfs_graph_t *zgp, const char *dataset)
* dataset and clone statistics. If this fails, the dataset has
* since been removed, and we're pretty much screwed anyway.
*/
- if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0)
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
continue;
/*
* Add an edge between the parent and the child.
*/
- zfs_graph_add(zgp, dataset, zc.zc_name,
- zc.zc_objset_stats.dds_creation_txg);
+ if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
+ zc.zc_objset_stats.dds_creation_txg) != 0)
+ return (-1);
/*
* Indicate if we found a dataset with a non-zero clone count.
*/
if (zc.zc_objset_stats.dds_num_clones != 0)
- ret |= 1;
+ ret = 1;
}
zvp->zv_visited = 1;
@@ -428,20 +474,24 @@ iterate_children(zfs_graph_t *zgp, const char *dataset)
* over all datasets.
*/
static zfs_graph_t *
-construct_graph(const char *dataset)
+construct_graph(libzfs_handle_t *hdl, const char *dataset)
{
- zfs_graph_t *zgp = zfs_graph_create(ZFS_GRAPH_SIZE);
+ zfs_graph_t *zgp = zfs_graph_create(hdl, ZFS_GRAPH_SIZE);
zfs_cmd_t zc = { 0 };
+ int ret = 0;
+
+ if (zgp == NULL)
+ return (zgp);
/*
* We need to explicitly check whether this dataset has clones or not,
* since iterate_children() only checks the children.
*/
(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
- (void) zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc);
+ (void) ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc);
if (zc.zc_objset_stats.dds_num_clones != 0 ||
- iterate_children(zgp, dataset) != 0) {
+ (ret = iterate_children(hdl, zgp, dataset)) != 0) {
/*
* Determine pool name and try again.
*/
@@ -449,17 +499,29 @@ construct_graph(const char *dataset)
if ((slash = strchr(dataset, '/')) != NULL ||
(slash = strchr(dataset, '@')) != NULL) {
- pool = zfs_malloc(slash - dataset + 1);
+ pool = zfs_alloc(hdl, slash - dataset + 1);
+ if (pool == NULL) {
+ zfs_graph_destroy(zgp);
+ return (NULL);
+ }
(void) strncpy(pool, dataset, slash - dataset);
pool[slash - dataset] = '\0';
- (void) iterate_children(zgp, pool);
- zfs_graph_add(zgp, pool, NULL, 0);
+ if (iterate_children(hdl, zgp, pool) == -1 ||
+ zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
+ free(pool);
+ zfs_graph_destroy(zgp);
+ return (NULL);
+ }
free(pool);
}
}
- zfs_graph_add(zgp, dataset, NULL, 0);
+
+ if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
+ zfs_graph_destroy(zgp);
+ return (NULL);
+ }
return (zgp);
}
@@ -469,27 +531,33 @@ construct_graph(const char *dataset)
* really just a depth first search, so that the deepest nodes appear first.
* hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
*/
-static void
-topo_sort(char **result, size_t *idx, zfs_vertex_t *zgv)
+static int
+topo_sort(libzfs_handle_t *hdl, char **result, size_t *idx, zfs_vertex_t *zgv)
{
int i;
/* avoid doing a search if we don't have to */
if (zgv->zv_visited == 2)
- return;
+ return (0);
zfs_vertex_sort_edges(zgv);
- for (i = 0; i < zgv->zv_edgecount; i++)
- topo_sort(result, idx, zgv->zv_edges[i]->ze_dest);
+ for (i = 0; i < zgv->zv_edgecount; i++) {
+ if (topo_sort(hdl, result, idx, zgv->zv_edges[i]->ze_dest) != 0)
+ return (-1);
+ }
/* we may have visited this in the course of the above */
if (zgv->zv_visited == 2)
- return;
+ return (0);
+
+ if ((result[*idx] = zfs_alloc(hdl,
+ strlen(zgv->zv_dataset) + 1)) == NULL)
+ return (-1);
- result[*idx] = zfs_malloc(strlen(zgv->zv_dataset) + 1);
(void) strcpy(result[*idx], zgv->zv_dataset);
*idx += 1;
zgv->zv_visited = 2;
+ return (0);
}
/*
@@ -498,19 +566,33 @@ topo_sort(char **result, size_t *idx, zfs_vertex_t *zgv)
* sort, and then return the array of strings to the caller.
*/
char **
-get_dependents(const char *dataset, size_t *count)
+get_dependents(libzfs_handle_t *hdl, const char *dataset, size_t *count)
{
char **result;
zfs_graph_t *zgp;
zfs_vertex_t *zvp;
- zgp = construct_graph(dataset);
- result = zfs_malloc(zgp->zg_nvertex * sizeof (char *));
+ if ((zgp = construct_graph(hdl, dataset)) == NULL)
+ return (NULL);
- zvp = zfs_graph_lookup(zgp, dataset, 0);
+ if ((result = zfs_alloc(hdl,
+ zgp->zg_nvertex * sizeof (char *))) == NULL) {
+ zfs_graph_destroy(zgp);
+ return (NULL);
+ }
+
+ if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) {
+ free(result);
+ zfs_graph_destroy(zgp);
+ return (NULL);
+ }
*count = 0;
- topo_sort(result, count, zvp);
+ if (topo_sort(hdl, result, count, zvp) != 0) {
+ free(result);
+ zfs_graph_destroy(zgp);
+ return (NULL);
+ }
/*
* Get rid of the last entry, which is our starting vertex and not
diff --git a/usr/src/lib/libzfs/common/libzfs_impl.h b/usr/src/lib/libzfs/common/libzfs_impl.h
index 76bca21242..2c5e890767 100644
--- a/usr/src/lib/libzfs/common/libzfs_impl.h
+++ b/usr/src/lib/libzfs/common/libzfs_impl.h
@@ -34,13 +34,29 @@
#include <sys/zfs_acl.h>
#include <sys/nvpair.h>
+#include <libuutil.h>
#include <libzfs.h>
#ifdef __cplusplus
extern "C" {
#endif
+struct libzfs_handle {
+ int libzfs_error;
+ int libzfs_fd;
+ FILE *libzfs_mnttab;
+ FILE *libzfs_sharetab;
+ uu_avl_pool_t *libzfs_ns_avlpool;
+ uu_avl_t *libzfs_ns_avl;
+ uint64_t libzfs_ns_gen;
+ int libzfs_desc_active;
+ char libzfs_action[1024];
+ char libzfs_desc[1024];
+ int libzfs_printerr;
+};
+
struct zfs_handle {
+ libzfs_handle_t *zfs_hdl;
char zfs_name[ZFS_MAXNAMELEN];
zfs_type_t zfs_type;
dmu_objset_stats_t zfs_dmustats;
@@ -52,6 +68,7 @@ struct zfs_handle {
};
struct zpool_handle {
+ libzfs_handle_t *zpool_hdl;
char zpool_name[ZPOOL_MAXNAMELEN];
int zpool_state;
size_t zpool_config_size;
@@ -61,18 +78,16 @@ struct zpool_handle {
size_t zpool_error_count;
};
-void zfs_error(const char *, ...);
-void zfs_fatal(const char *, ...);
-void *zfs_malloc(size_t);
-char *zfs_strdup(const char *);
-void no_memory(void);
+int zfs_error(libzfs_handle_t *, int, const char *, ...);
+void zfs_error_aux(libzfs_handle_t *, const char *, ...);
+void *zfs_alloc(libzfs_handle_t *, size_t);
+char *zfs_strdup(libzfs_handle_t *, const char *);
+int no_memory(libzfs_handle_t *);
-#define zfs_baderror(err) \
- (zfs_fatal(dgettext(TEXT_DOMAIN, \
- "internal error: unexpected error %d at line %d of %s"), \
- (err), (__LINE__), (__FILE__)))
+int zfs_standard_error(libzfs_handle_t *, int, const char *, ...);
+int zpool_standard_error(libzfs_handle_t *, int, const char *, ...);
-char **get_dependents(const char *, size_t *);
+char **get_dependents(libzfs_handle_t *, const char *, size_t *);
typedef struct prop_changelist prop_changelist_t;
@@ -87,17 +102,15 @@ int changelist_haszonedchild(prop_changelist_t *);
void remove_mountpoint(zfs_handle_t *);
-zfs_handle_t *make_dataset_handle(const char *);
-void set_pool_health(nvlist_t *);
+zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *);
+int set_pool_health(nvlist_t *);
-zpool_handle_t *zpool_open_silent(const char *);
+zpool_handle_t *zpool_open_silent(libzfs_handle_t *, const char *);
-int zvol_create_link(const char *);
-int zvol_remove_link(const char *);
+int zvol_create_link(libzfs_handle_t *, const char *);
+int zvol_remove_link(libzfs_handle_t *, const char *);
-int zfs_ioctl(int, zfs_cmd_t *);
-FILE *zfs_mnttab(void);
-FILE *zfs_sharetab(void);
+void namespace_clear(libzfs_handle_t *);
#ifdef __cplusplus
}
diff --git a/usr/src/lib/libzfs/common/libzfs_import.c b/usr/src/lib/libzfs/common/libzfs_import.c
index 98519c3aae..ef34419146 100644
--- a/usr/src/lib/libzfs/common/libzfs_import.c
+++ b/usr/src/lib/libzfs/common/libzfs_import.c
@@ -78,7 +78,7 @@ typedef struct pool_entry {
} pool_entry_t;
typedef struct name_entry {
- const char *ne_name;
+ char *ne_name;
uint64_t ne_guid;
struct name_entry *ne_next;
} name_entry_t;
@@ -117,7 +117,7 @@ get_devid(const char *path)
* Go through and fix up any path and/or devid information for the given vdev
* configuration.
*/
-static void
+static int
fix_paths(nvlist_t *nv, name_entry_t *names)
{
nvlist_t **child;
@@ -130,8 +130,9 @@ fix_paths(nvlist_t *nv, name_entry_t *names)
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) == 0) {
for (c = 0; c < children; c++)
- fix_paths(child[c], names);
- return;
+ if (fix_paths(child[c], names) != 0)
+ return (-1);
+ return (0);
}
/*
@@ -182,31 +183,56 @@ fix_paths(nvlist_t *nv, name_entry_t *names)
}
if (best == NULL)
- return;
+ return (0);
- verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) == 0);
+ if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
+ return (-1);
if ((devid = get_devid(best->ne_name)) == NULL) {
(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
} else {
- verify(nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) == 0);
+ if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0)
+ return (-1);
devid_str_free(devid);
}
+
+ return (0);
}
/*
* Add the given configuration to the list of known devices.
*/
-static void
-add_config(pool_list_t *pl, const char *path, nvlist_t *config)
+static int
+add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
+ nvlist_t *config)
{
- uint64_t pool_guid, vdev_guid, top_guid, txg;
+ uint64_t pool_guid, vdev_guid, top_guid, txg, state;
pool_entry_t *pe;
vdev_entry_t *ve;
config_entry_t *ce;
name_entry_t *ne;
/*
+ * If this is a hot spare not currently in use, add it to the list of
+ * names to translate, but don't do anything else.
+ */
+ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+ &state) == 0 && state == POOL_STATE_SPARE &&
+ nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
+ if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
+ return (-1);
+
+ if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
+ free(ne);
+ return (-1);
+ }
+ ne->ne_guid = vdev_guid;
+ ne->ne_next = pl->names;
+ pl->names = ne;
+ return (0);
+ }
+
+ /*
* If we have a valid config but cannot read any of these fields, then
* it means we have a half-initialized label. In vdev_label_init()
* we write a label with txg == 0 so that we can identify the device
@@ -223,7 +249,7 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config)
nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
&txg) != 0 || txg == 0) {
nvlist_free(config);
- return;
+ return (0);
}
/*
@@ -236,7 +262,10 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config)
}
if (pe == NULL) {
- pe = zfs_malloc(sizeof (pool_entry_t));
+ if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
+ nvlist_free(config);
+ return (-1);
+ }
pe->pe_guid = pool_guid;
pe->pe_next = pl->pools;
pl->pools = pe;
@@ -252,7 +281,10 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config)
}
if (ve == NULL) {
- ve = zfs_malloc(sizeof (vdev_entry_t));
+ if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
+ nvlist_free(config);
+ return (-1);
+ }
ve->ve_guid = top_guid;
ve->ve_next = pe->pe_vdevs;
pe->pe_vdevs = ve;
@@ -269,7 +301,10 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config)
}
if (ce == NULL) {
- ce = zfs_malloc(sizeof (config_entry_t));
+ if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
+ nvlist_free(config);
+ return (-1);
+ }
ce->ce_txg = txg;
ce->ce_config = config;
ce->ce_next = ve->ve_configs;
@@ -284,24 +319,31 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config)
* mappings so that we can fix up the configuration as necessary before
* doing the import.
*/
- ne = zfs_malloc(sizeof (name_entry_t));
+ if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
+ return (-1);
+
+ if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
+ free(ne);
+ return (-1);
+ }
- ne->ne_name = zfs_strdup(path);
ne->ne_guid = vdev_guid;
ne->ne_next = pl->names;
pl->names = ne;
+
+ return (0);
}
/*
* Returns true if the named pool matches the given GUID.
*/
-boolean_t
-pool_active(const char *name, uint64_t guid)
+static boolean_t
+pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid)
{
zpool_handle_t *zhp;
uint64_t theguid;
- if ((zhp = zpool_open_silent(name)) == NULL)
+ if ((zhp = zpool_open_silent(hdl, name)) == NULL)
return (B_FALSE);
verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
@@ -320,41 +362,42 @@ pool_active(const char *name, uint64_t guid)
* return to the user.
*/
static nvlist_t *
-get_configs(pool_list_t *pl)
+get_configs(libzfs_handle_t *hdl, pool_list_t *pl)
{
- pool_entry_t *pe, *penext;
- vdev_entry_t *ve, *venext;
- config_entry_t *ce, *cenext;
- nvlist_t *ret, *config, *tmp, *nvtop, *nvroot;
- int config_seen;
+ pool_entry_t *pe;
+ vdev_entry_t *ve;
+ config_entry_t *ce;
+ nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
+ nvlist_t **spares;
+ uint_t i, nspares;
+ boolean_t config_seen;
uint64_t best_txg;
char *name;
zfs_cmd_t zc = { 0 };
- uint64_t guid;
+ uint64_t version, guid;
char *packed;
size_t len;
int err;
+ uint_t children = 0;
+ nvlist_t **child = NULL;
+ uint_t c;
- verify(nvlist_alloc(&ret, 0, 0) == 0);
+ if (nvlist_alloc(&ret, 0, 0) != 0)
+ goto nomem;
- for (pe = pl->pools; pe != NULL; pe = penext) {
- uint_t c;
- uint_t children = 0;
+ for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
uint64_t id;
- nvlist_t **child = NULL;
- penext = pe->pe_next;
-
- verify(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0);
- config_seen = FALSE;
+ if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
+ goto nomem;
+ config_seen = B_FALSE;
/*
* Iterate over all toplevel vdevs. Grab the pool configuration
* from the first one we find, and then go through the rest and
* add them as necessary to the 'vdevs' member of the config.
*/
- for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
- venext = ve->ve_next;
+ for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
/*
* Determine the best configuration for this vdev by
@@ -365,8 +408,10 @@ get_configs(pool_list_t *pl)
for (ce = ve->ve_configs; ce != NULL;
ce = ce->ce_next) {
- if (ce->ce_txg > best_txg)
+ if (ce->ce_txg > best_txg) {
tmp = ce->ce_config;
+ best_txg = ce->ce_txg;
+ }
}
if (!config_seen) {
@@ -374,6 +419,7 @@ get_configs(pool_list_t *pl)
* Copy the relevant pieces of data to the pool
* configuration:
*
+ * version
* pool guid
* name
* pool state
@@ -381,19 +427,27 @@ get_configs(pool_list_t *pl)
uint64_t state;
verify(nvlist_lookup_uint64(tmp,
+ ZPOOL_CONFIG_VERSION, &version) == 0);
+ if (nvlist_add_uint64(config,
+ ZPOOL_CONFIG_VERSION, version) != 0)
+ goto nomem;
+ verify(nvlist_lookup_uint64(tmp,
ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
- verify(nvlist_add_uint64(config,
- ZPOOL_CONFIG_POOL_GUID, guid) == 0);
+ if (nvlist_add_uint64(config,
+ ZPOOL_CONFIG_POOL_GUID, guid) != 0)
+ goto nomem;
verify(nvlist_lookup_string(tmp,
ZPOOL_CONFIG_POOL_NAME, &name) == 0);
- verify(nvlist_add_string(config,
- ZPOOL_CONFIG_POOL_NAME, name) == 0);
+ if (nvlist_add_string(config,
+ ZPOOL_CONFIG_POOL_NAME, name) != 0)
+ goto nomem;
verify(nvlist_lookup_uint64(tmp,
ZPOOL_CONFIG_POOL_STATE, &state) == 0);
- verify(nvlist_add_uint64(config,
- ZPOOL_CONFIG_POOL_STATE, state) == 0);
+ if (nvlist_add_uint64(config,
+ ZPOOL_CONFIG_POOL_STATE, state) != 0)
+ goto nomem;
- config_seen = TRUE;
+ config_seen = B_TRUE;
}
/*
@@ -406,8 +460,10 @@ get_configs(pool_list_t *pl)
if (id >= children) {
nvlist_t **newchild;
- newchild = zfs_malloc((id + 1) *
+ newchild = zfs_alloc(hdl, (id + 1) *
sizeof (nvlist_t *));
+ if (newchild == NULL)
+ goto nomem;
for (c = 0; c < children; c++)
newchild[c] = child[c];
@@ -416,23 +472,9 @@ get_configs(pool_list_t *pl)
child = newchild;
children = id + 1;
}
- verify(nvlist_dup(nvtop, &child[id], 0) == 0);
+ if (nvlist_dup(nvtop, &child[id], 0) != 0)
+ goto nomem;
- /*
- * Go through and free all config information.
- */
- for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
- cenext = ce->ce_next;
-
- nvlist_free(ce->ce_config);
- free(ce);
- }
-
- /*
- * Free this vdev entry, since it has now been merged
- * into the main config.
- */
- free(ve);
}
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
@@ -448,51 +490,63 @@ get_configs(pool_list_t *pl)
for (c = 0; c < children; c++)
if (child[c] == NULL) {
nvlist_t *missing;
- verify(nvlist_alloc(&missing, NV_UNIQUE_NAME,
- 0) == 0);
- verify(nvlist_add_string(missing,
- ZPOOL_CONFIG_TYPE, VDEV_TYPE_MISSING) == 0);
- verify(nvlist_add_uint64(missing,
- ZPOOL_CONFIG_ID, c) == 0);
- verify(nvlist_add_uint64(missing,
- ZPOOL_CONFIG_GUID, 0ULL) == 0);
+ if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
+ 0) != 0)
+ goto nomem;
+ if (nvlist_add_string(missing,
+ ZPOOL_CONFIG_TYPE,
+ VDEV_TYPE_MISSING) != 0 ||
+ nvlist_add_uint64(missing,
+ ZPOOL_CONFIG_ID, c) != 0 ||
+ nvlist_add_uint64(missing,
+ ZPOOL_CONFIG_GUID, 0ULL) != 0) {
+ nvlist_free(missing);
+ goto nomem;
+ }
child[c] = missing;
}
/*
* Put all of this pool's top-level vdevs into a root vdev.
*/
- verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
- verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
- VDEV_TYPE_ROOT) == 0);
- verify(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0);
- verify(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) == 0);
- verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
- child, children) == 0);
+ if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
+ goto nomem;
+ if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
+ VDEV_TYPE_ROOT) != 0 ||
+ nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
+ nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
+ nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+ child, children) != 0) {
+ nvlist_free(nvroot);
+ goto nomem;
+ }
for (c = 0; c < children; c++)
nvlist_free(child[c]);
free(child);
+ children = 0;
+ child = NULL;
/*
* Go through and fix up any paths and/or devids based on our
* known list of vdev GUID -> path mappings.
*/
- fix_paths(nvroot, pl->names);
+ if (fix_paths(nvroot, pl->names) != 0) {
+ nvlist_free(nvroot);
+ goto nomem;
+ }
/*
* Add the root vdev to this pool's configuration.
*/
- verify(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- nvroot) == 0);
+ if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ nvroot) != 0) {
+ nvlist_free(nvroot);
+ goto nomem;
+ }
nvlist_free(nvroot);
/*
- * Free this pool entry.
- */
- free(pe);
-
- /*
* Determine if this pool is currently active, in which case we
* can't actually import it.
*/
@@ -501,8 +555,9 @@ get_configs(pool_list_t *pl)
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
&guid) == 0);
- if (pool_active(name, guid)) {
+ if (pool_active(hdl, name, guid)) {
nvlist_free(config);
+ config = NULL;
continue;
}
@@ -510,13 +565,14 @@ get_configs(pool_list_t *pl)
* Try to do the import in order to get vdev state.
*/
if ((err = nvlist_size(config, &len, NV_ENCODE_NATIVE)) != 0)
- zfs_baderror(err);
+ goto nomem;
- packed = zfs_malloc(len);
+ if ((packed = zfs_alloc(hdl, len)) == NULL)
+ goto nomem;
if ((err = nvlist_pack(config, &packed, &len,
NV_ENCODE_NATIVE, 0)) != 0)
- zfs_baderror(err);
+ goto nomem;
nvlist_free(config);
config = NULL;
@@ -525,37 +581,76 @@ get_configs(pool_list_t *pl)
zc.zc_config_src = (uint64_t)(uintptr_t)packed;
zc.zc_config_dst_size = 2 * len;
- zc.zc_config_dst = (uint64_t)(uintptr_t)
- zfs_malloc(zc.zc_config_dst_size);
+ if ((zc.zc_config_dst = (uint64_t)(uintptr_t)
+ zfs_alloc(hdl, zc.zc_config_dst_size)) == NULL)
+ goto nomem;
- while ((err = zfs_ioctl(ZFS_IOC_POOL_TRYIMPORT,
+ while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
&zc)) != 0 && errno == ENOMEM) {
free((void *)(uintptr_t)zc.zc_config_dst);
- zc.zc_config_dst = (uint64_t)(uintptr_t)
- zfs_malloc(zc.zc_config_dst_size);
+ if ((zc.zc_config_dst = (uint64_t)(uintptr_t)
+ zfs_alloc(hdl, zc.zc_config_dst_size)) == NULL)
+ goto nomem;
}
free(packed);
- if (err)
- zfs_baderror(errno);
+ if (err) {
+ (void) zpool_standard_error(hdl, errno,
+ dgettext(TEXT_DOMAIN, "cannot discover pools"));
+ free((void *)(uintptr_t)zc.zc_config_dst);
+ goto error;
+ }
- verify(nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst,
- zc.zc_config_dst_size, &config, 0) == 0);
+ if (nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst,
+ zc.zc_config_dst_size, &config, 0) != 0) {
+ free((void *)(uintptr_t)zc.zc_config_dst);
+ goto nomem;
+ }
+ free((void *)(uintptr_t)zc.zc_config_dst);
- set_pool_health(config);
+ /*
+ * Go through and update the paths for spares, now that we have
+ * them.
+ */
+ verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &nvroot) == 0);
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ &spares, &nspares) == 0) {
+ for (i = 0; i < nspares; i++) {
+ if (fix_paths(spares[i], pl->names) != 0)
+ goto nomem;
+ }
+ }
+
+ if (set_pool_health(config) != 0)
+ goto nomem;
/*
* Add this pool to the list of configs.
*/
- verify(nvlist_add_nvlist(ret, name, config) == 0);
+ if (nvlist_add_nvlist(ret, name, config) != 0)
+ goto nomem;
nvlist_free(config);
-
- free((void *)(uintptr_t)zc.zc_config_dst);
+ config = NULL;
}
return (ret);
+
+nomem:
+ (void) no_memory(hdl);
+error:
+ if (config)
+ nvlist_free(config);
+ if (ret)
+ nvlist_free(ret);
+ for (c = 0; c < children; c++)
+ nvlist_free(child[c]);
+ if (child)
+ free(child);
+
+ return (NULL);
}
/*
@@ -572,19 +667,21 @@ label_offset(size_t size, int l)
* Given a file descriptor, read the label information and return an nvlist
* describing the configuration, if there is one.
*/
-nvlist_t *
-zpool_read_label(int fd)
+int
+zpool_read_label(int fd, nvlist_t **config)
{
struct stat64 statbuf;
int l;
vdev_label_t *label;
- nvlist_t *config;
uint64_t state, txg;
+ *config = NULL;
+
if (fstat64(fd, &statbuf) == -1)
- return (NULL);
+ return (0);
- label = zfs_malloc(sizeof (vdev_label_t));
+ if ((label = malloc(sizeof (vdev_label_t))) == NULL)
+ return (-1);
for (l = 0; l < VDEV_LABELS; l++) {
if (pread(fd, label, sizeof (vdev_label_t),
@@ -592,27 +689,29 @@ zpool_read_label(int fd)
continue;
if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
- sizeof (label->vl_vdev_phys.vp_nvlist), &config, 0) != 0)
+ sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
continue;
- if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
- &state) != 0 || state > POOL_STATE_DESTROYED) {
- nvlist_free(config);
+ if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
+ &state) != 0 || state > POOL_STATE_SPARE) {
+ nvlist_free(*config);
continue;
}
- if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
- &txg) != 0 || txg == 0) {
- nvlist_free(config);
+ if (state != POOL_STATE_SPARE &&
+ (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
+ &txg) != 0 || txg == 0)) {
+ nvlist_free(*config);
continue;
}
free(label);
- return (config);
+ return (0);
}
free(label);
- return (NULL);
+ *config = NULL;
+ return (0);
}
/*
@@ -621,17 +720,22 @@ zpool_read_label(int fd)
* given (argc is 0), then the default directory (/dev/dsk) is searched.
*/
nvlist_t *
-zpool_find_import(int argc, char **argv)
+zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
{
int i;
DIR *dirp;
struct dirent64 *dp;
char path[MAXPATHLEN];
struct stat64 statbuf;
- nvlist_t *ret, *config;
+ nvlist_t *ret = NULL, *config;
static char *default_dir = "/dev/dsk";
int fd;
pool_list_t pools = { 0 };
+ pool_entry_t *pe, *penext;
+ vdev_entry_t *ve, *venext;
+ config_entry_t *ce, *cenext;
+ name_entry_t *ne, *nenext;
+
if (argc == 0) {
argc = 1;
@@ -645,17 +749,18 @@ zpool_find_import(int argc, char **argv)
*/
for (i = 0; i < argc; i++) {
if (argv[i][0] != '/') {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot open '%s': must be an absolute path"),
+ (void) zfs_error(hdl, EZFS_BADPATH,
+ dgettext(TEXT_DOMAIN, "cannot open '%s'"),
argv[i]);
- return (NULL);
+ goto error;
}
if ((dirp = opendir(argv[i])) == NULL) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot open '%s': %s"), argv[i],
- strerror(errno));
- return (NULL);
+ zfs_error_aux(hdl, strerror(errno));
+ (void) zfs_error(hdl, EZFS_BADPATH,
+ dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+ argv[i]);
+ goto error;
}
/*
@@ -678,21 +783,49 @@ zpool_find_import(int argc, char **argv)
if ((fd = open64(path, O_RDONLY)) < 0)
continue;
- config = zpool_read_label(fd);
+ if ((zpool_read_label(fd, &config)) != 0) {
+ (void) no_memory(hdl);
+ goto error;
+ }
(void) close(fd);
if (config != NULL)
- add_config(&pools, path, config);
+ if (add_config(hdl, &pools, path, config) != 0)
+ goto error;
}
}
- ret = get_configs(&pools);
+ ret = get_configs(hdl, &pools);
+
+error:
+ for (pe = pools.pools; pe != NULL; pe = penext) {
+ penext = pe->pe_next;
+ for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
+ venext = ve->ve_next;
+ for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
+ cenext = ce->ce_next;
+ if (ce->ce_config)
+ nvlist_free(ce->ce_config);
+ free(ce);
+ }
+ free(ve);
+ }
+ free(pe);
+ }
+
+ for (ne = pools.names; ne != NULL; ne = nenext) {
+ nenext = ne->ne_next;
+ if (ne->ne_name)
+ free(ne->ne_name);
+ free(ne);
+ }
+
return (ret);
}
-int
+boolean_t
find_guid(nvlist_t *nv, uint64_t guid)
{
uint64_t tmp;
@@ -701,49 +834,94 @@ find_guid(nvlist_t *nv, uint64_t guid)
verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
if (tmp == guid)
- return (TRUE);
+ return (B_TRUE);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) == 0) {
for (c = 0; c < children; c++)
if (find_guid(child[c], guid))
- return (TRUE);
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+typedef struct spare_cbdata {
+ uint64_t cb_guid;
+ zpool_handle_t *cb_zhp;
+} spare_cbdata_t;
+
+static int
+find_spare(zpool_handle_t *zhp, void *data)
+{
+ spare_cbdata_t *cbp = data;
+ nvlist_t **spares;
+ uint_t i, nspares;
+ uint64_t guid;
+ nvlist_t *nvroot;
+
+ verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+ &nvroot) == 0);
+
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ &spares, &nspares) == 0) {
+ for (i = 0; i < nspares; i++) {
+ verify(nvlist_lookup_uint64(spares[i],
+ ZPOOL_CONFIG_GUID, &guid) == 0);
+ if (guid == cbp->cb_guid) {
+ cbp->cb_zhp = zhp;
+ return (1);
+ }
+ }
}
- return (FALSE);
+ zpool_close(zhp);
+ return (0);
}
/*
- * Determines if the pool is in use. If so, it returns TRUE and the state of
+ * Determines if the pool is in use. If so, it returns true and the state of
* the pool as well as the name of the pool. Both strings are allocated and
* must be freed by the caller.
*/
int
-zpool_in_use(int fd, pool_state_t *state, char **namestr)
+zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
+ boolean_t *inuse)
{
nvlist_t *config;
char *name;
- int ret;
+ boolean_t ret;
uint64_t guid, vdev_guid;
zpool_handle_t *zhp;
nvlist_t *pool_config;
uint64_t stateval;
+ spare_cbdata_t cb = { 0 };
+
+ *inuse = B_FALSE;
- if ((config = zpool_read_label(fd)) == NULL)
- return (FALSE);
+ if (zpool_read_label(fd, &config) != 0) {
+ (void) no_memory(hdl);
+ return (-1);
+ }
+
+ if (config == NULL)
+ return (0);
- verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
- &name) == 0);
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
&stateval) == 0);
- verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
- &guid) == 0);
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
&vdev_guid) == 0);
+ if (stateval != POOL_STATE_SPARE) {
+ verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+ &name) == 0);
+ verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+ &guid) == 0);
+ }
+
switch (stateval) {
case POOL_STATE_EXPORTED:
- ret = TRUE;
+ ret = B_TRUE;
break;
case POOL_STATE_ACTIVE:
@@ -754,14 +932,14 @@ zpool_in_use(int fd, pool_state_t *state, char **namestr)
* active pool that was disconnected without being explicitly
* exported.
*/
- if (pool_active(name, guid)) {
+ if (pool_active(hdl, name, guid)) {
/*
* Because the device may have been removed while
* offlined, we only report it as active if the vdev is
* still present in the config. Otherwise, pretend like
* it's not in use.
*/
- if ((zhp = zpool_open_canfail(name)) != NULL &&
+ if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
(pool_config = zpool_get_config(zhp, NULL))
!= NULL) {
nvlist_t *nvroot;
@@ -770,24 +948,57 @@ zpool_in_use(int fd, pool_state_t *state, char **namestr)
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
ret = find_guid(nvroot, vdev_guid);
} else {
- ret = FALSE;
+ ret = B_FALSE;
}
+
+ if (zhp != NULL)
+ zpool_close(zhp);
} else {
stateval = POOL_STATE_POTENTIALLY_ACTIVE;
+ ret = B_TRUE;
+ }
+ break;
+
+ case POOL_STATE_SPARE:
+ /*
+ * For a hot spare, it can be either definitively in use, or
+ * potentially active. To determine if it's in use, we iterate
+ * over all pools in the system and search for one with a spare
+ * with a matching guid.
+ *
+ * Due to the shared nature of spares, we don't actually report
+ * the potentially active case as in use. This means the user
+ * can freely create pools on the hot spares of exported pools,
+ * but to do otherwise makes the resulting code complicated, and
+ * we end up having to deal with this case anyway.
+ */
+ cb.cb_zhp = NULL;
+ cb.cb_guid = vdev_guid;
+ if (zpool_iter(hdl, find_spare, &cb) == 1) {
+ name = (char *)zpool_get_name(cb.cb_zhp);
ret = TRUE;
+ } else {
+ ret = FALSE;
}
break;
default:
- ret = FALSE;
+ ret = B_FALSE;
}
if (ret) {
- *namestr = zfs_strdup(name);
+ if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
+ nvlist_free(config);
+ return (-1);
+ }
*state = (pool_state_t)stateval;
}
+ if (cb.cb_zhp)
+ zpool_close(cb.cb_zhp);
+
nvlist_free(config);
- return (ret);
+ *inuse = ret;
+ return (0);
}
diff --git a/usr/src/lib/libzfs/common/libzfs_mount.c b/usr/src/lib/libzfs/common/libzfs_mount.c
index ae4a9937a8..894bcc0d03 100644
--- a/usr/src/lib/libzfs/common/libzfs_mount.c
+++ b/usr/src/lib/libzfs/common/libzfs_mount.c
@@ -63,44 +63,44 @@
#include "libzfs_impl.h"
/*
- * Search the sharetab for the given mountpoint, returning TRUE if it is found.
+ * Search the sharetab for the given mountpoint, returning true if it is found.
*/
-static int
-is_shared(const char *mountpoint)
+static boolean_t
+is_shared(libzfs_handle_t *hdl, const char *mountpoint)
{
char buf[MAXPATHLEN], *tab;
- if (zfs_sharetab() == NULL)
+ if (hdl->libzfs_sharetab == NULL)
return (0);
- (void) fseek(zfs_sharetab(), 0, SEEK_SET);
+ (void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET);
- while (fgets(buf, sizeof (buf), zfs_sharetab()) != NULL) {
+ while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) {
/* the mountpoint is the first entry on each line */
if ((tab = strchr(buf, '\t')) != NULL) {
*tab = '\0';
if (strcmp(buf, mountpoint) == 0)
- return (1);
+ return (B_TRUE);
}
}
- return (0);
+ return (B_FALSE);
}
/*
- * Returns TRUE if the specified directory is empty. If we can't open the
- * directory at all, return TRUE so that the mount can fail with a more
+ * Returns true if the specified directory is empty. If we can't open the
+ * directory at all, return true so that the mount can fail with a more
* informative error message.
*/
-static int
+static boolean_t
dir_is_empty(const char *dirname)
{
DIR *dirp;
struct dirent64 *dp;
if ((dirp = opendir(dirname)) == NULL)
- return (TRUE);
+ return (B_TRUE);
while ((dp = readdir64(dirp)) != NULL) {
@@ -109,11 +109,11 @@ dir_is_empty(const char *dirname)
continue;
(void) closedir(dirp);
- return (FALSE);
+ return (B_FALSE);
}
(void) closedir(dirp);
- return (TRUE);
+ return (B_TRUE);
}
/*
@@ -121,7 +121,7 @@ dir_is_empty(const char *dirname)
* in 'where' with the current mountpoint, and return 1. Otherwise, we return
* 0.
*/
-int
+boolean_t
zfs_is_mounted(zfs_handle_t *zhp, char **where)
{
struct mnttab search = { 0 }, entry;
@@ -134,14 +134,14 @@ zfs_is_mounted(zfs_handle_t *zhp, char **where)
search.mnt_special = (char *)zfs_get_name(zhp);
search.mnt_fstype = MNTTYPE_ZFS;
- rewind(zfs_mnttab());
- if (getmntany(zfs_mnttab(), &entry, &search) != 0)
- return (FALSE);
+ rewind(zhp->zfs_hdl->libzfs_mnttab);
+ if (getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) != 0)
+ return (B_FALSE);
if (where != NULL)
- *where = zfs_strdup(entry.mnt_mountp);
+ *where = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp);
- return (TRUE);
+ return (B_TRUE);
}
/*
@@ -153,6 +153,7 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
struct stat buf;
char mountpoint[ZFS_MAXPROPLEN];
char mntopts[MNT_LINE_MAX];
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
if (options == NULL)
mntopts[0] = '\0';
@@ -161,7 +162,7 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
/* ignore non-filesystems */
if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
- sizeof (mountpoint), NULL, NULL, 0, FALSE) != 0)
+ sizeof (mountpoint), NULL, NULL, 0, B_FALSE) != 0)
return (0);
/* return success if there is no mountpoint set */
@@ -173,25 +174,18 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
* If the 'zoned' property is set, and we're in the global zone, simply
* return success.
*/
- if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
- char zonename[ZONENAME_MAX];
- if (getzonenamebyid(getzoneid(), zonename,
- sizeof (zonename)) < 0) {
- zfs_error(dgettext(TEXT_DOMAIN, "internal error: "
- "cannot determine current zone"));
- return (1);
- }
-
- if (strcmp(zonename, "global") == 0)
- return (0);
- }
+ if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
+ getzoneid() == GLOBAL_ZONEID)
+ return (0);
/* Create the directory if it doesn't already exist */
if (lstat(mountpoint, &buf) != 0) {
if (mkdirp(mountpoint, 0755) != 0) {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot mount '%s': "
- "unable to create mountpoint"), mountpoint);
- return (1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "failed to create mountpoint"));
+ return (zfs_error(hdl, EZFS_MOUNTFAILED,
+ dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+ mountpoint));
}
}
@@ -204,11 +198,10 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
if ((flags & MS_OVERLAY) == 0 &&
strstr(mntopts, MNTOPT_REMOUNT) == NULL &&
!dir_is_empty(mountpoint)) {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot mount '%s': "
- "directory is not empty"), mountpoint);
- zfs_error(dgettext(TEXT_DOMAIN, "use legacy mountpoint to "
- "allow this behavior, or use the -O flag"));
- return (1);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "directory is not empty"));
+ return (zfs_error(hdl, EZFS_MOUNTFAILED,
+ dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));
}
/* perform the mount */
@@ -219,24 +212,15 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
* from mount(), and they're well-understood. We pick a few
* common ones to improve upon.
*/
- switch (errno) {
- case EBUSY:
- zfs_error(dgettext(TEXT_DOMAIN, "cannot mount '%s': "
- "mountpoint or dataset is busy"), zhp->zfs_name);
- break;
- case EPERM:
- case EACCES:
- zfs_error(dgettext(TEXT_DOMAIN, "cannot mount '%s': "
- "permission denied"), zhp->zfs_name,
- mountpoint);
- break;
- default:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot mount '%s': %s"),
- mountpoint, strerror(errno));
- break;
- }
- return (1);
+ if (errno == EBUSY)
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "mountpoint or dataset is busy"));
+ else
+ zfs_error_aux(hdl, strerror(errno));
+
+ return (zfs_error(hdl, EZFS_MOUNTFAILED,
+ dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+ zhp->zfs_name));
}
return (0);
@@ -253,9 +237,9 @@ zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
/* check to see if need to unmount the filesystem */
search.mnt_special = (char *)zfs_get_name(zhp);
search.mnt_fstype = MNTTYPE_ZFS;
- rewind(zfs_mnttab());
+ rewind(zhp->zfs_hdl->libzfs_mnttab);
if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
- getmntany(zfs_mnttab(), &entry, &search) == 0)) {
+ getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0)) {
if (mountpoint == NULL)
mountpoint = entry.mnt_mountp;
@@ -277,10 +261,10 @@ zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
* semantics from the kernel.
*/
if (umount2(mountpoint, flags) != 0) {
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot unmount '%s': %s"),
- mountpoint, strerror(errno));
- return (-1);
+ zfs_error_aux(zhp->zfs_hdl, strerror(errno));
+ return (zfs_error(zhp->zfs_hdl, EZFS_UMOUNTFAILED,
+ dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
+ mountpoint));
}
/*
@@ -315,23 +299,23 @@ zfs_unmountall(zfs_handle_t *zhp, int flags)
/*
* Check to see if the filesystem is currently shared.
*/
-int
+boolean_t
zfs_is_shared(zfs_handle_t *zhp, char **where)
{
char *mountpoint;
if (!zfs_is_mounted(zhp, &mountpoint))
- return (FALSE);
+ return (B_FALSE);
- if (is_shared(mountpoint)) {
+ if (is_shared(zhp->zfs_hdl, mountpoint)) {
if (where != NULL)
*where = mountpoint;
else
free(mountpoint);
- return (TRUE);
+ return (B_TRUE);
} else {
free(mountpoint);
- return (FALSE);
+ return (B_FALSE);
}
}
@@ -346,6 +330,7 @@ zfs_share(zfs_handle_t *zhp)
char shareopts[ZFS_MAXPROPLEN];
char buf[MAXPATHLEN];
FILE *fp;
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
/* ignore non-filesystems */
if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM)
@@ -353,14 +338,14 @@ zfs_share(zfs_handle_t *zhp)
/* return success if there is no mountpoint set */
if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT,
- mountpoint, sizeof (mountpoint), NULL, NULL, 0, FALSE) != 0 ||
+ mountpoint, sizeof (mountpoint), NULL, NULL, 0, B_FALSE) != 0 ||
strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) == 0 ||
strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) == 0)
return (0);
/* return success if there are no share options */
if (zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts, sizeof (shareopts),
- NULL, NULL, 0, FALSE) != 0 ||
+ NULL, NULL, 0, B_FALSE) != 0 ||
strcmp(shareopts, "off") == 0)
return (0);
@@ -386,11 +371,10 @@ zfs_share(zfs_handle_t *zhp)
"-F nfs -o \"%s\" \"%s\" 2>&1", shareopts,
mountpoint);
- if ((fp = popen(buf, "r")) == NULL) {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot share '%s': "
- "share(1M) failed"), zfs_get_name(zhp));
- return (-1);
- }
+ if ((fp = popen(buf, "r")) == NULL)
+ return (zfs_error(hdl, EZFS_SHAREFAILED,
+ dgettext(TEXT_DOMAIN, "cannot share '%s'"),
+ zfs_get_name(zhp)));
/*
* share(1M) should only produce output if there is some kind
@@ -403,14 +387,11 @@ zfs_share(zfs_handle_t *zhp)
while (buf[strlen(buf) - 1] == '\n')
buf[strlen(buf) - 1] = '\0';
- if (colon == NULL)
- zfs_error(dgettext(TEXT_DOMAIN, "cannot share "
- "'%s': share(1M) failed"),
- zfs_get_name(zhp));
- else
- zfs_error(dgettext(TEXT_DOMAIN, "cannot share "
- "'%s': %s"), zfs_get_name(zhp),
- colon + 2);
+ if (colon != NULL)
+ zfs_error_aux(hdl, colon + 2);
+
+ (void) zfs_error(hdl, EZFS_SHAREFAILED,
+ dgettext(TEXT_DOMAIN, "cannot share '%s'"));
verify(pclose(fp) != 0);
return (-1);
@@ -429,30 +410,29 @@ zfs_unshare(zfs_handle_t *zhp, const char *mountpoint)
{
char buf[MAXPATHLEN];
struct mnttab search = { 0 }, entry;
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
/* check to see if need to unmount the filesystem */
search.mnt_special = (char *)zfs_get_name(zhp);
search.mnt_fstype = MNTTYPE_ZFS;
- rewind(zfs_mnttab());
+ rewind(zhp->zfs_hdl->libzfs_mnttab);
if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
- getmntany(zfs_mnttab(), &entry, &search) == 0)) {
+ getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0)) {
if (mountpoint == NULL)
mountpoint = entry.mnt_mountp;
- if (is_shared(mountpoint)) {
+ if (is_shared(zhp->zfs_hdl, mountpoint)) {
FILE *fp;
(void) snprintf(buf, sizeof (buf),
"/usr/sbin/unshare \"%s\" 2>&1",
mountpoint);
- if ((fp = popen(buf, "r")) == NULL) {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot "
- "unshare '%s': unshare(1M) failed"),
- zfs_get_name(zhp));
- return (-1);
- }
+ if ((fp = popen(buf, "r")) == NULL)
+ return (zfs_error(hdl, EZFS_UNSHAREFAILED,
+ dgettext(TEXT_DOMAIN,
+ "cannot unshare '%s'"), zfs_get_name(zhp)));
/*
* unshare(1M) should only produce output if there is
@@ -465,17 +445,14 @@ zfs_unshare(zfs_handle_t *zhp, const char *mountpoint)
while (buf[strlen(buf) - 1] == '\n')
buf[strlen(buf) - 1] = '\0';
- if (colon == NULL)
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot unshare '%s': unshare(1M) "
- "failed"), zfs_get_name(zhp));
- else
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot unshare '%s': %s"),
- zfs_get_name(zhp), colon + 2);
+ if (colon != NULL)
+ zfs_error_aux(hdl, colon + 2);
verify(pclose(fp) != 0);
- return (-1);
+
+ return (zfs_error(hdl, EZFS_UNSHAREFAILED,
+ dgettext(TEXT_DOMAIN,
+ "cannot unshare '%s'"), zfs_get_name(zhp)));
}
verify(pclose(fp) == 0);
@@ -521,24 +498,20 @@ remove_mountpoint(zfs_handle_t *zhp)
char mountpoint[ZFS_MAXPROPLEN];
char source[ZFS_MAXNAMELEN];
zfs_source_t sourcetype;
- char zonename[ZONENAME_MAX];
+ int zoneid = getzoneid();
/* ignore non-filesystems */
if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
sizeof (mountpoint), &sourcetype, source, sizeof (source),
- FALSE) != 0)
+ B_FALSE) != 0)
return;
- if (getzonenamebyid(getzoneid(), zonename, sizeof (zonename)) < 0)
- zfs_fatal(dgettext(TEXT_DOMAIN, "internal error: "
- "cannot determine current zone"));
-
if (strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0 &&
strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 &&
(sourcetype == ZFS_SRC_DEFAULT ||
sourcetype == ZFS_SRC_INHERITED) &&
(!zfs_prop_get_int(zhp, ZFS_PROP_ZONED) ||
- strcmp(zonename, "global") != 0)) {
+ zoneid != GLOBAL_ZONEID)) {
/*
* Try to remove the directory, silently ignoring any errors.
diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c
index 1fe6fa2d27..37c82015b9 100644
--- a/usr/src/lib/libzfs/common/libzfs_pool.c
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -45,8 +46,8 @@
* Validate the given pool name, optionally putting an extended error message in
* 'buf'.
*/
-static int
-zpool_name_valid(const char *pool, boolean_t isopen, char *buf, size_t buflen)
+static boolean_t
+zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
{
namecheck_err_t why;
char what;
@@ -64,53 +65,52 @@ zpool_name_valid(const char *pool, boolean_t isopen, char *buf, size_t buflen)
(strncmp(pool, "mirror", 6) == 0 ||
strncmp(pool, "raidz", 5) == 0 ||
strncmp(pool, "spare", 5) == 0)) {
- ret = -1;
- why = NAME_ERR_RESERVED;
+ zfs_error_aux(hdl,
+ dgettext(TEXT_DOMAIN, "name is reserved"));
+ return (B_FALSE);
}
if (ret != 0) {
- if (buf != NULL) {
+ if (hdl != NULL) {
switch (why) {
case NAME_ERR_TOOLONG:
- (void) snprintf(buf, buflen,
+ zfs_error_aux(hdl,
dgettext(TEXT_DOMAIN, "name is too long"));
break;
case NAME_ERR_INVALCHAR:
- (void) snprintf(buf, buflen,
+ zfs_error_aux(hdl,
dgettext(TEXT_DOMAIN, "invalid character "
"'%c' in pool name"), what);
break;
case NAME_ERR_NOLETTER:
- (void) strlcpy(buf, dgettext(TEXT_DOMAIN,
- "name must begin with a letter"), buflen);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "name must begin with a letter"));
break;
case NAME_ERR_RESERVED:
- (void) strlcpy(buf, dgettext(TEXT_DOMAIN,
- "name is reserved\n"
- "pool name may have been omitted"), buflen);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "name is reserved"));
break;
case NAME_ERR_DISKLIKE:
- (void) strlcpy(buf, dgettext(TEXT_DOMAIN,
- "pool name is reserved\n"
- "pool name may have been omitted"), buflen);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool name is reserved"));
break;
}
}
- return (FALSE);
+ return (B_FALSE);
}
- return (TRUE);
+ return (B_TRUE);
}
/*
* Set the pool-wide health based on the vdev state of the root vdev.
*/
-void
+int
set_pool_health(nvlist_t *config)
{
nvlist_t *nvroot;
@@ -140,11 +140,10 @@ set_pool_health(nvlist_t *config)
break;
default:
- zfs_baderror(vs->vs_state);
+ abort();
}
- verify(nvlist_add_string(config, ZPOOL_CONFIG_POOL_HEALTH,
- health) == 0);
+ return (nvlist_add_string(config, ZPOOL_CONFIG_POOL_HEALTH, health));
}
/*
@@ -152,28 +151,33 @@ set_pool_health(nvlist_t *config)
* state.
*/
zpool_handle_t *
-zpool_open_canfail(const char *pool)
+zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
{
zpool_handle_t *zhp;
- int error;
/*
* Make sure the pool name is valid.
*/
- if (!zpool_name_valid(pool, B_TRUE, NULL, 0)) {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': invalid "
- "pool name"), pool);
+ if (!zpool_name_valid(hdl, B_TRUE, pool)) {
+ (void) zfs_error(hdl, EZFS_INVALIDNAME,
+ dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+ pool);
return (NULL);
}
- zhp = zfs_malloc(sizeof (zpool_handle_t));
+ if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
+ return (NULL);
+ zhp->zpool_hdl = hdl;
(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
- if ((error = zpool_refresh_stats(zhp)) != 0) {
- if (error == ENOENT || error == EINVAL) {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': no "
- "such pool"), pool);
+ if (zpool_refresh_stats(zhp) != 0) {
+ if (errno == ENOENT || errno == EINVAL) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "no such pool"));
+ (void) zfs_error(hdl, EZFS_NOENT,
+ dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+ pool);
free(zhp);
return (NULL);
} else {
@@ -191,17 +195,18 @@ zpool_open_canfail(const char *pool)
* the configuration cache may be out of date).
*/
zpool_handle_t *
-zpool_open_silent(const char *pool)
+zpool_open_silent(libzfs_handle_t *hdl, const char *pool)
{
zpool_handle_t *zhp;
- int error;
- zhp = zfs_malloc(sizeof (zpool_handle_t));
+ if ((zhp = calloc(sizeof (zpool_handle_t), 1)) == NULL)
+ return (NULL);
+ zhp->zpool_hdl = hdl;
(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
- if ((error = zpool_refresh_stats(zhp)) != 0) {
- if (error == ENOENT || error == EINVAL) {
+ if (zpool_refresh_stats(zhp) != 0) {
+ if (errno == ENOENT || errno == EINVAL) {
free(zhp);
return (NULL);
} else {
@@ -219,18 +224,16 @@ zpool_open_silent(const char *pool)
* state.
*/
zpool_handle_t *
-zpool_open(const char *pool)
+zpool_open(libzfs_handle_t *hdl, const char *pool)
{
zpool_handle_t *zhp;
- if ((zhp = zpool_open_canfail(pool)) == NULL)
+ if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
return (NULL);
if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': pool is "
- "currently unavailable"), zhp->zpool_name);
- zfs_error(dgettext(TEXT_DOMAIN, "run 'zpool status %s' for "
- "detailed information"), zhp->zpool_name);
+ (void) zfs_error(hdl, EZFS_POOLUNAVAIL,
+ dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
zpool_close(zhp);
return (NULL);
}
@@ -251,7 +254,7 @@ zpool_close(zpool_handle_t *zhp)
if (zhp->zpool_error_log) {
int i;
for (i = 0; i < zhp->zpool_error_count; i++)
- free(zhp->zpool_error_log[i]);
+ nvlist_free(zhp->zpool_error_log[i]);
free(zhp->zpool_error_log);
}
free(zhp);
@@ -280,6 +283,20 @@ zpool_get_guid(zpool_handle_t *zhp)
}
/*
+ * Return the version of the pool.
+ */
+uint64_t
+zpool_get_version(zpool_handle_t *zhp)
+{
+ uint64_t version;
+
+ verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_VERSION,
+ &version) == 0);
+
+ return (version);
+}
+
+/*
* Return the amount of space currently consumed by the pool.
*/
uint64_t
@@ -324,7 +341,7 @@ zpool_get_root(zpool_handle_t *zhp, char *buf, size_t buflen)
zfs_cmd_t zc = { 0 };
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0 ||
+ if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 ||
zc.zc_root[0] == '\0')
return (-1);
@@ -348,34 +365,35 @@ zpool_get_state(zpool_handle_t *zhp)
* don't have to worry about error semantics.
*/
int
-zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot)
+zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
+ const char *altroot)
{
zfs_cmd_t zc = { 0 };
char *packed;
size_t len;
- int err;
- char reason[64];
+ char msg[1024];
- if (!zpool_name_valid(pool, B_FALSE, reason, sizeof (reason))) {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': %s"),
- pool, reason);
- return (-1);
- }
+ (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+ "cannot create '%s'"), pool);
- if (altroot != NULL && altroot[0] != '/') {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': alternate "
- "root '%s' must be a complete path"), pool, altroot);
- return (-1);
- }
+ if (!zpool_name_valid(hdl, B_FALSE, pool))
+ return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
- if ((err = nvlist_size(nvroot, &len, NV_ENCODE_NATIVE)) != 0)
- zfs_baderror(err);
+ if (altroot != NULL && altroot[0] != '/')
+ return (zfs_error(hdl, EZFS_BADPATH,
+ dgettext(TEXT_DOMAIN, "bad alternate root '%s'"), altroot));
- packed = zfs_malloc(len);
+ if (nvlist_size(nvroot, &len, NV_ENCODE_NATIVE) != 0)
+ return (no_memory(hdl));
- if ((err = nvlist_pack(nvroot, &packed, &len,
- NV_ENCODE_NATIVE, 0)) != 0)
- zfs_baderror(err);
+ if ((packed = zfs_alloc(hdl, len)) == NULL)
+ return (-1);
+
+ if (nvlist_pack(nvroot, &packed, &len,
+ NV_ENCODE_NATIVE, 0) != 0) {
+ free(packed);
+ return (no_memory(hdl));
+ }
(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
zc.zc_config_src = (uint64_t)(uintptr_t)packed;
@@ -384,18 +402,10 @@ zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot)
if (altroot != NULL)
(void) strlcpy(zc.zc_root, altroot, sizeof (zc.zc_root));
- if (zfs_ioctl(ZFS_IOC_POOL_CREATE, &zc) != 0) {
- switch (errno) {
- case EEXIST:
- zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
- "pool exists"), pool);
- break;
-
- case EPERM:
- zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
- "permission denied"), pool);
- break;
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CREATE, &zc) != 0) {
+ free(packed);
+ switch (errno) {
case EBUSY:
/*
* This can happen if the user has specified the same
@@ -403,14 +413,13 @@ zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot)
* until we try to add it and see we already have a
* label.
*/
- zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
- "one or more vdevs refer to the same device"),
- pool);
- break;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "one or more vdevs refer to the same device"));
+ return (zfs_error(hdl, EZFS_BADDEV, msg));
case EOVERFLOW:
/*
- * This occurrs when one of the devices is below
+ * This occurs when one of the devices is below
* SPA_MINDEVSIZE. Unfortunately, we can't detect which
* device was the problem device since there's no
* reliable way to determine device size from userland.
@@ -420,53 +429,20 @@ zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot)
zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
- zfs_error(dgettext(TEXT_DOMAIN, "cannot "
- "create '%s': one or more devices is less "
- "than the minimum size (%s)"), pool,
- buf);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "one or more devices is less than the "
+ "minimum size (%s)"), buf);
}
- break;
-
- case ENAMETOOLONG:
- /*
- * One of the vdevs has exceeded VDEV_SPEC_MAX length in
- * its plaintext representation.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
- "too many devices in a single vdev"), pool);
- break;
-
- case EIO:
- zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
- "I/O error on one or more devices"), pool);
- break;
-
- case ENXIO:
- /*
- * This is unlikely to happen since we've verified that
- * all the devices can be opened from userland, but it's
- * still possible in some circumstances.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
- "one or more devices is unavailable"), pool);
- break;
+ return (zfs_error(hdl, EZFS_BADDEV, msg));
case ENOSPC:
- /*
- * This can occur if we were incapable of writing to a
- * file vdev because the underlying filesystem is out of
- * space. This is very similar to EOVERFLOW, but we'll
- * produce a slightly different message.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
- "one or more devices is out of space"), pool);
- break;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "one or more devices is out of space"));
+ return (zfs_error(hdl, EZFS_BADDEV, msg));
default:
- zfs_baderror(errno);
+ return (zpool_standard_error(hdl, errno, msg));
}
-
- return (-1);
}
free(packed);
@@ -478,7 +454,7 @@ zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot)
if (altroot != NULL) {
zfs_handle_t *zhp;
- verify((zhp = zfs_open(pool, ZFS_TYPE_ANY)) != NULL);
+ verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_ANY)) != NULL);
verify(zfs_prop_set(zhp, ZFS_PROP_MOUNTPOINT, "/") == 0);
zfs_close(zhp);
@@ -496,9 +472,12 @@ zpool_destroy(zpool_handle_t *zhp)
{
zfs_cmd_t zc = { 0 };
zfs_handle_t *zfp = NULL;
+ libzfs_handle_t *hdl = zhp->zpool_hdl;
+ char msg[1024];
if (zhp->zpool_state == POOL_STATE_ACTIVE &&
- (zfp = zfs_open(zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
+ (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
+ ZFS_TYPE_FILESYSTEM)) == NULL)
return (-1);
if (zpool_remove_zvol_links(zhp) != NULL)
@@ -506,35 +485,16 @@ zpool_destroy(zpool_handle_t *zhp)
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- if (zfs_ioctl(ZFS_IOC_POOL_DESTROY, &zc) != 0) {
- switch (errno) {
- case EPERM:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot destroy '%s': permission denied"),
- zhp->zpool_name);
- break;
-
- case EBUSY:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot destroy '%s': pool busy"),
- zhp->zpool_name);
- break;
-
- case ENOENT:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot destroy '%s': no such pool"),
- zhp->zpool_name);
- break;
-
- case EROFS:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot destroy '%s': one or more devices is "
- "read only, or '/' is mounted read only"),
- zhp->zpool_name);
- break;
+ if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
+ (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+ "cannot destroy '%s'"), zhp->zpool_name);
- default:
- zfs_baderror(errno);
+ if (errno == EROFS) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "one or more devices is read only"));
+ (void) zfs_error(hdl, EZFS_BADDEV, msg);
+ } else {
+ (void) zpool_standard_error(hdl, errno, msg);
}
if (zfp)
@@ -560,10 +520,27 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
char *packed;
size_t len;
zfs_cmd_t zc;
+ int ret;
+ libzfs_handle_t *hdl = zhp->zpool_hdl;
+ char msg[1024];
+ nvlist_t **spares;
+ uint_t nspares;
+
+ (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+ "cannot add to '%s'"), zhp->zpool_name);
+
+ if (zpool_get_version(zhp) < ZFS_VERSION_SPARES &&
+ nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ &spares, &nspares) == 0) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
+ "upgraded to add hot spares"));
+ return (zfs_error(hdl, EZFS_BADVERSION, msg));
+ }
verify(nvlist_size(nvroot, &len, NV_ENCODE_NATIVE) == 0);
- packed = zfs_malloc(len);
+ if ((packed = zfs_alloc(zhp->zpool_hdl, len)) == NULL)
+ return (-1);
verify(nvlist_pack(nvroot, &packed, &len, NV_ENCODE_NATIVE, 0) == 0);
@@ -571,13 +548,8 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
zc.zc_config_src = (uint64_t)(uintptr_t)packed;
zc.zc_config_src_size = len;
- if (zfs_ioctl(ZFS_IOC_VDEV_ADD, &zc) != 0) {
+ if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ADD, &zc) != 0) {
switch (errno) {
- case EPERM:
- zfs_error(dgettext(TEXT_DOMAIN, "cannot add to '%s': "
- "permission denied"), zhp->zpool_name);
- break;
-
case EBUSY:
/*
* This can happen if the user has specified the same
@@ -585,30 +557,9 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
* until we try to add it and see we already have a
* label.
*/
- zfs_error(dgettext(TEXT_DOMAIN, "cannot add to '%s': "
- "one or more vdevs refer to the same device"),
- zhp->zpool_name);
- break;
-
- case ENAMETOOLONG:
- /*
- * One of the vdevs has exceeded VDEV_SPEC_MAX length in
- * its plaintext representation.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot add to '%s': "
- "too many devices in a single vdev"),
- zhp->zpool_name);
- break;
-
- case ENXIO:
- /*
- * This is unlikely to happen since we've verified that
- * all the devices can be opened from userland, but it's
- * still possible in some circumstances.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "cannot add to '%s': "
- "one or more devices is unavailable"),
- zhp->zpool_name);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "one or more vdevs refer to the same device"));
+ (void) zfs_error(hdl, EZFS_BADDEV, msg);
break;
case EOVERFLOW:
@@ -623,23 +574,31 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
- zfs_error(dgettext(TEXT_DOMAIN, "cannot "
- "add to '%s': one or more devices is less "
- "than the minimum size (%s)"),
- zhp->zpool_name, buf);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "device is less than the minimum "
+ "size (%s)"), buf);
}
+ (void) zfs_error(hdl, EZFS_BADDEV, msg);
+ break;
+
+ case ENOTSUP:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool must be upgraded to add raidz2 vdevs"));
+ (void) zfs_error(hdl, EZFS_BADVERSION, msg);
break;
default:
- zfs_baderror(errno);
+ (void) zpool_standard_error(hdl, errno, msg);
}
- return (-1);
+ ret = -1;
+ } else {
+ ret = 0;
}
free(packed);
- return (0);
+ return (ret);
}
/*
@@ -656,32 +615,10 @@ zpool_export(zpool_handle_t *zhp)
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- if (zfs_ioctl(ZFS_IOC_POOL_EXPORT, &zc) != 0) {
- switch (errno) {
- case EPERM:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot export '%s': permission denied"),
- zhp->zpool_name);
- break;
-
- case EBUSY:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot export '%s': pool is in use"),
- zhp->zpool_name);
- break;
-
- case ENOENT:
- zfs_error(dgettext(TEXT_DOMAIN,
- "cannot export '%s': no such pool"),
- zhp->zpool_name);
- break;
-
- default:
- zfs_baderror(errno);
- }
-
- return (-1);
- }
+ if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_EXPORT, &zc) != 0)
+ return (zpool_standard_error(zhp->zpool_hdl, errno,
+ dgettext(TEXT_DOMAIN, "cannot export '%s'"),
+ zhp->zpool_name));
return (0);
}
@@ -693,7 +630,8 @@ zpool_export(zpool_handle_t *zhp)
* an alternate root, respectively.
*/
int
-zpool_import(nvlist_t *config, const char *newname, const char *altroot)
+zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
+ const char *altroot)
{
zfs_cmd_t zc;
char *packed;
@@ -706,22 +644,19 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot)
&origname) == 0);
if (newname != NULL) {
- if (!zpool_name_valid(newname, B_FALSE, NULL, 0)) {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot import '%s': "
- "invalid pool name"), newname);
- return (-1);
- }
+ if (!zpool_name_valid(hdl, B_FALSE, newname))
+ return (zfs_error(hdl, EZFS_INVALIDNAME,
+ dgettext(TEXT_DOMAIN, "cannot import '%s'"),
+ newname));
thename = (char *)newname;
} else {
thename = origname;
}
- if (altroot != NULL && altroot[0] != '/') {
- zfs_error(dgettext(TEXT_DOMAIN, "cannot import '%s': alternate "
- "root '%s' must be a complete path"), thename,
- altroot);
- return (-1);
- }
+ if (altroot != NULL && altroot[0] != '/')
+ return (zfs_error(hdl, EZFS_BADPATH,
+ dgettext(TEXT_DOMAIN, "bad alternate root '%s'"),
+ altroot));
(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
@@ -735,7 +670,8 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot)
verify(nvlist_size(config, &len, NV_ENCODE_NATIVE) == 0);
- packed = zfs_malloc(len);
+ if ((packed = zfs_alloc(hdl, len)) == NULL)
+ return (-1);
verify(nvlist_pack(config, &packed, &len, NV_ENCODE_NATIVE, 0) == 0);
@@ -743,7 +679,7 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot)
zc.zc_config_src_size = len;
ret = 0;
- if (zfs_ioctl(ZFS_IOC_POOL_IMPORT, &zc) != 0) {
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
char desc[1024];
if (newname == NULL)
(void) snprintf(desc, sizeof (desc),
@@ -755,42 +691,15 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot)
origname, thename);
switch (errno) {
- case EEXIST:
- /*
- * A pool with that name already exists.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: pool exists"),
- desc);
- break;
-
- case EPERM:
- /*
- * The user doesn't have permission to create pools.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: permission "
- "denied"), desc);
- break;
-
- case ENXIO:
- case EDOM:
- /*
- * Device is unavailable, or vdev sum didn't match.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: one or more "
- "devices is unavailable"),
- desc);
- break;
-
case ENOTSUP:
/*
* Unsupported version.
*/
- zfs_error(dgettext(TEXT_DOMAIN,
- "%s: unsupported version"), desc);
+ (void) zfs_error(hdl, EZFS_BADVERSION, desc);
break;
default:
- zfs_baderror(errno);
+ (void) zpool_standard_error(hdl, errno, desc);
}
ret = -1;
@@ -799,7 +708,7 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot)
/*
* This should never fail, but play it safe anyway.
*/
- if ((zhp = zpool_open_silent(thename)) != NULL) {
+ if ((zhp = zpool_open_silent(hdl, thename)) != NULL) {
ret = zpool_create_zvol_links(zhp);
zpool_close(zhp);
}
@@ -817,48 +726,35 @@ zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
{
zfs_cmd_t zc = { 0 };
char msg[1024];
+ libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_cookie = type;
- if (zfs_ioctl(ZFS_IOC_POOL_SCRUB, &zc) == 0)
+ if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_SCRUB, &zc) == 0)
return (0);
(void) snprintf(msg, sizeof (msg),
dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
- switch (errno) {
- case EPERM:
- /*
- * No permission to scrub this pool.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg);
- break;
-
- case EBUSY:
- /*
- * Resilver in progress.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: currently resilvering"),
- msg);
- break;
-
- default:
- zfs_baderror(errno);
- }
- return (-1);
+ if (errno == EBUSY)
+ return (zfs_error(hdl, EZFS_RESILVERING, msg));
+ else
+ return (zpool_standard_error(hdl, errno, msg));
}
-static uint64_t
-vdev_to_guid(nvlist_t *nv, const char *search, uint64_t guid)
+static nvlist_t *
+vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
+ boolean_t *isspare)
{
uint_t c, children;
nvlist_t **child;
- uint64_t ret, present;
+ uint64_t theguid, present;
char *path;
uint64_t wholedisk = 0;
+ nvlist_t *ret;
- verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &ret) == 0);
+ verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
if (search == NULL &&
nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
@@ -866,8 +762,8 @@ vdev_to_guid(nvlist_t *nv, const char *search, uint64_t guid)
* If the device has never been present since import, the only
* reliable way to match the vdev is by GUID.
*/
- if (ret == guid)
- return (ret);
+ if (theguid == guid)
+ return (nv);
} else if (search != NULL &&
nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
@@ -879,28 +775,37 @@ vdev_to_guid(nvlist_t *nv, const char *search, uint64_t guid)
*/
if (strlen(search) == strlen(path) - 2 &&
strncmp(search, path, strlen(search)) == 0)
- return (ret);
+ return (nv);
} else if (strcmp(search, path) == 0) {
- return (ret);
+ return (nv);
}
}
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0)
- return (0);
+ return (NULL);
for (c = 0; c < children; c++)
- if ((ret = vdev_to_guid(child[c], search, guid)) != 0)
+ if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+ isspare)) != NULL)
return (ret);
- return (0);
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++) {
+ if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+ isspare)) != NULL) {
+ *isspare = B_TRUE;
+ return (ret);
+ }
+ }
+ }
+
+ return (NULL);
}
-/*
- * Given a string describing a vdev, returns the matching GUID, or 0 if none.
- */
-uint64_t
-zpool_vdev_to_guid(zpool_handle_t *zhp, const char *path)
+nvlist_t *
+zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *isspare)
{
char buf[MAXPATHLEN];
const char *search;
@@ -921,7 +826,8 @@ zpool_vdev_to_guid(zpool_handle_t *zhp, const char *path)
verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
- return (vdev_to_guid(nvroot, search, guid));
+ *isspare = B_FALSE;
+ return (vdev_to_nvlist_iter(nvroot, search, guid, isspare));
}
/*
@@ -932,39 +838,26 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path)
{
zfs_cmd_t zc = { 0 };
char msg[1024];
+ nvlist_t *tgt;
+ boolean_t isspare;
+ libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) snprintf(msg, sizeof (msg),
dgettext(TEXT_DOMAIN, "cannot online %s"), path);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- if ((zc.zc_guid = zpool_vdev_to_guid(zhp, path)) == 0) {
- zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"),
- msg);
- return (-1);
- }
+ if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == NULL)
+ return (zfs_error(hdl, EZFS_NODEVICE, msg));
- if (zfs_ioctl(ZFS_IOC_VDEV_ONLINE, &zc) == 0)
- return (0);
+ if (isspare)
+ return (zfs_error(hdl, EZFS_ISSPARE, msg));
- switch (errno) {
- case ENODEV:
- /*
- * Device doesn't exist
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: device not in pool"), msg);
- break;
+ verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
- case EPERM:
- /*
- * No permission to bring this vdev online.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg);
- break;
+ if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ONLINE, &zc) == 0)
+ return (0);
- default:
- zfs_baderror(errno);
- }
- return (-1);
+ return (zpool_standard_error(hdl, errno, msg));
}
/*
@@ -975,48 +868,66 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, int istmp)
{
zfs_cmd_t zc = { 0 };
char msg[1024];
+ nvlist_t *tgt;
+ boolean_t isspare;
+ libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) snprintf(msg, sizeof (msg),
dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- if ((zc.zc_guid = zpool_vdev_to_guid(zhp, path)) == 0) {
- zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"),
- msg);
- return (-1);
- }
+ if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == NULL)
+ return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+ if (isspare)
+ return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+ verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
zc.zc_cookie = istmp;
- if (zfs_ioctl(ZFS_IOC_VDEV_OFFLINE, &zc) == 0)
+ if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_OFFLINE, &zc) == 0)
return (0);
switch (errno) {
- case ENODEV:
- /*
- * Device doesn't exist
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: device not in pool"), msg);
- break;
-
- case EPERM:
- /*
- * No permission to take this vdev offline.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg);
- break;
+ case EBUSY:
- case EBUSY:
/*
* There are no other replicas of this device.
*/
- zfs_error(dgettext(TEXT_DOMAIN, "%s: no valid replicas"), msg);
- break;
+ return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
- default:
- zfs_baderror(errno);
+ default:
+ return (zpool_standard_error(hdl, errno, msg));
}
- return (-1);
+}
+
+/*
+ * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
+ * a hot spare.
+ */
+static boolean_t
+is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
+{
+ nvlist_t **child;
+ uint_t c, children;
+ char *type;
+
+ if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
+ &children) == 0) {
+ verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
+ &type) == 0);
+
+ if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
+ children == 2 && child[which] == tgt)
+ return (B_TRUE);
+
+ for (c = 0; c < children; c++)
+ if (is_replacing_spare(child[c], tgt, which))
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
}
/*
@@ -1032,6 +943,14 @@ zpool_vdev_attach(zpool_handle_t *zhp,
char *packed;
int ret;
size_t len;
+ nvlist_t *tgt;
+ boolean_t isspare;
+ uint64_t val;
+ char *path;
+ nvlist_t **child;
+ uint_t children;
+ nvlist_t *config_root;
+ libzfs_handle_t *hdl = zhp->zpool_hdl;
if (replacing)
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
@@ -1041,23 +960,63 @@ zpool_vdev_attach(zpool_handle_t *zhp,
"cannot attach %s to %s"), new_disk, old_disk);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- if ((zc.zc_guid = zpool_vdev_to_guid(zhp, old_disk)) == 0) {
- zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"),
- msg);
- return (-1);
- }
+ if ((tgt = zpool_find_vdev(zhp, old_disk, &isspare)) == 0)
+ return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+ if (isspare)
+ return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+ verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
zc.zc_cookie = replacing;
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) != 0 || children != 1) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "new device must be a single disk"));
+ return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
+ }
+
+ verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
+ ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
+
+ /*
+ * If the target is a hot spare that has been swapped in, we can only
+ * replace it with another hot spare.
+ */
+ if (replacing &&
+ nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
+ nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
+ (zpool_find_vdev(zhp, path, &isspare) == NULL || !isspare) &&
+ is_replacing_spare(config_root, tgt, 1)) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "can only be replaced by another hot spare"));
+ return (zfs_error(hdl, EZFS_BADTARGET, msg));
+ }
+
+ /*
+ * If we are attempting to replace a spare, it cannot be applied to an
+ * already spared device.
+ */
+ if (replacing &&
+ nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
+ zpool_find_vdev(zhp, path, &isspare) != NULL && isspare &&
+ is_replacing_spare(config_root, tgt, 0)) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "device has already been replaced with a spare"));
+ return (zfs_error(hdl, EZFS_BADTARGET, msg));
+ }
+
verify(nvlist_size(nvroot, &len, NV_ENCODE_NATIVE) == 0);
- packed = zfs_malloc(len);
+ if ((packed = zfs_alloc(zhp->zpool_hdl, len)) == NULL)
+ return (-1);
verify(nvlist_pack(nvroot, &packed, &len, NV_ENCODE_NATIVE, 0) == 0);
zc.zc_config_src = (uint64_t)(uintptr_t)packed;
zc.zc_config_src_size = len;
- ret = zfs_ioctl(ZFS_IOC_VDEV_ATTACH, &zc);
+ ret = ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ATTACH, &zc);
free(packed);
@@ -1065,87 +1024,65 @@ zpool_vdev_attach(zpool_handle_t *zhp,
return (0);
switch (errno) {
- case EPERM:
- /*
- * No permission to mess with the config.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg);
- break;
-
- case ENODEV:
- /*
- * Device doesn't exist.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: %s not in pool"),
- msg, old_disk);
- break;
-
case ENOTSUP:
/*
* Can't attach to or replace this type of vdev.
*/
if (replacing)
- zfs_error(dgettext(TEXT_DOMAIN,
- "%s: cannot replace a replacing device"), msg);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "cannot replace a replacing device"));
else
- zfs_error(dgettext(TEXT_DOMAIN,
- "%s: attach is only applicable to mirrors"), msg);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "can only attach to mirrors and top-level "
+ "disks"));
+ (void) zfs_error(hdl, EZFS_BADTARGET, msg);
break;
case EINVAL:
/*
* The new device must be a single disk.
*/
- zfs_error(dgettext(TEXT_DOMAIN,
- "%s: <new_device> must be a single disk"), msg);
- break;
-
- case ENXIO:
- /*
- * This is unlikely to happen since we've verified that
- * all the devices can be opened from userland, but it's
- * still possible in some circumstances.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: %s is unavailable"),
- msg, new_disk);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "new device must be a single disk"));
+ (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
break;
case EBUSY:
- /*
- * The new device is is use.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: %s busy"), msg, new_disk);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
+ new_disk);
+ (void) zfs_error(hdl, EZFS_BADDEV, msg);
break;
case EOVERFLOW:
/*
* The new device is too small.
*/
- zfs_error(dgettext(TEXT_DOMAIN, "%s: %s is too small"),
- msg, new_disk);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "device is too small"));
+ (void) zfs_error(hdl, EZFS_BADDEV, msg);
break;
case EDOM:
/*
* The new device has a different alignment requirement.
*/
- zfs_error(dgettext(TEXT_DOMAIN,
- "%s: devices have different sector alignment"), msg);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "devices have different sector alignment"));
+ (void) zfs_error(hdl, EZFS_BADDEV, msg);
break;
case ENAMETOOLONG:
/*
* The resulting top-level vdev spec won't fit in the label.
*/
- zfs_error(dgettext(TEXT_DOMAIN,
- "%s: too many devices in a single vdev"), msg);
+ (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
break;
default:
- zfs_baderror(errno);
+ (void) zpool_standard_error(hdl, errno, msg);
}
- return (1);
+ return (-1);
}
/*
@@ -1156,55 +1093,81 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
{
zfs_cmd_t zc = { 0 };
char msg[1024];
+ nvlist_t *tgt;
+ boolean_t isspare;
+ libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) snprintf(msg, sizeof (msg),
dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- if ((zc.zc_guid = zpool_vdev_to_guid(zhp, path)) == 0) {
- zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"),
- msg);
- return (-1);
- }
+ if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == 0)
+ return (zfs_error(hdl, EZFS_NODEVICE, msg));
- if (zfs_ioctl(ZFS_IOC_VDEV_DETACH, &zc) == 0)
+ if (isspare)
+ return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+ verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
+
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_DETACH, &zc) == 0)
return (0);
switch (errno) {
- case EPERM:
- /*
- * No permission to mess with the config.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg);
- break;
-
- case ENODEV:
- /*
- * Device doesn't exist.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: device not in pool"), msg);
- break;
case ENOTSUP:
/*
* Can't detach from this type of vdev.
*/
- zfs_error(dgettext(TEXT_DOMAIN,
- "%s: only applicable to mirror and replacing vdevs"), msg);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
+ "applicable to mirror and replacing vdevs"));
+ (void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
break;
case EBUSY:
/*
* There are no other replicas of this device.
*/
- zfs_error(dgettext(TEXT_DOMAIN, "%s: no valid replicas"), msg);
+ (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
break;
default:
- zfs_baderror(errno);
+ (void) zpool_standard_error(hdl, errno, msg);
}
- return (1);
+ return (-1);
+}
+
+/*
+ * Remove the given device. Currently, this is supported only for hot spares.
+ */
+int
+zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
+{
+ zfs_cmd_t zc = { 0 };
+ char msg[1024];
+ nvlist_t *tgt;
+ boolean_t isspare;
+ libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+ (void) snprintf(msg, sizeof (msg),
+ dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
+
+ (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+ if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == 0)
+ return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+ if (!isspare) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "only hot spares can be removed"));
+ return (zfs_error(hdl, EZFS_NODEVICE, msg));
+ }
+
+ verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
+
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
+ return (0);
+
+ return (zpool_standard_error(hdl, errno, msg));
}
/*
@@ -1215,6 +1178,9 @@ zpool_clear(zpool_handle_t *zhp, const char *path)
{
zfs_cmd_t zc = { 0 };
char msg[1024];
+ nvlist_t *tgt;
+ boolean_t isspare;
+ libzfs_handle_t *hdl = zhp->zpool_hdl;
if (path)
(void) snprintf(msg, sizeof (msg),
@@ -1226,35 +1192,21 @@ zpool_clear(zpool_handle_t *zhp, const char *path)
zhp->zpool_name);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- if (path && (zc.zc_guid = zpool_vdev_to_guid(zhp, path)) == 0) {
- zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"),
- msg);
- return (-1);
- }
+ if (path) {
+ if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == 0)
+ return (zfs_error(hdl, EZFS_NODEVICE, msg));
- if (zfs_ioctl(ZFS_IOC_CLEAR, &zc) == 0)
- return (0);
+ if (isspare)
+ return (zfs_error(hdl, EZFS_ISSPARE, msg));
- switch (errno) {
- case EPERM:
- /*
- * No permission to mess with the config.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg);
- break;
-
- case ENODEV:
- /*
- * Device doesn't exist.
- */
- zfs_error(dgettext(TEXT_DOMAIN, "%s: device not in pool"), msg);
- break;
-
- default:
- zfs_baderror(errno);
+ verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
+ &zc.zc_guid) == 0);
}
- return (1);
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
+ return (0);
+
+ return (zpool_standard_error(hdl, errno, msg));
}
static int
@@ -1269,9 +1221,9 @@ do_zvol(zfs_handle_t *zhp, void *data)
*/
if (zhp->zfs_volblocksize != 0) {
if (linktype)
- ret = zvol_create_link(zhp->zfs_name);
+ ret = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
else
- ret = zvol_remove_link(zhp->zfs_name);
+ ret = zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name);
}
ret = zfs_iter_children(zhp, do_zvol, data);
@@ -1292,10 +1244,11 @@ zpool_create_zvol_links(zpool_handle_t *zhp)
/*
* If the pool is unavailable, just return success.
*/
- if ((zfp = make_dataset_handle(zhp->zpool_name)) == NULL)
+ if ((zfp = make_dataset_handle(zhp->zpool_hdl,
+ zhp->zpool_name)) == NULL)
return (0);
- ret = zfs_iter_children(zfp, do_zvol, (void *)TRUE);
+ ret = zfs_iter_children(zfp, do_zvol, (void *)B_TRUE);
zfs_close(zfp);
return (ret);
@@ -1313,10 +1266,11 @@ zpool_remove_zvol_links(zpool_handle_t *zhp)
/*
* If the pool is unavailable, just return success.
*/
- if ((zfp = make_dataset_handle(zhp->zpool_name)) == NULL)
+ if ((zfp = make_dataset_handle(zhp->zpool_hdl,
+ zhp->zpool_name)) == NULL)
return (0);
- ret = zfs_iter_children(zfp, do_zvol, (void *)FALSE);
+ ret = zfs_iter_children(zfp, do_zvol, (void *)B_FALSE);
zfs_close(zfp);
return (ret);
@@ -1345,7 +1299,9 @@ devid_to_path(char *devid_str)
if (ret != 0)
return (NULL);
- path = zfs_strdup(list[0].devname);
+ if ((path = strdup(list[0].devname)) == NULL)
+ return (NULL);
+
devid_free_nmlist(list);
return (path);
@@ -1393,7 +1349,7 @@ set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
&zc.zc_guid) == 0);
- (void) zfs_ioctl(ZFS_IOC_VDEV_SETPATH, &zc);
+ (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
}
/*
@@ -1412,7 +1368,7 @@ set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
* of these checks.
*/
char *
-zpool_vdev_name(zpool_handle_t *zhp, nvlist_t *nv)
+zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
{
char *path, *devid;
uint64_t value;
@@ -1442,17 +1398,17 @@ zpool_vdev_name(zpool_handle_t *zhp, nvlist_t *nv)
* Update the path appropriately.
*/
set_path(zhp, nv, newpath);
- verify(nvlist_add_string(nv,
- ZPOOL_CONFIG_PATH, newpath) == 0);
+ if (nvlist_add_string(nv,
+ ZPOOL_CONFIG_PATH, newpath) == 0)
+ verify(nvlist_lookup_string(nv,
+ ZPOOL_CONFIG_PATH,
+ &path) == 0);
free(newpath);
- verify(nvlist_lookup_string(nv,
- ZPOOL_CONFIG_PATH, &path) == 0);
}
-
- if (newdevid)
- devid_str_free(newdevid);
}
+ if (newdevid)
+ devid_str_free(newdevid);
}
if (strncmp(path, "/dev/dsk/", 9) == 0)
@@ -1460,15 +1416,28 @@ zpool_vdev_name(zpool_handle_t *zhp, nvlist_t *nv)
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
&value) == 0 && value) {
- char *tmp = zfs_strdup(path);
+ char *tmp = zfs_strdup(hdl, path);
+ if (tmp == NULL)
+ return (NULL);
tmp[strlen(path) - 2] = '\0';
return (tmp);
}
} else {
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
+
+ /*
+ * If it's a raidz device, we need to stick in the parity level.
+ */
+ if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
+ verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
+ &value) == 0);
+ (void) snprintf(buf, sizeof (buf), "%s%llu", path,
+ value);
+ path = buf;
+ }
}
- return (zfs_strdup(path));
+ return (zfs_strdup(hdl, path));
}
static int
@@ -1502,15 +1471,20 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem)
*/
verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
&count) == 0);
- zc.zc_config_dst = (uintptr_t)zfs_malloc(count * sizeof (zbookmark_t));
+ if ((zc.zc_config_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
+ count * sizeof (zbookmark_t))) == NULL)
+ return (-1);
zc.zc_config_dst_size = count;
(void) strcpy(zc.zc_name, zhp->zpool_name);
for (;;) {
- if (zfs_ioctl(ZFS_IOC_ERROR_LOG, &zc) != 0) {
+ if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
+ &zc) != 0) {
+ free((void *)(uintptr_t)zc.zc_config_dst);
if (errno == ENOMEM) {
- free((void *)(uintptr_t)zc.zc_config_dst);
- zc.zc_config_dst = (uintptr_t)
- zfs_malloc(zc.zc_config_dst_size);
+ if ((zc.zc_config_dst = (uintptr_t)
+ zfs_alloc(zhp->zpool_hdl,
+ zc.zc_config_dst_size)) == NULL)
+ return (-1);
} else {
return (-1);
}
@@ -1549,6 +1523,7 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem)
*/
if (list == NULL) {
*nelem = j;
+ free((void *)(uintptr_t)zc.zc_config_dst);
return (0);
}
@@ -1557,7 +1532,11 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem)
/*
* Allocate an array of nvlists to hold the results
*/
- zhp->zpool_error_log = zfs_malloc(j * sizeof (nvlist_t *));
+ if ((zhp->zpool_error_log = zfs_alloc(zhp->zpool_hdl,
+ j * sizeof (nvlist_t *))) == NULL) {
+ free((void *)(uintptr_t)zc.zc_config_dst);
+ return (-1);
+ }
/*
* Fill in the results with names from the kernel.
@@ -1571,31 +1550,37 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem)
sizeof (zbookmark_t)) == 0)
continue;
- verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
- 0) == 0);
+ if (nvlist_alloc(&nv, NV_UNIQUE_NAME,
+ 0) != 0)
+ goto nomem;
zhp->zpool_error_log[j] = nv;
zc.zc_bookmark = zb[i];
- if (zfs_ioctl(ZFS_IOC_BOOKMARK_NAME, &zc) == 0) {
- verify(nvlist_add_string(nv, ZPOOL_ERR_DATASET,
- zc.zc_prop_name) == 0);
- verify(nvlist_add_string(nv, ZPOOL_ERR_OBJECT,
- zc.zc_prop_value) == 0);
- verify(nvlist_add_string(nv, ZPOOL_ERR_RANGE,
- zc.zc_filename) == 0);
+ if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_BOOKMARK_NAME,
+ &zc) == 0) {
+ if (nvlist_add_string(nv, ZPOOL_ERR_DATASET,
+ zc.zc_prop_name) != 0 ||
+ nvlist_add_string(nv, ZPOOL_ERR_OBJECT,
+ zc.zc_prop_value) != 0 ||
+ nvlist_add_string(nv, ZPOOL_ERR_RANGE,
+ zc.zc_filename) != 0)
+ goto nomem;
} else {
(void) snprintf(buf, sizeof (buf), "%llx",
zb[i].zb_objset);
- verify(nvlist_add_string(nv,
- ZPOOL_ERR_DATASET, buf) == 0);
+ if (nvlist_add_string(nv,
+ ZPOOL_ERR_DATASET, buf) != 0)
+ goto nomem;
(void) snprintf(buf, sizeof (buf), "%llx",
zb[i].zb_object);
- verify(nvlist_add_string(nv, ZPOOL_ERR_OBJECT,
- buf) == 0);
+ if (nvlist_add_string(nv, ZPOOL_ERR_OBJECT,
+ buf) != 0)
+ goto nomem;
(void) snprintf(buf, sizeof (buf), "lvl=%u blkid=%llu",
(int)zb[i].zb_level, (long long)zb[i].zb_blkid);
- verify(nvlist_add_string(nv, ZPOOL_ERR_RANGE,
- buf) == 0);
+ if (nvlist_add_string(nv, ZPOOL_ERR_RANGE,
+ buf) != 0)
+ goto nomem;
}
j++;
@@ -1607,6 +1592,16 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem)
free((void *)(uintptr_t)zc.zc_config_dst);
return (0);
+
+nomem:
+ free((void *)(uintptr_t)zc.zc_config_dst);
+ for (i = 0; i < zhp->zpool_error_count; i++) {
+ if (zhp->zpool_error_log[i])
+ free(zhp->zpool_error_log[i]);
+ }
+ free(zhp->zpool_error_log);
+ zhp->zpool_error_log = NULL;
+ return (no_memory(zhp->zpool_hdl));
}
/*
@@ -1616,20 +1611,13 @@ int
zpool_upgrade(zpool_handle_t *zhp)
{
zfs_cmd_t zc = { 0 };
+ libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) strcpy(zc.zc_name, zhp->zpool_name);
- if (zfs_ioctl(ZFS_IOC_POOL_UPGRADE, &zc) != 0) {
- switch (errno) {
- case EPERM:
- zfs_error(dgettext(TEXT_DOMAIN, "cannot upgrade '%s': "
- "permission denied"), zhp->zpool_name);
- break;
- default:
- zfs_baderror(errno);
- }
-
- return (-1);
- }
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
+ return (zpool_standard_error(hdl, errno,
+ dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
+ zhp->zpool_name));
return (0);
}
diff --git a/usr/src/lib/libzfs/common/libzfs_status.c b/usr/src/lib/libzfs/common/libzfs_status.c
index 258b2e2f7d..2a4164964d 100644
--- a/usr/src/lib/libzfs/common/libzfs_status.c
+++ b/usr/src/lib/libzfs/common/libzfs_status.c
@@ -116,7 +116,7 @@ vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
/*
* Detect if any leaf devices that have seen errors or could not be opened.
*/
-static int
+static boolean_t
find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
{
nvlist_t **child;
@@ -132,13 +132,13 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
*/
verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0);
if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
- return (FALSE);
+ return (B_FALSE);
if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
&children) == 0) {
for (c = 0; c < children; c++)
if (find_vdev_problem(child[c], func))
- return (TRUE);
+ return (B_TRUE);
} else {
verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
(uint64_t **)&vs, &c) == 0);
@@ -147,10 +147,10 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
vs->vs_read_errors +
vs->vs_write_errors +
vs->vs_checksum_errors))
- return (TRUE);
+ return (B_TRUE);
}
- return (FALSE);
+ return (B_FALSE);
}
/*
@@ -171,7 +171,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
* only picks the most damaging of all the current errors to report.
*/
static zpool_status_t
-check_status(nvlist_t *config, int isimport)
+check_status(nvlist_t *config, boolean_t isimport)
{
nvlist_t *nvroot;
vdev_stat_t *vs;
@@ -265,7 +265,7 @@ check_status(nvlist_t *config, int isimport)
zpool_status_t
zpool_get_status(zpool_handle_t *zhp, char **msgid)
{
- zpool_status_t ret = check_status(zhp->zpool_config, FALSE);
+ zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE);
if (ret >= NMSGID)
*msgid = NULL;
@@ -278,7 +278,7 @@ zpool_get_status(zpool_handle_t *zhp, char **msgid)
zpool_status_t
zpool_import_status(nvlist_t *config, char **msgid)
{
- zpool_status_t ret = check_status(config, TRUE);
+ zpool_status_t ret = check_status(config, B_TRUE);
if (ret >= NMSGID)
*msgid = NULL;
diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c
index c7f7528491..29e99dc5b1 100644
--- a/usr/src/lib/libzfs/common/libzfs_util.c
+++ b/usr/src/lib/libzfs/common/libzfs_util.c
@@ -43,90 +43,320 @@
#include "libzfs_impl.h"
-static int zfs_fd = -1;
-static FILE *mnttab_file;
-static FILE *sharetab_file;
-static int sharetab_opened;
+int
+libzfs_errno(libzfs_handle_t *hdl)
+{
+ return (hdl->libzfs_error);
+}
-void (*error_func)(const char *, va_list);
+const char *
+libzfs_error_action(libzfs_handle_t *hdl)
+{
+ return (hdl->libzfs_action);
+}
-/*
- * All error handling is kept within libzfs where we have the most information
- * immediately available. While this may not be suitable for a general purpose
- * library, it greatly simplifies our commands. This command name is used to
- * prefix all error messages appropriately.
- */
+const char *
+libzfs_error_description(libzfs_handle_t *hdl)
+{
+ if (hdl->libzfs_desc[0] != '\0')
+ return (hdl->libzfs_desc);
+
+ switch (hdl->libzfs_error) {
+ case EZFS_NOMEM:
+ return (dgettext(TEXT_DOMAIN, "out of memory"));
+ case EZFS_BADPROP:
+ return (dgettext(TEXT_DOMAIN, "invalid property value"));
+ case EZFS_PROPREADONLY:
+ return (dgettext(TEXT_DOMAIN, "read only property"));
+ case EZFS_PROPTYPE:
+ return (dgettext(TEXT_DOMAIN, "property doesn't apply to "
+ "datasets of this type"));
+ case EZFS_PROPNONINHERIT:
+ return (dgettext(TEXT_DOMAIN, "property cannot be inherited"));
+ case EZFS_PROPSPACE:
+ return (dgettext(TEXT_DOMAIN, "invalid quota or reservation"));
+ case EZFS_BADTYPE:
+ return (dgettext(TEXT_DOMAIN, "operation not applicable to "
+ "datasets of this type"));
+ case EZFS_BUSY:
+ return (dgettext(TEXT_DOMAIN, "pool or dataset is busy"));
+ case EZFS_EXISTS:
+ return (dgettext(TEXT_DOMAIN, "pool or dataset exists"));
+ case EZFS_NOENT:
+ return (dgettext(TEXT_DOMAIN, "no such pool or dataset"));
+ case EZFS_BADSTREAM:
+ return (dgettext(TEXT_DOMAIN, "invalid backup stream"));
+ case EZFS_DSREADONLY:
+ return (dgettext(TEXT_DOMAIN, "dataset is read only"));
+ case EZFS_VOLTOOBIG:
+ return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for "
+ "this system"));
+ case EZFS_VOLHASDATA:
+ return (dgettext(TEXT_DOMAIN, "volume has data"));
+ case EZFS_INVALIDNAME:
+ return (dgettext(TEXT_DOMAIN, "invalid name"));
+ case EZFS_BADRESTORE:
+ return (dgettext(TEXT_DOMAIN, "unable to restore to "
+ "destination"));
+ case EZFS_BADBACKUP:
+ return (dgettext(TEXT_DOMAIN, "backup failed"));
+ case EZFS_BADTARGET:
+ return (dgettext(TEXT_DOMAIN, "invalid target vdev"));
+ case EZFS_NODEVICE:
+ return (dgettext(TEXT_DOMAIN, "no such device in pool"));
+ case EZFS_BADDEV:
+ return (dgettext(TEXT_DOMAIN, "invalid device"));
+ case EZFS_NOREPLICAS:
+ return (dgettext(TEXT_DOMAIN, "no valid replicas"));
+ case EZFS_RESILVERING:
+ return (dgettext(TEXT_DOMAIN, "currently resilvering"));
+ case EZFS_BADVERSION:
+ return (dgettext(TEXT_DOMAIN, "unsupported version"));
+ case EZFS_POOLUNAVAIL:
+ return (dgettext(TEXT_DOMAIN, "pool is unavailable"));
+ case EZFS_DEVOVERFLOW:
+ return (dgettext(TEXT_DOMAIN, "too many devices in one vdev"));
+ case EZFS_BADPATH:
+ return (dgettext(TEXT_DOMAIN, "must be an absolute path"));
+ case EZFS_CROSSTARGET:
+ return (dgettext(TEXT_DOMAIN, "operation crosses datasets or "
+ "pools"));
+ case EZFS_ZONED:
+ return (dgettext(TEXT_DOMAIN, "dataset in use by local zone"));
+ case EZFS_MOUNTFAILED:
+ return (dgettext(TEXT_DOMAIN, "mount failed"));
+ case EZFS_UMOUNTFAILED:
+ return (dgettext(TEXT_DOMAIN, "umount failed"));
+ case EZFS_UNSHAREFAILED:
+ return (dgettext(TEXT_DOMAIN, "unshare(1M) failed"));
+ case EZFS_SHAREFAILED:
+ return (dgettext(TEXT_DOMAIN, "share(1M) failed"));
+ case EZFS_DEVLINKS:
+ return (dgettext(TEXT_DOMAIN, "failed to create /dev links"));
+ case EZFS_PERM:
+ return (dgettext(TEXT_DOMAIN, "permission denied"));
+ case EZFS_NOSPC:
+ return (dgettext(TEXT_DOMAIN, "out of space"));
+ case EZFS_IO:
+ return (dgettext(TEXT_DOMAIN, "I/O error"));
+ case EZFS_INTR:
+ return (dgettext(TEXT_DOMAIN, "signal received"));
+ case EZFS_ISSPARE:
+ return (dgettext(TEXT_DOMAIN, "device is reserved as a hot "
+ "spare"));
+ case EZFS_INVALCONFIG:
+ return (dgettext(TEXT_DOMAIN, "invalid vdev configuration"));
+ case EZFS_UNKNOWN:
+ return (dgettext(TEXT_DOMAIN, "unknown error"));
+ default:
+ abort();
+ }
+
+ /* NOTREACHED */
+}
+
+/*PRINTFLIKE2*/
void
-zfs_error(const char *fmt, ...)
+zfs_error_aux(libzfs_handle_t *hdl, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
- if (error_func != NULL) {
- error_func(fmt, ap);
- } else {
- (void) vfprintf(stderr, fmt, ap);
- (void) fprintf(stderr, "\n");
+ (void) vsnprintf(hdl->libzfs_desc, sizeof (hdl->libzfs_desc),
+ fmt, ap);
+ hdl->libzfs_desc_active = 1;
+
+ va_end(ap);
+}
+
+static void
+zfs_verror(libzfs_handle_t *hdl, int error, const char *fmt, va_list ap)
+{
+ (void) vsnprintf(hdl->libzfs_action, sizeof (hdl->libzfs_action),
+ fmt, ap);
+ hdl->libzfs_error = error;
+
+ if (hdl->libzfs_desc_active)
+ hdl->libzfs_desc_active = 0;
+ else
+ hdl->libzfs_desc[0] = '\0';
+
+ if (hdl->libzfs_printerr) {
+ if (error == EZFS_UNKNOWN) {
+ (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "internal "
+ "error: %s\n"), libzfs_error_description(hdl));
+ abort();
+ }
+
+ (void) fprintf(stderr, "%s: %s\n", hdl->libzfs_action,
+ libzfs_error_description(hdl));
+ if (error == EZFS_NOMEM)
+ exit(1);
}
+}
+
+/*PRINTFLIKE3*/
+int
+zfs_error(libzfs_handle_t *hdl, int error, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+
+ zfs_verror(hdl, error, fmt, ap);
va_end(ap);
+
+ return (-1);
}
-/*
- * An internal error is something that we cannot recover from, and should never
- * happen (such as running out of memory). It should only be used in
- * exceptional circumstances.
- */
-void
-zfs_fatal(const char *fmt, ...)
+static int
+zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt,
+ va_list ap)
+{
+ switch (error) {
+ case EPERM:
+ case EACCES:
+ zfs_verror(hdl, EZFS_PERM, fmt, ap);
+ return (-1);
+
+ case EIO:
+ zfs_verror(hdl, EZFS_IO, fmt, ap);
+ return (-1);
+
+ case EINTR:
+ zfs_verror(hdl, EZFS_INTR, fmt, ap);
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*PRINTFLIKE3*/
+int
+zfs_standard_error(libzfs_handle_t *hdl, int error, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
- if (error_func != NULL) {
- error_func(fmt, ap);
- } else {
- (void) vfprintf(stderr, fmt, ap);
- (void) fprintf(stderr, "\n");
+ if (zfs_common_error(hdl, error, fmt, ap) != 0) {
+ va_end(ap);
+ return (-1);
}
- va_end(ap);
- exit(1);
+ switch (error) {
+ case ENXIO:
+ zfs_verror(hdl, EZFS_IO, fmt, ap);
+ break;
+
+ case ENOENT:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "dataset does not exist"));
+ zfs_verror(hdl, EZFS_NOENT, fmt, ap);
+ break;
+
+ case ENOSPC:
+ case EDQUOT:
+ zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
+ return (-1);
+
+ case EEXIST:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "dataset already exists"));
+ zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
+ break;
+
+ case EBUSY:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "dataset is busy"));
+ zfs_verror(hdl, EZFS_BUSY, fmt, ap);
+ break;
+
+ default:
+ zfs_error_aux(hdl, strerror(errno));
+ zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
+ break;
+ }
+
+ va_end(ap);
+ return (-1);
}
-/*
- * Consumers (such as the JNI interface) that need to capture error output can
- * override the default error handler using this function.
- */
-void
-zfs_set_error_handler(void (*func)(const char *, va_list))
+/*PRINTFLIKE3*/
+int
+zpool_standard_error(libzfs_handle_t *hdl, int error, const char *fmt, ...)
{
- error_func = func;
+ va_list ap;
+
+ va_start(ap, fmt);
+
+ if (zfs_common_error(hdl, error, fmt, ap) != 0) {
+ va_end(ap);
+ return (-1);
+ }
+
+ switch (error) {
+ case ENODEV:
+ zfs_verror(hdl, EZFS_NODEVICE, fmt, ap);
+ break;
+
+ case ENOENT:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
+ zfs_verror(hdl, EZFS_NOENT, fmt, ap);
+ break;
+
+ case EEXIST:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool already exists"));
+ zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
+ break;
+
+ case EBUSY:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool is busy"));
+ zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
+ break;
+
+ case ENXIO:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "one or more devices is currently unavailable"));
+ zfs_verror(hdl, EZFS_BADDEV, fmt, ap);
+ break;
+
+ case ENAMETOOLONG:
+ zfs_verror(hdl, EZFS_DEVOVERFLOW, fmt, ap);
+ break;
+
+ default:
+ zfs_error_aux(hdl, strerror(error));
+ zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
+ }
+
+ va_end(ap);
+ return (-1);
}
/*
* Display an out of memory error message and abort the current program.
*/
-void
-no_memory(void)
+int
+no_memory(libzfs_handle_t *hdl)
{
- assert(errno == ENOMEM);
- zfs_fatal(dgettext(TEXT_DOMAIN, "internal error: out of memory\n"));
+ return (zfs_error(hdl, EZFS_NOMEM, "internal error"));
}
/*
* A safe form of malloc() which will die if the allocation fails.
*/
void *
-zfs_malloc(size_t size)
+zfs_alloc(libzfs_handle_t *hdl, size_t size)
{
void *data;
if ((data = calloc(1, size)) == NULL)
- no_memory();
+ (void) no_memory(hdl);
return (data);
}
@@ -135,69 +365,17 @@ zfs_malloc(size_t size)
* A safe form of strdup() which will die if the allocation fails.
*/
char *
-zfs_strdup(const char *str)
+zfs_strdup(libzfs_handle_t *hdl, const char *str)
{
char *ret;
if ((ret = strdup(str)) == NULL)
- no_memory();
+ (void) no_memory(hdl);
return (ret);
}
/*
- * Utility functions around common used files - /dev/zfs, /etc/mnttab, and
- * /etc/dfs/sharetab.
- */
-int
-zfs_ioctl(int cmd, zfs_cmd_t *zc)
-{
- if (zfs_fd == -1 &&
- (zfs_fd = open(ZFS_DEV, O_RDWR)) < 0)
- zfs_fatal(dgettext(TEXT_DOMAIN, "internal error: unable to "
- "open ZFS device\n"), MNTTAB);
-
- return (ioctl(zfs_fd, cmd, zc));
-}
-
-FILE *
-zfs_mnttab(void)
-{
- if (mnttab_file == NULL &&
- (mnttab_file = fopen(MNTTAB, "r")) == NULL)
- zfs_fatal(dgettext(TEXT_DOMAIN, "internal error: unable to "
- "open %s\n"), MNTTAB);
-
- return (mnttab_file);
-}
-
-FILE *
-zfs_sharetab(void)
-{
- if (sharetab_opened)
- return (sharetab_file);
-
- sharetab_opened = TRUE;
- return (sharetab_file = fopen("/etc/dfs/sharetab", "r"));
-}
-
-/*
- * Cleanup function for library. Close any file descriptors that were
- * opened as part of the above functions.
- */
-#pragma fini(zfs_fini)
-void
-zfs_fini(void)
-{
- if (zfs_fd != -1)
- (void) close(zfs_fd);
- if (sharetab_file)
- (void) fclose(sharetab_file);
- if (mnttab_file)
- (void) fclose(mnttab_file);
-}
-
-/*
* Convert a number to an appropriately human-readable output.
*/
void
@@ -241,3 +419,58 @@ zfs_nicenum(uint64_t num, char *buf, size_t buflen)
}
}
}
+
+void
+libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr)
+{
+ hdl->libzfs_printerr = printerr;
+}
+
+libzfs_handle_t *
+libzfs_init(void)
+{
+ libzfs_handle_t *hdl;
+
+ if ((hdl = calloc(sizeof (libzfs_handle_t), 1)) == NULL) {
+ return (NULL);
+ }
+
+ if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR)) == NULL) {
+ free(hdl);
+ return (NULL);
+ }
+
+ if ((hdl->libzfs_mnttab = fopen(MNTTAB, "r")) == NULL) {
+ (void) close(hdl->libzfs_fd);
+ free(hdl);
+ return (NULL);
+ }
+
+ hdl->libzfs_sharetab = fopen("/etc/dfs/sharetab", "r");
+
+ return (hdl);
+}
+
+void
+libzfs_fini(libzfs_handle_t *hdl)
+{
+ (void) close(hdl->libzfs_fd);
+ if (hdl->libzfs_mnttab)
+ (void) fclose(hdl->libzfs_mnttab);
+ if (hdl->libzfs_sharetab)
+ (void) fclose(hdl->libzfs_sharetab);
+ namespace_clear(hdl);
+ free(hdl);
+}
+
+libzfs_handle_t *
+zpool_get_handle(zpool_handle_t *zhp)
+{
+ return (zhp->zpool_hdl);
+}
+
+libzfs_handle_t *
+zfs_get_handle(zfs_handle_t *zhp)
+{
+ return (zhp->zfs_hdl);
+}
diff --git a/usr/src/lib/libzfs/spec/libzfs.spec b/usr/src/lib/libzfs/spec/libzfs.spec
index 1789122711..6120603e18 100644
--- a/usr/src/lib/libzfs/spec/libzfs.spec
+++ b/usr/src/lib/libzfs/spec/libzfs.spec
@@ -24,6 +24,30 @@
#
#ident "%Z%%M% %I% %E% SMI"
+function libzfs_fini
+version SUNWprivate_1.1
+end
+
+function libzfs_init
+version SUNWprivate_1.1
+end
+
+function libzfs_errno
+version SUNWprivate_1.1
+end
+
+function libzfs_error_action
+version SUNWprivate_1.1
+end
+
+function libzfs_error_description
+version SUNWprivate_1.1
+end
+
+function libzfs_print_on_error
+version SUNWprivate_1.1
+end
+
function zfs_clone
version SUNWprivate_1.1
end
@@ -40,6 +64,10 @@ function zfs_destroy
version SUNWprivate_1.1
end
+function zfs_get_handle
+version SUNWprivate_1.1
+end
+
function zfs_get_name
version SUNWprivate_1.1
end
@@ -104,6 +132,10 @@ function zfs_open
version SUNWprivate_1.1
end
+function zfs_promote
+version SUNWprivate_1.1
+end
+
function zfs_prop_column_name
version SUNWprivate_1.1
end
@@ -188,10 +220,6 @@ function zfs_send
version SUNWprivate_1.1
end
-function zfs_set_error_handler
-version SUNWprivate_1.1
-end
-
function zfs_share
version SUNWprivate_1.1
end
@@ -248,6 +276,10 @@ function zpool_export
version SUNWprivate_1.1
end
+function zpool_find_vdev
+version SUNWprivate_1.1
+end
+
function zpool_find_import
version SUNWprivate_1.1
end
@@ -264,6 +296,10 @@ function zpool_get_guid
version SUNWprivate_1.1
end
+function zpool_get_handle
+version SUNWprivate_1.1
+end
+
function zpool_get_name
version SUNWprivate_1.1
end
@@ -288,6 +324,10 @@ function zpool_get_status
version SUNWprivate_1.1
end
+function zpool_get_version
+version SUNWprivate_1.1
+end
+
function zpool_import
version SUNWprivate_1.1
end
@@ -352,6 +392,7 @@ function zpool_vdev_name
version SUNWprivate_1.1
end
-function zpool_vdev_to_guid
-version SUNWprivate_1.1
+function zpool_vdev_remove
+version SUNWprivate_1.1
end
+
diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c b/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c
index 64270f2cd7..2daeca32e2 100644
--- a/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c
+++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c
@@ -574,7 +574,7 @@ is_fs_snapshot(zfs_handle_t *zhp)
zjni_get_dataset_from_snapshot(
zfs_get_name(zhp), parent, sizeof (parent));
- parent_zhp = zfs_open(parent, ZFS_TYPE_ANY);
+ parent_zhp = zfs_open(g_zfs, parent, ZFS_TYPE_ANY);
if (parent_zhp == NULL) {
return (-1);
}
@@ -606,7 +606,8 @@ zjni_create_add_Pool(zpool_handle_t *zphp, void *data)
zjni_Collection_t *list = ((zjni_ArrayCallbackData_t *)data)->list;
/* Get root fs for this pool -- may be NULL if pool is faulted */
- zfs_handle_t *zhp = zfs_open(zpool_get_name(zphp), ZFS_TYPE_FILESYSTEM);
+ zfs_handle_t *zhp = zfs_open(g_zfs, zpool_get_name(zphp),
+ ZFS_TYPE_FILESYSTEM);
jobject bean = create_PoolBean(env, zphp, zhp);
@@ -682,7 +683,7 @@ zjni_get_Datasets_below(JNIEnv *env, jstring parentUTF,
zjni_new_DatasetSet(env, list);
/* Retrieve parent dataset */
- zhp = zfs_open(name, parent_typemask);
+ zhp = zfs_open(g_zfs, name, parent_typemask);
if (zhp != NULL) {
zjni_DatasetArrayCallbackData_t data = {0};
@@ -703,7 +704,7 @@ zjni_get_Datasets_below(JNIEnv *env, jstring parentUTF,
/* Parent is not a dataset -- see if it's a faulted pool */
if ((parent_typemask & ZFS_TYPE_FILESYSTEM) &&
is_pool_name(name)) {
- zpool_handle_t *zphp = zpool_open_canfail(name);
+ zpool_handle_t *zphp = zpool_open_canfail(g_zfs, name);
if (zphp != NULL) {
/* A faulted pool has no datasets */
@@ -750,7 +751,7 @@ zjni_get_Datasets_dependents(JNIEnv *env, jobjectArray paths)
const char *path =
(*env)->GetStringUTFChars(env, pathUTF, NULL);
- zfs_handle_t *zhp = zfs_open(path, ZFS_TYPE_ANY);
+ zfs_handle_t *zhp = zfs_open(g_zfs, path, ZFS_TYPE_ANY);
if (zhp != NULL) {
/* Add all dependents of this Dataset to list */
(void) zfs_iter_dependents(zhp,
@@ -762,7 +763,8 @@ zjni_get_Datasets_dependents(JNIEnv *env, jobjectArray paths)
/* Path is not a dataset - see if it's a faulted pool */
if (is_pool_name(path)) {
- zpool_handle_t *zphp = zpool_open_canfail(path);
+ zpool_handle_t *zphp = zpool_open_canfail(g_zfs,
+ path);
if (zphp != NULL) {
/*
@@ -795,10 +797,10 @@ zjni_get_Dataset(JNIEnv *env, jstring nameUTF, zfs_type_t typemask)
{
jobject device = NULL;
const char *name = (*env)->GetStringUTFChars(env, nameUTF, NULL);
- zfs_handle_t *zhp = zfs_open(name, typemask);
+ zfs_handle_t *zhp = zfs_open(g_zfs, name, typemask);
if ((typemask & ZFS_TYPE_FILESYSTEM) && is_pool_name(name)) {
- zpool_handle_t *zphp = zpool_open_canfail(name);
+ zpool_handle_t *zphp = zpool_open_canfail(g_zfs, name);
if (zphp != NULL) {
device = create_PoolBean(env, zphp, zhp);
diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_main.c b/usr/src/lib/libzfs_jni/common/libzfs_jni_main.c
index 34500684d3..a699ecd7ce 100644
--- a/usr/src/lib/libzfs_jni/common/libzfs_jni_main.c
+++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_main.c
@@ -35,6 +35,8 @@
#include "libzfs_jni_diskmgt.h"
#include "libzfs_jni_disk.h"
+libzfs_handle_t *g_zfs;
+
/*
* Function prototypes
*/
@@ -46,14 +48,14 @@ static void init();
* Static functions
*/
-char libzfs_err[1024];
+char libdskmgt_err[1024];
static void
handle_error(const char *fmt, va_list ap)
{
/* Save the error message in case it's needed */
- (void) vsnprintf(libzfs_err, sizeof (libzfs_err), fmt, ap);
+ (void) vsnprintf(libdskmgt_err, sizeof (libdskmgt_err), fmt, ap);
#ifdef DEBUG
- (void) fprintf(stderr, "caught error: %s\n", libzfs_err);
+ (void) fprintf(stderr, "caught error: %s\n", libdskmgt_err);
#endif
}
@@ -64,10 +66,8 @@ handle_error(const char *fmt, va_list ap)
static void
init()
{
- libzfs_err[0] = '\0';
-
- /* libzfs error handler */
- zfs_set_error_handler(handle_error);
+ if ((g_zfs = libzfs_init()) == NULL)
+ abort();
/* diskmgt.o error handler */
dmgt_set_error_handler(handle_error);
@@ -151,7 +151,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getPools(JNIEnv *env, jobject obj)
data.env = env;
data.list = (zjni_Collection_t *)list;
- result = zpool_iter(zjni_create_add_Pool, &data);
+ result = zpool_iter(g_zfs, zjni_create_add_Pool, &data);
if (result && (*env)->ExceptionOccurred(env) != NULL) {
/* Must not call any more Java methods to preserve exception */
return (NULL);
@@ -334,7 +334,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getVirtualDevice(JNIEnv *env,
if (poolUTF != NULL) {
const char *pool = (*env)->GetStringUTFChars(env, poolUTF,
NULL);
- zpool_handle_t *zhp = zpool_open_canfail(pool);
+ zpool_handle_t *zhp = zpool_open_canfail(g_zfs, pool);
(*env)->ReleaseStringUTFChars(env, poolUTF, pool);
if (zhp != NULL) {
@@ -371,7 +371,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getVirtualDevices__Ljava_lang_Stri
if (poolUTF != NULL) {
const char *pool = (*env)->GetStringUTFChars(env, poolUTF,
NULL);
- zpool_handle_t *zhp = zpool_open_canfail(pool);
+ zpool_handle_t *zhp = zpool_open_canfail(g_zfs, pool);
(*env)->ReleaseStringUTFChars(env, poolUTF, pool);
/* Is the pool valid? */
@@ -408,7 +408,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getVirtualDevices__Ljava_lang_Stri
if (poolUTF != NULL) {
const char *pool = (*env)->GetStringUTFChars(env,
poolUTF, NULL);
- zpool_handle_t *zhp = zpool_open_canfail(pool);
+ zpool_handle_t *zhp = zpool_open_canfail(g_zfs, pool);
(*env)->ReleaseStringUTFChars(env, poolUTF, pool);
/* Is the pool valid? */
@@ -446,7 +446,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getAvailableDisks(JNIEnv *env,
error = dmgt_avail_disk_iter(zjni_create_add_DiskDevice, &data);
if (error) {
- zjni_throw_exception(env, "%s", libzfs_err);
+ zjni_throw_exception(env, "%s", libdskmgt_err);
} else {
array = zjni_Collection_to_array(
env, (zjni_Collection_t *)list,
diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c b/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c
index d9d09804ec..0e228460dc 100644
--- a/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c
+++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c
@@ -26,6 +26,7 @@
#pragma ident "%Z%%M% %I% %E% SMI"
+#include "libzfs_jni_util.h"
#include "libzfs_jni_pool.h"
#include <strings.h>
@@ -1110,7 +1111,7 @@ zjni_pool_status_to_obj(JNIEnv *env, zpool_status_t status)
int
zjni_ipool_iter(int argc, char **argv, zjni_ipool_iter_f func, void *data)
{
- nvlist_t *pools = zpool_find_import(argc, argv);
+ nvlist_t *pools = zpool_find_import(g_zfs, argc, argv);
if (pools != NULL) {
nvpair_t *elem = NULL;
diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_util.h b/usr/src/lib/libzfs_jni/common/libzfs_jni_util.h
index 1b878a4977..b6989239ac 100644
--- a/usr/src/lib/libzfs_jni/common/libzfs_jni_util.h
+++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_util.h
@@ -32,6 +32,7 @@
#include <jni.h>
#include <regex.h>
#include <libnvpair.h>
+#include <libzfs.h>
#ifdef __cplusplus
extern "C" {
@@ -105,6 +106,8 @@ int zjni_count_elements(void **);
nvpair_t *zjni_nvlist_walk_nvpair(
nvlist_t *, const char *, data_type_t, nvpair_t *);
+extern libzfs_handle_t *g_zfs;
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/lib/libzpool/common/util.c b/usr/src/lib/libzpool/common/util.c
index 094c8b6c6f..df49adbc7a 100644
--- a/usr/src/lib/libzpool/common/util.c
+++ b/usr/src/lib/libzpool/common/util.c
@@ -111,11 +111,17 @@ show_vdev_stats(const char *desc, nvlist_t *nv, int indent)
for (c = 0; c < children; c++) {
nvlist_t *cnv = child[c];
- char *cname;
+ char *cname, *tname;
+ uint64_t np;
if (nvlist_lookup_string(cnv, ZPOOL_CONFIG_PATH, &cname) &&
nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &cname))
cname = "<unknown>";
- show_vdev_stats(cname, cnv, indent + 2);
+ tname = calloc(1, strlen(cname) + 2);
+ (void) strcpy(tname, cname);
+ if (nvlist_lookup_uint64(cnv, ZPOOL_CONFIG_NPARITY, &np) == 0)
+ tname[strlen(tname)] = '0' + np;
+ show_vdev_stats(tname, cnv, indent + 2);
+ free(tname);
}
}
diff --git a/usr/src/pkgdefs/SUNWfmd/prototype_com b/usr/src/pkgdefs/SUNWfmd/prototype_com
index 01b0e4cea5..c99e4aca59 100644
--- a/usr/src/pkgdefs/SUNWfmd/prototype_com
+++ b/usr/src/pkgdefs/SUNWfmd/prototype_com
@@ -74,6 +74,8 @@ f none usr/lib/fm/fmd/plugins/syslog-msgs.conf 644 root bin
f none usr/lib/fm/fmd/plugins/syslog-msgs.so 555 root bin
f none usr/lib/fm/fmd/plugins/zfs-diagnosis.conf 644 root bin
f none usr/lib/fm/fmd/plugins/zfs-diagnosis.so 555 root bin
+f none usr/lib/fm/fmd/plugins/zfs-retire.conf 644 root bin
+f none usr/lib/fm/fmd/plugins/zfs-retire.so 555 root bin
d none usr/lib/fm/fmd/schemes 755 root bin
f none usr/lib/fm/fmd/schemes/cpu.so 555 root bin
f none usr/lib/fm/fmd/schemes/dev.so 555 root bin
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index ba07ea12d4..f979159f8a 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -356,8 +356,6 @@ buf_hash_find(spa_t *spa, dva_t *dva, uint64_t birth, kmutex_t **lockp)
* will be returned and the new element will not be inserted.
* Otherwise returns NULL.
*/
-static arc_buf_hdr_t *fbufs[4]; /* XXX to find 6341326 */
-static kthread_t *fbufs_lastthread;
static arc_buf_hdr_t *
buf_hash_insert(arc_buf_hdr_t *buf, kmutex_t **lockp)
{
@@ -367,13 +365,10 @@ buf_hash_insert(arc_buf_hdr_t *buf, kmutex_t **lockp)
uint32_t max, i;
ASSERT(!HDR_IN_HASH_TABLE(buf));
- fbufs_lastthread = curthread;
*lockp = hash_lock;
mutex_enter(hash_lock);
for (fbuf = buf_hash_table.ht_table[idx], i = 0; fbuf != NULL;
fbuf = fbuf->b_hash_next, i++) {
- if (i < sizeof (fbufs) / sizeof (fbufs[0]))
- fbufs[i] = fbuf;
if (BUF_EQUAL(buf->b_spa, &buf->b_dva, buf->b_birth, fbuf))
return (fbuf);
}
diff --git a/usr/src/uts/common/fs/zfs/bplist.c b/usr/src/uts/common/fs/zfs/bplist.c
index db0d3534d6..4442b1f28a 100644
--- a/usr/src/uts/common/fs/zfs/bplist.c
+++ b/usr/src/uts/common/fs/zfs/bplist.c
@@ -45,12 +45,13 @@ bplist_hold(bplist_t *bpl)
uint64_t
bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
{
- uint64_t obj;
+ int size;
- obj = dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
- DMU_OT_BPLIST_HDR, sizeof (bplist_phys_t), tx);
+ size = spa_version(dmu_objset_spa(mos)) < ZFS_VERSION_BPLIST_ACCOUNT ?
+ BPLIST_SIZE_V0 : sizeof (bplist_phys_t);
- return (obj);
+ return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
+ DMU_OT_BPLIST_HDR, size, tx));
}
void
@@ -76,11 +77,14 @@ bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
ASSERT(bpl->bpl_cached_dbuf == NULL);
ASSERT(bpl->bpl_queue == NULL);
ASSERT(object != 0);
+ ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
+ ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);
bpl->bpl_mos = mos;
bpl->bpl_object = object;
bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
+ bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));
mutex_exit(&bpl->bpl_lock);
return (0);
@@ -210,7 +214,12 @@ bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx)
dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
bpl->bpl_phys->bpl_entries++;
- bpl->bpl_phys->bpl_bytes += BP_GET_ASIZE(bp);
+ bpl->bpl_phys->bpl_bytes +=
+ bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp);
+ if (bpl->bpl_havecomp) {
+ bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
+ bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
+ }
mutex_exit(&bpl->bpl_lock);
return (0);
@@ -259,5 +268,45 @@ bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
bpl->bpl_object, 0, -1ULL, tx));
bpl->bpl_phys->bpl_entries = 0;
bpl->bpl_phys->bpl_bytes = 0;
+ if (bpl->bpl_havecomp) {
+ bpl->bpl_phys->bpl_comp = 0;
+ bpl->bpl_phys->bpl_uncomp = 0;
+ }
+ mutex_exit(&bpl->bpl_lock);
+}
+
+int
+bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
+{
+ uint64_t itor = 0, comp = 0, uncomp = 0;
+ int err;
+ blkptr_t bp;
+
+ mutex_enter(&bpl->bpl_lock);
+
+ err = bplist_hold(bpl);
+ if (err) {
+ mutex_exit(&bpl->bpl_lock);
+ return (err);
+ }
+
+ *usedp = bpl->bpl_phys->bpl_bytes;
+ if (bpl->bpl_havecomp) {
+ *compp = bpl->bpl_phys->bpl_comp;
+ *uncompp = bpl->bpl_phys->bpl_uncomp;
+ }
mutex_exit(&bpl->bpl_lock);
+
+ if (!bpl->bpl_havecomp) {
+ while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
+ comp += BP_GET_PSIZE(&bp);
+ uncomp += BP_GET_UCSIZE(&bp);
+ }
+ if (err == ENOENT)
+ err = 0;
+ *compp = comp;
+ *uncompp = uncomp;
+ }
+
+ return (err);
}
diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c
index 2135427b7a..e78f49c4f9 100644
--- a/usr/src/uts/common/fs/zfs/dbuf.c
+++ b/usr/src/uts/common/fs/zfs/dbuf.c
@@ -1029,7 +1029,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
* it's OK if we get an odd answer.
*/
dnode_willuse_space(dn,
- -BP_GET_ASIZE(db->db_blkptr), tx);
+ -bp_get_dasize(os->os_spa, db->db_blkptr), tx);
}
dnode_willuse_space(dn, db->db.db_size, tx);
}
@@ -1951,8 +1951,8 @@ dbuf_sync(dmu_buf_impl_t *db, zio_t *zio, dmu_tx_t *tx)
arc_buf_t **old =
(arc_buf_t **)&db->db_d.db_data_old[txg&TXG_MASK];
blkptr_t **bpp = &db->db_d.db_overridden_by[txg&TXG_MASK];
- int old_size = BP_GET_ASIZE(db->db_blkptr);
- int new_size = BP_GET_ASIZE(*bpp);
+ int old_size = bp_get_dasize(os->os_spa, db->db_blkptr);
+ int new_size = bp_get_dasize(os->os_spa, *bpp);
ASSERT(db->db_blkid != DB_BONUS_BLKID);
@@ -2078,8 +2078,8 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
dprintf_dbuf_bp(db, &zio->io_bp_orig, "bp_orig: %s", "");
- old_size = BP_GET_ASIZE(&zio->io_bp_orig);
- new_size = BP_GET_ASIZE(zio->io_bp);
+ old_size = bp_get_dasize(os->os_spa, &zio->io_bp_orig);
+ new_size = bp_get_dasize(os->os_spa, zio->io_bp);
dnode_diduse_space(dn, new_size-old_size);
diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c
index 52c8413c9a..77886f5e24 100644
--- a/usr/src/uts/common/fs/zfs/dmu.c
+++ b/usr/src/uts/common/fs/zfs/dmu.c
@@ -789,7 +789,7 @@ replay_incremental_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
/* The point of no (unsuccessful) return. */
dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_phys->ds_inconsistent = TRUE;
+ ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
return (0);
@@ -841,7 +841,7 @@ replay_full_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
ds, drrb->drr_type, tx);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_phys->ds_inconsistent = TRUE;
+ ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
return (0);
@@ -875,7 +875,7 @@ replay_end_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_creation_time = drrb->drr_creation_time;
ds->ds_phys->ds_guid = drrb->drr_toguid;
- ds->ds_phys->ds_inconsistent = FALSE;
+ ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
dsl_dataset_close(ds, DS_MODE_PRIMARY, FTAG);
@@ -883,7 +883,7 @@ replay_end_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
dd->dd_phys->dd_head_dataset_obj,
NULL, DS_MODE_STANDARD | DS_MODE_INCONSISTENT, FTAG, &ds));
dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_phys->ds_inconsistent = FALSE;
+ ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
return (0);
@@ -1686,7 +1686,8 @@ dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
doi->doi_indirection = dn->dn_nlevels;
doi->doi_checksum = dn->dn_checksum;
doi->doi_compress = dn->dn_compress;
- doi->doi_physical_blks = dn->dn_phys->dn_secphys;
+ doi->doi_physical_blks = (DN_USED_BYTES(dn->dn_phys) +
+ SPA_MINBLOCKSIZE/2) >> SPA_MINBLOCKSHIFT;
doi->doi_max_block_offset = dn->dn_phys->dn_maxblkid;
doi->doi_type = dn->dn_type;
doi->doi_bonus_size = dn->dn_bonuslen;
@@ -1735,7 +1736,9 @@ dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512)
dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode;
*blksize = dn->dn_datablksz;
- *nblk512 = dn->dn_phys->dn_secphys + 1; /* add 1 for dnode space */
+ /* add 1 for dnode space */
+ *nblk512 = ((DN_USED_BYTES(dn->dn_phys) + SPA_MINBLOCKSIZE/2) >>
+ SPA_MINBLOCKSHIFT) + 1;
}
/*
diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c
index 248612e3cc..3d5f1f7b5c 100644
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c
@@ -164,9 +164,10 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
/*
* Note: the changed_cb will be called once before the register
* func returns, thus changing the checksum/compression from the
- * default (fletcher2/off).
+ * default (fletcher2/off). Snapshots don't need to know, and
+ * registering would complicate clone promotion.
*/
- if (ds) {
+ if (ds && ds->ds_phys->ds_num_children == 0) {
err = dsl_prop_register(ds, "checksum",
checksum_changed_cb, osi);
if (err == 0)
@@ -177,7 +178,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
kmem_free(osi, sizeof (objset_impl_t));
return (err);
}
- } else {
+ } else if (ds == NULL) {
/* It's the meta-objset. */
osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
osi->os_compress = ZIO_COMPRESS_LZJB;
@@ -329,21 +330,18 @@ dmu_objset_evict(dsl_dataset_t *ds, void *arg)
{
objset_impl_t *osi = arg;
objset_t os;
- int err, i;
+ int i;
for (i = 0; i < TXG_SIZE; i++) {
ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL);
ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL);
}
- if (ds) {
- err = dsl_prop_unregister(ds, "checksum",
- checksum_changed_cb, osi);
- ASSERT(err == 0);
-
- err = dsl_prop_unregister(ds, "compression",
- compression_changed_cb, osi);
- ASSERT(err == 0);
+ if (ds && ds->ds_phys->ds_num_children == 0) {
+ VERIFY(0 == dsl_prop_unregister(ds, "checksum",
+ checksum_changed_cb, osi));
+ VERIFY(0 == dsl_prop_unregister(ds, "compression",
+ compression_changed_cb, osi));
}
/*
diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c
index 91ee5c5062..1b4a0c2bd0 100644
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c
@@ -181,8 +181,9 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
/*
* For i/o error checking, read the first and last level-0
- * blocks, and all the level-1 blocks. We needn't do this on
- * the meta-dnode, because we've already read it in.
+ * blocks (if they are not aligned), and all the level-1 blocks.
+ * We needn't do this on the meta-dnode, because we've already
+ * read it in.
*/
if (dn && dn->dn_object != DMU_META_DNODE_OBJECT) {
@@ -199,16 +200,20 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
NULL, NULL, ZIO_FLAG_CANFAIL);
/* first level-0 block */
- start = off/dn->dn_datablksz;
- err = dmu_tx_check_ioerr(zio, dn, 0, start);
- if (err) {
- tx->tx_err = err;
- return;
+ start = off >> dn->dn_datablkshift;
+ if (P2PHASE(off, dn->dn_datablksz) ||
+ len < dn->dn_datablksz) {
+ err = dmu_tx_check_ioerr(zio, dn, 0, start);
+ if (err) {
+ tx->tx_err = err;
+ return;
+ }
}
/* last level-0 block */
- end = (off+len)/dn->dn_datablksz;
- if (end != start) {
+ end = (off+len-1) >> dn->dn_datablkshift;
+ if (end != start &&
+ P2PHASE(off+len, dn->dn_datablksz)) {
err = dmu_tx_check_ioerr(zio, dn, 0, end);
if (err) {
tx->tx_err = err;
@@ -330,6 +335,7 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
uint64_t blkid, nblks;
uint64_t space = 0;
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
+ spa_t *spa = tx->tx_pool->dp_spa;
int dirty;
/*
@@ -388,7 +394,7 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
bp += blkid + i;
if (dsl_dataset_block_freeable(ds, bp->blk_birth)) {
dprintf_bp(bp, "can free old%s", "");
- space += BP_GET_ASIZE(bp);
+ space += bp_get_dasize(spa, bp);
}
}
nblks = 0;
@@ -423,7 +429,7 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
bp[i].blk_birth)) {
dprintf_bp(&bp[i],
"can free old%s", "");
- space += BP_GET_ASIZE(&bp[i]);
+ space += bp_get_dasize(spa, &bp[i]);
}
}
dbuf_rele(dbuf, FTAG);
diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c
index 6de40f5081..43f1d4f135 100644
--- a/usr/src/uts/common/fs/zfs/dnode.c
+++ b/usr/src/uts/common/fs/zfs/dnode.c
@@ -177,17 +177,10 @@ dnode_byteswap(dnode_phys_t *dnp)
return;
}
- dnp->dn_type = BSWAP_8(dnp->dn_type);
- dnp->dn_indblkshift = BSWAP_8(dnp->dn_indblkshift);
- dnp->dn_nlevels = BSWAP_8(dnp->dn_nlevels);
- dnp->dn_nblkptr = BSWAP_8(dnp->dn_nblkptr);
- dnp->dn_bonustype = BSWAP_8(dnp->dn_bonustype);
- dnp->dn_checksum = BSWAP_8(dnp->dn_checksum);
- dnp->dn_compress = BSWAP_8(dnp->dn_compress);
dnp->dn_datablkszsec = BSWAP_16(dnp->dn_datablkszsec);
dnp->dn_bonuslen = BSWAP_16(dnp->dn_bonuslen);
dnp->dn_maxblkid = BSWAP_64(dnp->dn_maxblkid);
- dnp->dn_secphys = BSWAP_64(dnp->dn_secphys);
+ dnp->dn_used = BSWAP_64(dnp->dn_used);
/*
* dn_nblkptr is only one byte, so it's OK to read it in either
@@ -1110,27 +1103,29 @@ dnode_block_freed(dnode_t *dn, uint64_t blkid)
/* call from syncing context when we actually write/free space for this dnode */
void
-dnode_diduse_space(dnode_t *dn, int64_t space)
+dnode_diduse_space(dnode_t *dn, int64_t delta)
{
- uint64_t sectors;
-
- dprintf_dnode(dn, "dn=%p dnp=%p secphys=%llu space=%lld\n",
+ uint64_t space;
+ dprintf_dnode(dn, "dn=%p dnp=%p used=%llu delta=%lld\n",
dn, dn->dn_phys,
- (u_longlong_t)dn->dn_phys->dn_secphys,
- (longlong_t)space);
-
- ASSERT(P2PHASE(space, 1<<DEV_BSHIFT) == 0);
+ (u_longlong_t)dn->dn_phys->dn_used,
+ (longlong_t)delta);
mutex_enter(&dn->dn_mtx);
- if (space > 0) {
- sectors = space >> DEV_BSHIFT;
- ASSERT3U(dn->dn_phys->dn_secphys + sectors, >=,
- dn->dn_phys->dn_secphys);
- dn->dn_phys->dn_secphys += sectors;
+ space = DN_USED_BYTES(dn->dn_phys);
+ if (delta > 0) {
+ ASSERT3U(space + delta, >=, space); /* no overflow */
+ } else {
+ ASSERT3U(space, >=, -delta); /* no underflow */
+ }
+ space += delta;
+ if (spa_version(dn->dn_objset->os_spa) < ZFS_VERSION_DNODE_BYTES) {
+ ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0);
+ ASSERT3U(P2PHASE(space, 1<<DEV_BSHIFT), ==, 0);
+ dn->dn_phys->dn_used = space >> DEV_BSHIFT;
} else {
- sectors = -space >> DEV_BSHIFT;
- ASSERT3U(dn->dn_phys->dn_secphys, >=, sectors);
- dn->dn_phys->dn_secphys -= sectors;
+ dn->dn_phys->dn_used = space;
+ dn->dn_phys->dn_flags |= DNODE_FLAG_USED_BYTES;
}
mutex_exit(&dn->dn_mtx);
}
diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c
index 80ac38c86a..5bb538980e 100644
--- a/usr/src/uts/common/fs/zfs/dnode_sync.c
+++ b/usr/src/uts/common/fs/zfs/dnode_sync.c
@@ -119,8 +119,8 @@ free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
if (BP_IS_HOLE(bp))
continue;
- bytesfreed += BP_GET_ASIZE(bp);
- ASSERT3U(bytesfreed >> DEV_BSHIFT, <=, dn->dn_phys->dn_secphys);
+ bytesfreed += bp_get_dasize(os->os_spa, bp);
+ ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));
dsl_dataset_block_kill(os->os_dsl_dataset, bp, tx);
}
dnode_diduse_space(dn, -bytesfreed);
@@ -457,7 +457,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
/* free up all the blocks in the file. */
dnode_sync_free_range(dn, 0, dn->dn_phys->dn_maxblkid+1, tx);
- ASSERT3U(dn->dn_phys->dn_secphys, ==, 0);
+ ASSERT3U(DN_USED_BYTES(dn->dn_phys), ==, 0);
/* ASSERT(blkptrs are zero); */
ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c
index 5b1de1b4b8..a199aec8de 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c
@@ -28,6 +28,7 @@
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
+#include <sys/dsl_prop.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
@@ -43,10 +44,6 @@ static int dsl_dataset_destroy_begin_sync(dsl_dir_t *dd,
#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE
-#define BP_GET_UCSIZE(bp) \
- ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
- BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp));
-
/*
* We use weighted reference counts to express the various forms of exclusion
* between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open
@@ -68,7 +65,7 @@ static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
void
dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
- int used = BP_GET_ASIZE(bp);
+ int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
int compressed = BP_GET_PSIZE(bp);
int uncompressed = BP_GET_UCSIZE(bp);
@@ -105,7 +102,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
void
dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
- int used = BP_GET_ASIZE(bp);
+ int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
int compressed = BP_GET_PSIZE(bp);
int uncompressed = BP_GET_UCSIZE(bp);
@@ -155,8 +152,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
ds->ds_phys->ds_prev_snap_obj);
ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
- ds->ds_object &&
- bp->blk_birth >
+ ds->ds_object && bp->blk_birth >
ds->ds_prev->ds_phys->ds_prev_snap_txg) {
dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
mutex_enter(&ds->ds_prev->ds_lock);
@@ -373,7 +369,8 @@ dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
mutex_enter(&ds->ds_lock);
if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
- ds->ds_phys->ds_inconsistent && !DS_MODE_IS_INCONSISTENT(mode)) ||
+ (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
+ !DS_MODE_IS_INCONSISTENT(mode)) ||
(ds->ds_open_refcount + weight > DOS_REF_MAX)) {
mutex_exit(&ds->ds_lock);
dsl_dataset_close(ds, DS_MODE_NONE, tag);
@@ -842,7 +839,7 @@ kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
* Since this callback is not called concurrently, no lock is
* needed on the accounting values.
*/
- *ka->usedp += BP_GET_ASIZE(bp);
+ *ka->usedp += bp_get_dasize(spa, bp);
*ka->compressedp += BP_GET_PSIZE(bp);
*ka->uncompressedp += BP_GET_UCSIZE(bp);
/* XXX check for EIO? */
@@ -939,7 +936,7 @@ dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
ds->ds_prev->ds_phys->ds_compressed_bytes;
ds->ds_phys->ds_uncompressed_bytes =
ds->ds_prev->ds_phys->ds_uncompressed_bytes;
- ds->ds_phys->ds_inconsistent = ds->ds_prev->ds_phys->ds_inconsistent;
+ ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
ds->ds_phys->ds_unique_bytes = 0;
dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
@@ -969,7 +966,7 @@ dsl_dataset_destroy_begin_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
/* Mark it as inconsistent on-disk, in case we crash */
dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_phys->ds_inconsistent = TRUE;
+ ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
return (0);
}
@@ -1120,10 +1117,10 @@ dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
bp.blk_birth >
ds_prev->ds_phys->ds_prev_snap_txg) {
ds_prev->ds_phys->ds_unique_bytes +=
- BP_GET_ASIZE(&bp);
+ bp_get_dasize(dp->dp_spa, &bp);
}
} else {
- used += BP_GET_ASIZE(&bp);
+ used += bp_get_dasize(dp->dp_spa, &bp);
compressed += BP_GET_PSIZE(&bp);
uncompressed += BP_GET_UCSIZE(&bp);
/* XXX check return value? */
@@ -1169,7 +1166,7 @@ dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
bp.blk_birth <=
ds->ds_phys->ds_creation_txg) {
ds_next->ds_phys->ds_unique_bytes +=
- BP_GET_ASIZE(&bp);
+ bp_get_dasize(dp->dp_spa, &bp);
}
}
@@ -1347,7 +1344,7 @@ dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
- dsphys->ds_inconsistent = ds->ds_phys->ds_inconsistent;
+ dsphys->ds_flags = ds->ds_phys->ds_flags;
dsphys->ds_bp = ds->ds_phys->ds_bp;
dmu_buf_rele(dbuf, FTAG);
@@ -1424,7 +1421,7 @@ dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds)
dds->dds_num_clones = ds->ds_phys->ds_num_children - 1;
}
- dds->dds_inconsistent = ds->ds_phys->ds_inconsistent;
+ dds->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth;
dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill;
@@ -1581,3 +1578,236 @@ dsl_dataset_rename(const char *osname, const char *newname)
dsl_dir_close(dd, FTAG);
return (err);
}
+
+/* ARGSUSED */
+static int
+dsl_dataset_promote_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
+{
+ dsl_dir_t *pdd = NULL;
+ dsl_dataset_t *ds = NULL;
+ dsl_dataset_t *hds = NULL;
+ dsl_dataset_t *phds = NULL;
+ dsl_dataset_t *pivot_ds = NULL;
+ dsl_dataset_t *newnext_ds = NULL;
+ int err;
+ char *name = NULL;
+ uint64_t used = 0, comp = 0, uncomp = 0, unique = 0, itor = 0;
+ blkptr_t bp;
+
+ /* Check that it is a clone */
+ if (dd->dd_phys->dd_clone_parent_obj == 0)
+ return (EINVAL);
+
+ /* Open everyone */
+ if (err = dsl_dataset_open_obj(dd->dd_pool,
+ dd->dd_phys->dd_clone_parent_obj,
+ NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds))
+ goto out;
+ pdd = pivot_ds->ds_dir;
+ if (err = dsl_dataset_open_obj(dd->dd_pool,
+ pdd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &phds))
+ goto out;
+ if (err = dsl_dataset_open_obj(dd->dd_pool,
+ dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds))
+ goto out;
+
+ if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
+ err = EXDEV;
+ goto out;
+ }
+
+ /* find pivot point's new next ds */
+ VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
+ NULL, DS_MODE_NONE, FTAG, &newnext_ds));
+ while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) {
+ dsl_dataset_t *prev;
+
+ if (err = dsl_dataset_open_obj(dd->dd_pool,
+ newnext_ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_NONE,
+ FTAG, &prev))
+ goto out;
+ dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
+ newnext_ds = prev;
+ }
+
+ /* compute pivot point's new unique space */
+ while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
+ &itor, &bp)) == 0) {
+ if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg)
+ unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
+ }
+ if (err != ENOENT)
+ goto out;
+
+ /* need the config lock to ensure that the snapshots are not open */
+ rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER);
+
+ /* Walk the snapshots that we are moving */
+ name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ ds = pivot_ds;
+ /* CONSTCOND */
+ while (TRUE) {
+ uint64_t val, dlused, dlcomp, dluncomp;
+ dsl_dataset_t *prev;
+
+ /* Check that the snapshot name does not conflict */
+ dsl_dataset_name(ds, name);
+ err = zap_lookup(dd->dd_pool->dp_meta_objset,
+ hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
+ 8, 1, &val);
+ if (err != ENOENT) {
+ if (err == 0)
+ err = EEXIST;
+ goto out;
+ }
+
+ /*
+ * compute space to transfer. Each snapshot gave birth to:
+ * (my used) - (prev's used) + (deadlist's used)
+ */
+ used += ds->ds_phys->ds_used_bytes;
+ comp += ds->ds_phys->ds_compressed_bytes;
+ uncomp += ds->ds_phys->ds_uncompressed_bytes;
+
+ /* If we reach the first snapshot, we're done. */
+ if (ds->ds_phys->ds_prev_snap_obj == 0)
+ break;
+
+ if (err = bplist_space(&ds->ds_deadlist,
+ &dlused, &dlcomp, &dluncomp))
+ goto out;
+ if (err = dsl_dataset_open_obj(dd->dd_pool,
+ ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
+ FTAG, &prev))
+ goto out;
+ used += dlused - prev->ds_phys->ds_used_bytes;
+ comp += dlcomp - prev->ds_phys->ds_compressed_bytes;
+ uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes;
+
+ /*
+ * We could be a clone of a clone. If we reach our
+ * parent's branch point, we're done.
+ */
+ if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
+ dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
+ break;
+ }
+ if (ds != pivot_ds)
+ dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
+ ds = prev;
+ }
+ if (ds != pivot_ds)
+ dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
+ ds = NULL;
+
+ /* Check that there is enough space here */
+ if (err = dsl_dir_transfer_possible(pdd, dd, used))
+ goto out;
+
+ /* The point of no (unsuccessful) return */
+
+ /* move snapshots to this dir */
+ ds = pivot_ds;
+ /* CONSTCOND */
+ while (TRUE) {
+ dsl_dataset_t *prev;
+
+ /* move snap name entry */
+ dsl_dataset_name(ds, name);
+ VERIFY(0 == zap_remove(dd->dd_pool->dp_meta_objset,
+ phds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, tx));
+ VERIFY(0 == zap_add(dd->dd_pool->dp_meta_objset,
+ hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
+ 8, 1, &ds->ds_object, tx));
+
+ /* change containing dsl_dir */
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object);
+ ds->ds_phys->ds_dir_obj = dd->dd_object;
+ ASSERT3P(ds->ds_dir, ==, pdd);
+ dsl_dir_close(ds->ds_dir, ds);
+ VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, dd->dd_object,
+ NULL, ds, &ds->ds_dir));
+
+ ASSERT3U(dsl_prop_numcb(ds), ==, 0);
+
+ if (ds->ds_phys->ds_prev_snap_obj == 0)
+ break;
+
+ VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
+ ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
+ FTAG, &prev));
+
+ if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
+ dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
+ break;
+ }
+ if (ds != pivot_ds)
+ dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
+ ds = prev;
+ }
+
+ /* change pivot point's next snap */
+ dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx);
+ pivot_ds->ds_phys->ds_next_snap_obj = newnext_ds->ds_object;
+
+ /* change clone_parent-age */
+ dmu_buf_will_dirty(dd->dd_dbuf, tx);
+ ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object);
+ dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj;
+ dmu_buf_will_dirty(pdd->dd_dbuf, tx);
+ pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object;
+
+ /* change space accounting */
+ dsl_dir_diduse_space(pdd, -used, -comp, -uncomp, tx);
+ dsl_dir_diduse_space(dd, used, comp, uncomp, tx);
+ pivot_ds->ds_phys->ds_unique_bytes = unique;
+
+ err = 0;
+
+out:
+ if (RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock))
+ rw_exit(&dd->dd_pool->dp_config_rwlock);
+ if (hds)
+ dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
+ if (phds)
+ dsl_dataset_close(phds, DS_MODE_NONE, FTAG);
+ if (ds && ds != pivot_ds)
+ dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
+ if (pivot_ds)
+ dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
+ if (newnext_ds)
+ dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
+ if (name)
+ kmem_free(name, MAXPATHLEN);
+ return (err);
+}
+
+int
+dsl_dataset_promote(const char *name)
+{
+ dsl_dataset_t *ds;
+ int err;
+ dmu_object_info_t doi;
+
+ err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds);
+ if (err)
+ return (err);
+
+ err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset,
+ ds->ds_phys->ds_snapnames_zapobj, &doi);
+ if (err) {
+ dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
+ return (err);
+ }
+
+ /*
+ * Add in 128x the snapnames zapobj size, since we will be moving
+ * a bunch of snapnames to the promoted ds, and dirtying their
+ * bonus buffers.
+ */
+ err = dsl_dir_sync_task(ds->ds_dir, dsl_dataset_promote_sync, NULL,
+ (1<<20) + (doi.doi_physical_blks << (SPA_MINBLOCKSHIFT + 7)));
+ dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
+ return (err);
+}
diff --git a/usr/src/uts/common/fs/zfs/dsl_dir.c b/usr/src/uts/common/fs/zfs/dsl_dir.c
index 8ffa145477..d7095cb0d3 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c
@@ -739,7 +739,7 @@ dsl_dir_space_available(dsl_dir_t *dd,
used += delta;
if (dd->dd_parent == NULL) {
- uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE);
+ uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE);
quota = MIN(quota, poolsize);
}
@@ -754,23 +754,19 @@ dsl_dir_space_available(dsl_dir_t *dd,
if (used > quota) {
/* over quota */
myspace = 0;
-#ifdef ZFS_DEBUG
- {
- /*
- * While it's OK to be a little over quota, if
- * we think we are using more space than there
- * is in the pool (which is already 6% more than
- * dsl_pool_adjustedsize()), something is very
- * wrong.
- */
- uint64_t space = spa_get_space(dd->dd_pool->dp_spa);
- ASSERT3U(used, <=, space);
- }
-#endif
+
+ /*
+ * While it's OK to be a little over quota, if
+ * we think we are using more space than there
+ * is in the pool (which is already 1.6% more than
+ * dsl_pool_adjustedsize()), something is very
+ * wrong.
+ */
+ ASSERT3U(used, <=, spa_get_space(dd->dd_pool->dp_spa));
} else {
/*
- * the lesser of parent's space and the space
- * left in our quota
+ * the lesser of the space provided by our parent and
+ * the space left in our quota
*/
myspace = MIN(parentspace, quota - used);
}
@@ -1170,27 +1166,22 @@ dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
}
if (newpds != dd->dd_parent) {
- dsl_dir_t *ancestor;
- int64_t adelta;
- uint64_t myspace, avail;
-
- ancestor = closest_common_ancestor(dd, newpds);
+ /* is there enough space? */
+ uint64_t myspace =
+ MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved);
/* no rename into our descendent */
- if (ancestor == dd) {
+ if (closest_common_ancestor(dd, newpds) == dd) {
dsl_dir_close(newpds, FTAG);
rw_exit(&dp->dp_config_rwlock);
return (EINVAL);
}
- myspace = MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved);
- adelta = would_change(dd->dd_parent, -myspace, ancestor);
- avail = dsl_dir_space_available(newpds,
- ancestor, adelta, FALSE);
- if (avail < myspace) {
+ if (err = dsl_dir_transfer_possible(dd->dd_parent, newpds,
+ myspace)) {
dsl_dir_close(newpds, FTAG);
rw_exit(&dp->dp_config_rwlock);
- return (ENOSPC);
+ return (err);
}
/* The point of no (unsuccessful) return */
@@ -1227,3 +1218,19 @@ dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
rw_exit(&dp->dp_config_rwlock);
return (0);
}
+
+int
+dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space)
+{
+ dsl_dir_t *ancestor;
+ int64_t adelta;
+ uint64_t avail;
+
+ ancestor = closest_common_ancestor(sdd, tdd);
+ adelta = would_change(sdd, -space, ancestor);
+ avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
+ if (avail < space)
+ return (ENOSPC);
+
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/zfs/dsl_pool.c b/usr/src/uts/common/fs/zfs/dsl_pool.c
index 77a1adb3b1..d12e1acfeb 100644
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c
@@ -241,7 +241,7 @@ dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree)
* cut the reservation in half to allow forward progress
* (e.g. make it possible to rm(1) files from a full pool).
*/
- space = spa_get_space(dp->dp_spa);
+ space = spa_get_dspace(dp->dp_spa);
resv = MAX(space >> 6, SPA_MINDEVSIZE >> 1);
if (netfree)
resv >>= 1;
diff --git a/usr/src/uts/common/fs/zfs/dsl_prop.c b/usr/src/uts/common/fs/zfs/dsl_prop.c
index fc33b1c591..0bb55f8b95 100644
--- a/usr/src/uts/common/fs/zfs/dsl_prop.c
+++ b/usr/src/uts/common/fs/zfs/dsl_prop.c
@@ -62,33 +62,28 @@ dodefault(const char *propname, int intsz, int numint, void *buf)
}
static int
-dsl_prop_get_impl(dsl_pool_t *dp, uint64_t ddobj, const char *propname,
+dsl_prop_get_impl(dsl_dir_t *dd, const char *propname,
int intsz, int numint, void *buf, char *setpoint)
{
- int err = 0;
- objset_t *mos = dp->dp_meta_objset;
+ int err = ENOENT;
if (setpoint)
setpoint[0] = '\0';
- ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock));
-
- while (ddobj != 0) {
- dsl_dir_t *dd;
- err = dsl_dir_open_obj(dp, ddobj, NULL, FTAG, &dd);
- if (err)
- break;
+ /*
+ * Note: dd may be NULL, therefore we shouldn't dereference it
+ * outside this loop.
+ */
+ for (; dd != NULL; dd = dd->dd_parent) {
+ objset_t *mos = dd->dd_pool->dp_meta_objset;
+ ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
err = zap_lookup(mos, dd->dd_phys->dd_props_zapobj,
propname, intsz, numint, buf);
if (err != ENOENT) {
if (setpoint)
dsl_dir_name(dd, setpoint);
- dsl_dir_close(dd, FTAG);
break;
}
- ASSERT3U(err, ==, ENOENT);
- ddobj = dd->dd_phys->dd_parent_obj;
- dsl_dir_close(dd, FTAG);
}
if (err == ENOENT)
err = dodefault(propname, intsz, numint, buf);
@@ -107,27 +102,21 @@ int
dsl_prop_register(dsl_dataset_t *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg)
{
- dsl_dir_t *dd;
+ dsl_dir_t *dd = ds->ds_dir;
uint64_t value;
dsl_prop_cb_record_t *cbr;
int err;
- dd = ds->ds_dir;
-
rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
- err = dsl_prop_get_impl(dd->dd_pool, dd->dd_object, propname,
- 8, 1, &value, NULL);
- if (err == ENOENT) {
- err = 0;
- value = DSL_PROP_VALUE_UNDEFINED;
- }
+ err = dsl_prop_get_impl(dd, propname, 8, 1, &value, NULL);
if (err != 0) {
rw_exit(&dd->dd_pool->dp_config_rwlock);
return (err);
}
cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_SLEEP);
+ cbr->cbr_ds = ds;
cbr->cbr_propname = kmem_alloc(strlen(propname)+1, KM_SLEEP);
(void) strcpy((char *)cbr->cbr_propname, propname);
cbr->cbr_func = callback;
@@ -152,8 +141,7 @@ dsl_prop_get_ds(dsl_dir_t *dd, const char *propname,
int err;
rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
- err = dsl_prop_get_impl(dd->dd_pool, dd->dd_object,
- propname, intsz, numints, buf, setpoint);
+ err = dsl_prop_get_impl(dd, propname, intsz, numints, buf, setpoint);
rw_exit(&dd->dd_pool->dp_config_rwlock);
return (err);
@@ -222,17 +210,16 @@ int
dsl_prop_unregister(dsl_dataset_t *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg)
{
- dsl_dir_t *dd;
+ dsl_dir_t *dd = ds->ds_dir;
dsl_prop_cb_record_t *cbr;
- dd = ds->ds_dir;
-
mutex_enter(&dd->dd_lock);
for (cbr = list_head(&dd->dd_prop_cbs);
cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) {
- if (strcmp(cbr->cbr_propname, propname) == 0 &&
+ if (cbr->cbr_ds == ds &&
cbr->cbr_func == callback &&
- cbr->cbr_arg == cbarg)
+ cbr->cbr_arg == cbarg &&
+ strcmp(cbr->cbr_propname, propname) == 0)
break;
}
@@ -251,6 +238,27 @@ dsl_prop_unregister(dsl_dataset_t *ds, const char *propname,
return (0);
}
+/*
+ * Return the number of callbacks that are registered for this dataset.
+ */
+int
+dsl_prop_numcb(dsl_dataset_t *ds)
+{
+ dsl_dir_t *dd = ds->ds_dir;
+ dsl_prop_cb_record_t *cbr;
+ int num = 0;
+
+ mutex_enter(&dd->dd_lock);
+ for (cbr = list_head(&dd->dd_prop_cbs);
+ cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) {
+ if (cbr->cbr_ds == ds)
+ num++;
+ }
+ mutex_exit(&dd->dd_lock);
+
+ return (num);
+}
+
static void
dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
const char *propname, uint64_t value, int first)
@@ -330,9 +338,8 @@ dsl_prop_set_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
if (err == ENOENT) /* that's fine. */
err = 0;
if (err == 0 && isint) {
- err = dsl_prop_get_impl(dd->dd_pool,
- dd->dd_phys->dd_parent_obj, psa->name,
- 8, 1, &intval, NULL);
+ err = dsl_prop_get_impl(dd->dd_parent,
+ psa->name, 8, 1, &intval, NULL);
}
} else {
err = zap_update(mos, zapobj, psa->name,
@@ -380,7 +387,7 @@ int
dsl_prop_get_all(objset_t *os, nvlist_t **nvp)
{
dsl_dataset_t *ds = os->os->os_dsl_dataset;
- dsl_dir_t *dd, *parent;
+ dsl_dir_t *dd = ds->ds_dir;
int err = 0;
dsl_pool_t *dp;
objset_t *mos;
@@ -395,15 +402,13 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp)
return (0);
}
- dd = ds->ds_dir;
-
VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
dp = dd->dd_pool;
mos = dp->dp_meta_objset;
rw_enter(&dp->dp_config_rwlock, RW_READER);
- while (dd != NULL) {
+ for (; dd != NULL; dd = dd->dd_parent) {
dsl_dir_name(dd, setpoint);
for (zap_cursor_init(&zc, mos, dd->dd_phys->dd_props_zapobj);
@@ -418,7 +423,6 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp)
/*
* String property
*/
-
tmp = kmem_alloc(za.za_num_integers, KM_SLEEP);
err = zap_lookup(mos,
dd->dd_phys->dd_props_zapobj,
@@ -448,27 +452,9 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp)
}
zap_cursor_fini(&zc);
- if (err != ENOENT) {
- if (dd != ds->ds_dir)
- dsl_dir_close(dd, FTAG);
+ if (err != ENOENT)
break;
- } else {
- err = 0;
- }
-
- /*
- * Continue to parent.
- */
- if (dd->dd_phys->dd_parent_obj == 0)
- parent = NULL;
- else
- err = dsl_dir_open_obj(dp,
- dd->dd_phys->dd_parent_obj, NULL, FTAG, &parent);
- if (dd != ds->ds_dir)
- dsl_dir_close(dd, FTAG);
- if (err)
- break;
- dd = parent;
+ err = 0;
}
rw_exit(&dp->dp_config_rwlock);
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index 2fe82c2e80..fca42558ef 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -185,33 +186,40 @@ spa_deactivate(spa_t *spa)
* in the CLOSED state. This will prep the pool before open/creation/import.
* All vdev validation is done by the vdev_alloc() routine.
*/
-static vdev_t *
-spa_config_parse(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int atype)
+static int
+spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
+ uint_t id, int atype)
{
nvlist_t **child;
uint_t c, children;
- vdev_t *vd;
+ int error;
- if ((vd = vdev_alloc(spa, nv, parent, id, atype)) == NULL)
- return (NULL);
+ if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
+ return (error);
- if (vd->vdev_ops->vdev_op_leaf)
- return (vd);
+ if ((*vdp)->vdev_ops->vdev_op_leaf)
+ return (0);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0) {
- vdev_free(vd);
- return (NULL);
+ vdev_free(*vdp);
+ *vdp = NULL;
+ return (EINVAL);
}
for (c = 0; c < children; c++) {
- if (spa_config_parse(spa, child[c], vd, c, atype) == NULL) {
- vdev_free(vd);
- return (NULL);
+ vdev_t *vd;
+ if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
+ atype)) != 0) {
+ vdev_free(*vdp);
+ *vdp = NULL;
+ return (error);
}
}
- return (vd);
+ ASSERT(*vdp != NULL);
+
+ return (0);
}
/*
@@ -220,6 +228,8 @@ spa_config_parse(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int atype)
static void
spa_unload(spa_t *spa)
{
+ int i;
+
/*
* Stop async tasks.
*/
@@ -254,10 +264,117 @@ spa_unload(spa_t *spa)
vdev_free(spa->spa_root_vdev);
ASSERT(spa->spa_root_vdev == NULL);
+ for (i = 0; i < spa->spa_nspares; i++)
+ vdev_free(spa->spa_spares[i]);
+ if (spa->spa_spares) {
+ kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));
+ spa->spa_spares = NULL;
+ }
+ if (spa->spa_sparelist) {
+ nvlist_free(spa->spa_sparelist);
+ spa->spa_sparelist = NULL;
+ }
+
spa->spa_async_suspended = 0;
}
/*
+ * Load (or re-load) the current list of vdevs describing the active spares for
+ * this pool. When this is called, we have some form of basic information in
+ * 'spa_sparelist'. We parse this into vdevs, try to open them, and then
+ * re-generate a more complete list including status information.
+ */
+static void
+spa_load_spares(spa_t *spa)
+{
+ nvlist_t **spares;
+ uint_t nspares;
+ int i;
+
+ /*
+ * First, close and free any existing spare vdevs.
+ */
+ for (i = 0; i < spa->spa_nspares; i++) {
+ vdev_close(spa->spa_spares[i]);
+ vdev_free(spa->spa_spares[i]);
+ }
+ if (spa->spa_spares)
+ kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));
+
+ if (spa->spa_sparelist == NULL)
+ nspares = 0;
+ else
+ VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
+ ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+
+ spa->spa_nspares = (int)nspares;
+ spa->spa_spares = NULL;
+
+ if (nspares == 0)
+ return;
+
+ /*
+ * Construct the array of vdevs, opening them to get status in the
+ * process.
+ */
+ spa->spa_spares = kmem_alloc(nspares * sizeof (void *), KM_SLEEP);
+ for (i = 0; i < spa->spa_nspares; i++) {
+ vdev_t *vd;
+
+ VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
+ VDEV_ALLOC_SPARE) == 0);
+ ASSERT(vd != NULL);
+
+ spa->spa_spares[i] = vd;
+
+ if (vdev_open(vd) != 0)
+ continue;
+
+ vd->vdev_top = vd;
+ (void) vdev_validate_spare(vd);
+ }
+
+ /*
+ * Recompute the stashed list of spares, with status information
+ * this time.
+ */
+ VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
+ DATA_TYPE_NVLIST_ARRAY) == 0);
+
+ spares = kmem_alloc(spa->spa_nspares * sizeof (void *), KM_SLEEP);
+ for (i = 0; i < spa->spa_nspares; i++)
+ spares[i] = vdev_config_generate(spa, spa->spa_spares[i],
+ B_TRUE, B_TRUE);
+ VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
+ spares, spa->spa_nspares) == 0);
+ for (i = 0; i < spa->spa_nspares; i++)
+ nvlist_free(spares[i]);
+ kmem_free(spares, spa->spa_nspares * sizeof (void *));
+}
+
+static int
+load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
+{
+ dmu_buf_t *db;
+ char *packed = NULL;
+ size_t nvsize = 0;
+ int error;
+ *value = NULL;
+
+ VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
+ nvsize = *(uint64_t *)db->db_data;
+ dmu_buf_rele(db, FTAG);
+
+ packed = kmem_alloc(nvsize, KM_SLEEP);
+ error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed);
+ if (error == 0)
+ error = nvlist_unpack(packed, nvsize, value, 0);
+ kmem_free(packed, nvsize);
+
+ return (error);
+}
+
+/*
* Load an existing storage pool, using the pool's builtin spa_config as a
* source of configuration information.
*/
@@ -270,6 +387,7 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
uberblock_t *ub = &spa->spa_uberblock;
uint64_t config_cache_txg = spa->spa_config_txg;
uint64_t pool_guid;
+ uint64_t version;
zio_t *zio;
spa->spa_load_state = state;
@@ -280,6 +398,13 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
goto out;
}
+ /*
+ * Versioning wasn't explicitly added to the label until later, so if
+ * it's not present treat it as the initial version.
+ */
+ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0)
+ version = ZFS_VERSION_INITIAL;
+
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
&spa->spa_config_txg);
@@ -290,16 +415,17 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
}
/*
- * Parse the configuration into a vdev tree.
+ * Parse the configuration into a vdev tree. We explicitly set the
+ * value that will be returned by spa_version() since parsing the
+ * configuration requires knowing the version number.
*/
spa_config_enter(spa, RW_WRITER, FTAG);
- rvd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_LOAD);
+ spa->spa_ubsync.ub_version = version;
+ error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD);
spa_config_exit(spa, FTAG);
- if (rvd == NULL) {
- error = EINVAL;
+ if (error != 0)
goto out;
- }
ASSERT(spa->spa_root_vdev == rvd);
ASSERT(spa_guid(spa) == pool_guid);
@@ -396,24 +522,9 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
}
if (!mosconfig) {
- dmu_buf_t *db;
- char *packed = NULL;
- size_t nvsize = 0;
- nvlist_t *newconfig = NULL;
-
- VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset,
- spa->spa_config_object, FTAG, &db));
- nvsize = *(uint64_t *)db->db_data;
- dmu_buf_rele(db, FTAG);
-
- packed = kmem_alloc(nvsize, KM_SLEEP);
- error = dmu_read(spa->spa_meta_objset,
- spa->spa_config_object, 0, nvsize, packed);
- if (error == 0)
- error = nvlist_unpack(packed, nvsize, &newconfig, 0);
- kmem_free(packed, nvsize);
+ nvlist_t *newconfig;
- if (error) {
+ if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) {
vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_CORRUPT_DATA);
error = EIO;
@@ -421,7 +532,6 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
}
spa_config_set(spa, newconfig);
-
spa_unload(spa);
spa_deactivate(spa);
spa_activate(spa);
@@ -439,6 +549,21 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
}
/*
+ * Load the bit that tells us to use the new accounting function
+ * (raid-z deflation). If we have an older pool, this will not
+ * be present.
+ */
+ error = zap_lookup(spa->spa_meta_objset,
+ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
+ sizeof (uint64_t), 1, &spa->spa_deflate);
+ if (error != 0 && error != ENOENT) {
+ vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
+ VDEV_AUX_CORRUPT_DATA);
+ error = EIO;
+ goto out;
+ }
+
+ /*
* Load the persistent error log. If we have an older pool, this will
* not be present.
*/
@@ -463,6 +588,32 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
}
/*
+ * Load any hot spares for this pool.
+ */
+ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares_object);
+ if (error != 0 && error != ENOENT) {
+ vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
+ VDEV_AUX_CORRUPT_DATA);
+ error = EIO;
+ goto out;
+ }
+ if (error == 0) {
+ ASSERT(spa_version(spa) >= ZFS_VERSION_SPARES);
+ if (load_nvlist(spa, spa->spa_spares_object,
+ &spa->spa_sparelist) != 0) {
+ vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
+ VDEV_AUX_CORRUPT_DATA);
+ error = EIO;
+ goto out;
+ }
+
+ spa_config_enter(spa, RW_WRITER, FTAG);
+ spa_load_spares(spa);
+ spa_config_exit(spa, FTAG);
+ }
+
+ /*
* Load the vdev state for all toplevel vdevs.
*/
vdev_load(rvd);
@@ -527,7 +678,7 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
error = 0;
out:
- if (error)
+ if (error && error != EBADF)
zfs_ereport_post(FM_EREPORT_ZFS_POOL, spa, NULL, NULL, 0, 0);
spa->spa_load_state = SPA_LOAD_NONE;
spa->spa_ena = 0;
@@ -587,6 +738,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config)
* this is the case, the config cache is out of sync and
* we should remove the pool from the namespace.
*/
+ zfs_post_ok(spa, NULL);
spa_unload(spa);
spa_deactivate(spa);
spa_remove(spa);
@@ -678,6 +830,48 @@ spa_inject_delref(spa_t *spa)
mutex_exit(&spa_namespace_lock);
}
+static void
+spa_add_spares(spa_t *spa, nvlist_t *config)
+{
+ nvlist_t **spares;
+ uint_t i, nspares;
+ nvlist_t *nvroot;
+ uint64_t guid;
+ vdev_stat_t *vs;
+ uint_t vsc;
+
+ if (spa->spa_nspares == 0)
+ return;
+
+ VERIFY(nvlist_lookup_nvlist(config,
+ ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+ VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
+ ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+ if (nspares != 0) {
+ VERIFY(nvlist_add_nvlist_array(nvroot,
+ ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+ VERIFY(nvlist_lookup_nvlist_array(nvroot,
+ ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+
+ /*
+ * Go through and find any spares which have since been
+ * repurposed as an active spare. If this is the case, update
+ * their status appropriately.
+ */
+ for (i = 0; i < nspares; i++) {
+ VERIFY(nvlist_lookup_uint64(spares[i],
+ ZPOOL_CONFIG_GUID, &guid) == 0);
+ if (spa_spare_inuse(guid)) {
+ VERIFY(nvlist_lookup_uint64_array(
+ spares[i], ZPOOL_CONFIG_STATS,
+ (uint64_t **)&vs, &vsc) == 0);
+ vs->vs_state = VDEV_STATE_CANT_OPEN;
+ vs->vs_aux = VDEV_AUX_SPARED;
+ }
+ }
+ }
+}
+
int
spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
{
@@ -687,10 +881,13 @@ spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
*config = NULL;
error = spa_open_common(name, &spa, FTAG, config);
- if (spa && *config != NULL)
+ if (spa && *config != NULL) {
VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT,
spa_get_errlog_size(spa)) == 0);
+ spa_add_spares(spa, *config);
+ }
+
/*
* We want to get the alternate root even for faulted pools, so we cheat
* and call spa_lookup() directly.
@@ -717,6 +914,65 @@ spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
}
/*
+ * Validate that the 'spares' array is well formed. We must have an array of
+ * nvlists, each which describes a valid leaf vdev.
+ */
+static int
+spa_validate_spares(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
+{
+ nvlist_t **spares;
+ uint_t i, nspares;
+ vdev_t *vd;
+ int error;
+
+ /*
+ * It's acceptable to have no spares specified.
+ */
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ &spares, &nspares) != 0)
+ return (0);
+
+ if (nspares == 0)
+ return (EINVAL);
+
+ /*
+ * Make sure the pool is formatted with a version that supports hot
+ * spares.
+ */
+ if (spa_version(spa) < ZFS_VERSION_SPARES)
+ return (ENOTSUP);
+
+ for (i = 0; i < nspares; i++) {
+ if ((error = spa_config_parse(spa, &vd, spares[i], NULL, 0,
+ mode)) != 0)
+ return (error);
+
+ if (!vd->vdev_ops->vdev_op_leaf) {
+ vdev_free(vd);
+ return (EINVAL);
+ }
+
+ if ((error = vdev_open(vd)) != 0) {
+ vdev_free(vd);
+ return (error);
+ }
+
+ vd->vdev_top = vd;
+ if ((error = vdev_label_spare(vd, crtxg)) != 0) {
+ vdev_free(vd);
+ return (error);
+ }
+
+ VERIFY(nvlist_add_uint64(spares[i], ZPOOL_CONFIG_GUID,
+ vd->vdev_guid) == 0);
+
+ vdev_free(vd);
+ }
+
+ return (0);
+}
+
+/*
* Pool Creation
*/
int
@@ -726,8 +982,10 @@ spa_create(const char *pool, nvlist_t *nvroot, const char *altroot)
vdev_t *rvd;
dsl_pool_t *dp;
dmu_tx_t *tx;
- int c, error;
+ int c, error = 0;
uint64_t txg = TXG_INITIAL;
+ nvlist_t **spares;
+ uint_t nspares;
/*
* If this pool already exists, return failure.
@@ -753,23 +1011,26 @@ spa_create(const char *pool, nvlist_t *nvroot, const char *altroot)
*/
spa_config_enter(spa, RW_WRITER, FTAG);
- rvd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_ADD);
+ error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);
- ASSERT(spa->spa_root_vdev == rvd);
+ ASSERT(error != 0 || rvd != NULL);
+ ASSERT(error != 0 || spa->spa_root_vdev == rvd);
- if (rvd == NULL) {
+ if (error == 0 && rvd->vdev_children == 0)
error = EINVAL;
- } else {
- if ((error = vdev_create(rvd, txg)) == 0) {
- for (c = 0; c < rvd->vdev_children; c++)
- vdev_init(rvd->vdev_child[c], txg);
- vdev_config_dirty(rvd);
- }
+
+ if (error == 0 &&
+ (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
+ (error = spa_validate_spares(spa, nvroot, txg,
+ VDEV_ALLOC_ADD)) == 0) {
+ for (c = 0; c < rvd->vdev_children; c++)
+ vdev_init(rvd->vdev_child[c], txg);
+ vdev_config_dirty(rvd);
}
spa_config_exit(spa, FTAG);
- if (error) {
+ if (error != 0) {
spa_unload(spa);
spa_deactivate(spa);
spa_remove(spa);
@@ -777,6 +1038,21 @@ spa_create(const char *pool, nvlist_t *nvroot, const char *altroot)
return (error);
}
+ /*
+ * Get the list of spares, if specified.
+ */
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ &spares, &nspares) == 0) {
+ VERIFY(nvlist_alloc(&spa->spa_sparelist, NV_UNIQUE_NAME,
+ KM_SLEEP) == 0);
+ VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
+ ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+ spa_config_enter(spa, RW_WRITER, FTAG);
+ spa_load_spares(spa);
+ spa_config_exit(spa, FTAG);
+ spa->spa_sync_spares = B_TRUE;
+ }
+
spa->spa_dsl_pool = dp = dsl_pool_create(spa, txg);
spa->spa_meta_objset = dp->dp_meta_objset;
@@ -795,6 +1071,14 @@ spa_create(const char *pool, nvlist_t *nvroot, const char *altroot)
cmn_err(CE_PANIC, "failed to add pool config");
}
+ /* Newly created pools are always deflated. */
+ spa->spa_deflate = TRUE;
+ if (zap_add(spa->spa_meta_objset,
+ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
+ sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
+ cmn_err(CE_PANIC, "failed to add deflate");
+ }
+
/*
* Create the deferred-free bplist object. Turn off compression
* because sync-to-convergence takes longer if the blocksize
@@ -838,6 +1122,9 @@ spa_import(const char *pool, nvlist_t *config, const char *altroot)
{
spa_t *spa;
int error;
+ nvlist_t *nvroot;
+ nvlist_t **spares;
+ uint_t nspares;
if (!(spa_mode & FWRITE))
return (EROFS);
@@ -864,7 +1151,25 @@ spa_import(const char *pool, nvlist_t *config, const char *altroot)
*/
error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE);
- if (error) {
+ spa_config_enter(spa, RW_WRITER, FTAG);
+ /*
+ * Toss any existing sparelist, as it doesn't have any validity anymore,
+ * and conflicts with spa_has_spare().
+ */
+ if (spa->spa_sparelist) {
+ nvlist_free(spa->spa_sparelist);
+ spa->spa_sparelist = NULL;
+ spa_load_spares(spa);
+ }
+
+ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &nvroot) == 0);
+ if (error == 0)
+ error = spa_validate_spares(spa, nvroot, -1ULL,
+ VDEV_ALLOC_SPARE);
+ spa_config_exit(spa, FTAG);
+
+ if (error != 0) {
spa_unload(spa);
spa_deactivate(spa);
spa_remove(spa);
@@ -873,6 +1178,26 @@ spa_import(const char *pool, nvlist_t *config, const char *altroot)
}
/*
+ * Override any spares as specified by the user, as these may have
+ * correct device names/devids, etc.
+ */
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ &spares, &nspares) == 0) {
+ if (spa->spa_sparelist)
+ VERIFY(nvlist_remove(spa->spa_sparelist,
+ ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
+ else
+ VERIFY(nvlist_alloc(&spa->spa_sparelist,
+ NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
+ ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+ spa_config_enter(spa, RW_WRITER, FTAG);
+ spa_load_spares(spa);
+ spa_config_exit(spa, FTAG);
+ spa->spa_sync_spares = B_TRUE;
+ }
+
+ /*
* Update the config cache to include the newly-imported pool.
*/
spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
@@ -933,6 +1258,11 @@ spa_tryimport(nvlist_t *tryconfig)
poolname) == 0);
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
state) == 0);
+
+ /*
+ * Add the list of hot spares.
+ */
+ spa_add_spares(spa, config);
}
spa_unload(spa);
@@ -1083,26 +1413,80 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
int c, error;
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *vd, *tvd;
+ nvlist_t **spares;
+ uint_t i, nspares;
txg = spa_vdev_enter(spa);
- vd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_ADD);
+ if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
+ VDEV_ALLOC_ADD)) != 0)
+ return (spa_vdev_exit(spa, NULL, txg, error));
- if (vd == NULL)
+ if ((error = spa_validate_spares(spa, nvroot, txg,
+ VDEV_ALLOC_ADD)) != 0)
+ return (spa_vdev_exit(spa, vd, txg, error));
+
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ &spares, &nspares) != 0)
+ nspares = 0;
+
+ if (vd->vdev_children == 0 && nspares == 0)
return (spa_vdev_exit(spa, vd, txg, EINVAL));
- if ((error = vdev_create(vd, txg)) != 0)
- return (spa_vdev_exit(spa, vd, txg, error));
+ if (vd->vdev_children != 0) {
+ if ((error = vdev_create(vd, txg, B_FALSE)) != 0)
+ return (spa_vdev_exit(spa, vd, txg, error));
- /*
- * Transfer each new top-level vdev from vd to rvd.
- */
- for (c = 0; c < vd->vdev_children; c++) {
- tvd = vd->vdev_child[c];
- vdev_remove_child(vd, tvd);
- tvd->vdev_id = rvd->vdev_children;
- vdev_add_child(rvd, tvd);
- vdev_config_dirty(tvd);
+ /*
+ * Transfer each new top-level vdev from vd to rvd.
+ */
+ for (c = 0; c < vd->vdev_children; c++) {
+ tvd = vd->vdev_child[c];
+ vdev_remove_child(vd, tvd);
+ tvd->vdev_id = rvd->vdev_children;
+ vdev_add_child(rvd, tvd);
+ vdev_config_dirty(tvd);
+ }
+ }
+
+ if (nspares != 0) {
+ if (spa->spa_sparelist != NULL) {
+ nvlist_t **oldspares;
+ uint_t oldnspares;
+ nvlist_t **newspares;
+
+ VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
+ ZPOOL_CONFIG_SPARES, &oldspares, &oldnspares) == 0);
+
+ newspares = kmem_alloc(sizeof (void *) *
+ (nspares + oldnspares), KM_SLEEP);
+ for (i = 0; i < oldnspares; i++)
+ VERIFY(nvlist_dup(oldspares[i],
+ &newspares[i], KM_SLEEP) == 0);
+ for (i = 0; i < nspares; i++)
+ VERIFY(nvlist_dup(spares[i],
+ &newspares[i + oldnspares],
+ KM_SLEEP) == 0);
+
+ VERIFY(nvlist_remove(spa->spa_sparelist,
+ ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
+
+ VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
+ ZPOOL_CONFIG_SPARES, newspares,
+ nspares + oldnspares) == 0);
+ for (i = 0; i < oldnspares + nspares; i++)
+ nvlist_free(newspares[i]);
+ kmem_free(newspares, (oldnspares + nspares) *
+ sizeof (void *));
+ } else {
+ VERIFY(nvlist_alloc(&spa->spa_sparelist,
+ NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
+ ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+ }
+
+ spa_load_spares(spa);
+ spa->spa_sync_spares = B_TRUE;
}
/*
@@ -1147,7 +1531,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
int error;
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
- vdev_ops_t *pvops = replacing ? &vdev_replacing_ops : &vdev_mirror_ops;
+ vdev_ops_t *pvops;
txg = spa_vdev_enter(spa);
@@ -1161,18 +1545,8 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
pvd = oldvd->vdev_parent;
- /*
- * The parent must be a mirror or the root, unless we're replacing;
- * in that case, the parent can be anything but another replacing vdev.
- */
- if (pvd->vdev_ops != &vdev_mirror_ops &&
- pvd->vdev_ops != &vdev_root_ops &&
- (!replacing || pvd->vdev_ops == &vdev_replacing_ops))
- return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
-
- newrootvd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_ADD);
-
- if (newrootvd == NULL || newrootvd->vdev_children != 1)
+ if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
+ VDEV_ALLOC_ADD)) != 0 || newrootvd->vdev_children != 1)
return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));
newvd = newrootvd->vdev_child[0];
@@ -1180,9 +1554,43 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
if (!newvd->vdev_ops->vdev_op_leaf)
return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));
- if ((error = vdev_create(newrootvd, txg)) != 0)
+ if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
return (spa_vdev_exit(spa, newrootvd, txg, error));
+ if (!replacing) {
+ /*
+ * For attach, the only allowable parent is a mirror or the root
+ * vdev.
+ */
+ if (pvd->vdev_ops != &vdev_mirror_ops &&
+ pvd->vdev_ops != &vdev_root_ops)
+ return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+
+ pvops = &vdev_mirror_ops;
+ } else {
+ /*
+ * Active hot spares can only be replaced by inactive hot
+ * spares.
+ */
+ if (pvd->vdev_ops == &vdev_spare_ops &&
+ pvd->vdev_child[1] == oldvd &&
+ !spa_has_spare(spa, newvd->vdev_guid))
+ return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+
+ /*
+ * If the source is a hot spare, and the parent isn't already a
+ * spare, then we want to create a new hot spare. Otherwise, we
+ * want to create a replacing vdev.
+ */
+ if (pvd->vdev_ops == &vdev_replacing_ops)
+ return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+ else if (pvd->vdev_ops != &vdev_spare_ops &&
+ newvd->vdev_isspare)
+ pvops = &vdev_spare_ops;
+ else
+ pvops = &vdev_replacing_ops;
+ }
+
/*
* Compare the new device size with the replaceable/attachable
* device size.
@@ -1214,8 +1622,8 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
}
/*
- * If the parent is not a mirror, or if we're replacing,
- * insert the new mirror/replacing vdev above oldvd.
+ * If the parent is not a mirror, or if we're replacing, insert the new
+ * mirror/replacing/spare vdev above oldvd.
*/
if (pvd->vdev_ops != pvops)
pvd = vdev_add_parent(oldvd, pvops);
@@ -1283,6 +1691,8 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
int c, t, error;
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *vd, *pvd, *cvd, *tvd;
+ boolean_t unspare = B_FALSE;
+ uint64_t unspare_guid;
txg = spa_vdev_enter(spa);
@@ -1298,17 +1708,27 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
/*
* If replace_done is specified, only remove this device if it's
- * the first child of a replacing vdev.
- */
- if (replace_done &&
- (vd->vdev_id != 0 || pvd->vdev_ops != &vdev_replacing_ops))
- return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
+ * the first child of a replacing vdev. For the 'spare' vdev, either
+ * disk can be removed.
+ */
+ if (replace_done) {
+ if (pvd->vdev_ops == &vdev_replacing_ops) {
+ if (vd->vdev_id != 0)
+ return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
+ } else if (pvd->vdev_ops != &vdev_spare_ops) {
+ return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
+ }
+ }
+
+ ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
+ spa_version(spa) >= ZFS_VERSION_SPARES);
/*
- * Only mirror and replacing vdevs support detach.
+ * Only mirror, replacing, and spare vdevs support detach.
*/
if (pvd->vdev_ops != &vdev_replacing_ops &&
- pvd->vdev_ops != &vdev_mirror_ops)
+ pvd->vdev_ops != &vdev_mirror_ops &&
+ pvd->vdev_ops != &vdev_spare_ops)
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
/*
@@ -1339,10 +1759,25 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
if (!dirty)
break;
}
- if (c == pvd->vdev_children)
+
+ /*
+ * If we are a replacing or spare vdev, then we can always detach the
+ * latter child, as that is how one cancels the operation.
+ */
+ if ((pvd->vdev_ops == &vdev_mirror_ops || vd->vdev_id != 1) &&
+ c == pvd->vdev_children)
return (spa_vdev_exit(spa, NULL, txg, EBUSY));
/*
+ * If we are detaching the original disk from a spare, then it implies
+ * that the spare should become a real disk, and be removed from the
+ * active spare list for the pool.
+ */
+ if (pvd->vdev_ops == &vdev_spare_ops &&
+ vd->vdev_id == 0)
+ unspare = B_TRUE;
+
+ /*
* Erase the disk labels so the disk can be used for other things.
* This must be done after all other error cases are handled,
* but before we disembowel vd (so we can still do I/O to it).
@@ -1350,7 +1785,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
* it may be that the unwritability of the disk is the reason
* it's being detached!
*/
- error = vdev_label_init(vd, 0);
+ error = vdev_label_init(vd, 0, B_FALSE);
if (error)
dprintf("unable to erase labels on %s\n", vdev_description(vd));
@@ -1366,6 +1801,19 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
cvd = pvd->vdev_child[0];
/*
+ * If we need to remove the remaining child from the list of hot spares,
+ * do it now, marking the vdev as no longer a spare in the process. We
+ * must do this before vdev_remove_parent(), because that can change the
+ * GUID if it creates a new toplevel GUID.
+ */
+ if (unspare) {
+ ASSERT(cvd->vdev_isspare);
+ spa_spare_remove(cvd->vdev_guid);
+ cvd->vdev_isspare = B_FALSE;
+ unspare_guid = cvd->vdev_guid;
+ }
+
+ /*
* If the parent mirror/replacing vdev only has one child,
* the parent is no longer needed. Remove it from the tree.
*/
@@ -1408,7 +1856,104 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
dprintf("detached %s in txg %llu\n", vd->vdev_path, txg);
- return (spa_vdev_exit(spa, vd, txg, 0));
+ error = spa_vdev_exit(spa, vd, txg, 0);
+
+ /*
+ * If we are supposed to remove the given vdev from the list of spares,
+ * iterate over all pools in the system and replace it if it's present.
+ */
+ if (unspare) {
+ spa = NULL;
+ mutex_enter(&spa_namespace_lock);
+ while ((spa = spa_next(spa)) != NULL) {
+ if (spa->spa_state != POOL_STATE_ACTIVE)
+ continue;
+
+ (void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
+ }
+ mutex_exit(&spa_namespace_lock);
+ }
+
+ return (error);
+}
+
+/*
+ * Remove a device from the pool. Currently, this supports removing only hot
+ * spares.
+ */
+int
+spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
+{
+ vdev_t *vd;
+ nvlist_t **spares, *nv, **newspares;
+ uint_t i, j, nspares;
+ int ret = 0;
+
+ spa_config_enter(spa, RW_WRITER, FTAG);
+
+ vd = spa_lookup_by_guid(spa, guid);
+
+ nv = NULL;
+ if (spa->spa_spares != NULL &&
+ nvlist_lookup_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
+ &spares, &nspares) == 0) {
+ for (i = 0; i < nspares; i++) {
+ uint64_t theguid;
+
+ VERIFY(nvlist_lookup_uint64(spares[i],
+ ZPOOL_CONFIG_GUID, &theguid) == 0);
+ if (theguid == guid) {
+ nv = spares[i];
+ break;
+ }
+ }
+ }
+
+ /*
+ * We only support removing a hot spare, and only if it's not currently
+ * in use in this pool.
+ */
+ if (nv == NULL && vd == NULL) {
+ ret = ENOENT;
+ goto out;
+ }
+
+ if (nv == NULL && vd != NULL) {
+ ret = ENOTSUP;
+ goto out;
+ }
+
+ if (!unspare && nv != NULL && vd != NULL) {
+ ret = EBUSY;
+ goto out;
+ }
+
+ if (nspares == 1) {
+ newspares = NULL;
+ } else {
+ newspares = kmem_alloc((nspares - 1) * sizeof (void *),
+ KM_SLEEP);
+ for (i = 0, j = 0; i < nspares; i++) {
+ if (spares[i] != nv)
+ VERIFY(nvlist_dup(spares[i],
+ &newspares[j++], KM_SLEEP) == 0);
+ }
+ }
+
+ VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
+ DATA_TYPE_NVLIST_ARRAY) == 0);
+ VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
+ newspares, nspares - 1) == 0);
+ for (i = 0; i < nspares - 1; i++)
+ nvlist_free(newspares[i]);
+ kmem_free(newspares, (nspares - 1) * sizeof (void *));
+ spa_load_spares(spa);
+ spa->spa_sync_spares = B_TRUE;
+
+out:
+ spa_config_exit(spa, FTAG);
+
+ return (ret);
}
/*
@@ -1446,15 +1991,31 @@ static void
spa_vdev_replace_done(spa_t *spa)
{
vdev_t *vd;
+ vdev_t *pvd;
uint64_t guid;
+ uint64_t pguid = 0;
spa_config_enter(spa, RW_READER, FTAG);
while ((vd = spa_vdev_replace_done_hunt(spa->spa_root_vdev)) != NULL) {
guid = vd->vdev_guid;
+ /*
+ * If we have just finished replacing a hot spared device, then
+ * we need to detach the parent's first child (the original hot
+ * spare) as well.
+ */
+ pvd = vd->vdev_parent;
+ if (pvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
+ pvd->vdev_id == 0) {
+ ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
+ ASSERT(pvd->vdev_parent->vdev_children == 2);
+ pguid = pvd->vdev_parent->vdev_child[1]->vdev_guid;
+ }
spa_config_exit(spa, FTAG);
if (spa_vdev_detach(spa, guid, B_TRUE) != 0)
return;
+ if (pguid != 0 && spa_vdev_detach(spa, pguid, B_TRUE) != 0)
+ return;
spa_config_enter(spa, RW_READER, FTAG);
}
@@ -1475,8 +2036,36 @@ spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath)
txg = spa_vdev_enter(spa);
- if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL)
- return (spa_vdev_exit(spa, NULL, txg, ENOENT));
+ if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
+ /*
+ * Determine if this is a reference to a hot spare. In that
+ * case, update the path as stored in the spare list.
+ */
+ nvlist_t **spares;
+ uint_t i, nspares;
+ if (spa->spa_sparelist != NULL) {
+ VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
+ ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+ for (i = 0; i < nspares; i++) {
+ uint64_t theguid;
+ VERIFY(nvlist_lookup_uint64(spares[i],
+ ZPOOL_CONFIG_GUID, &theguid) == 0);
+ if (theguid == guid)
+ break;
+ }
+
+ if (i == nspares)
+ return (spa_vdev_exit(spa, NULL, txg, ENOENT));
+
+ VERIFY(nvlist_add_string(spares[i],
+ ZPOOL_CONFIG_PATH, newpath) == 0);
+ spa_load_spares(spa);
+ spa->spa_sync_spares = B_TRUE;
+ return (spa_vdev_exit(spa, NULL, txg, 0));
+ } else {
+ return (spa_vdev_exit(spa, NULL, txg, ENOENT));
+ }
+ }
if (!vd->vdev_ops->vdev_op_leaf)
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
@@ -2049,41 +2638,92 @@ spa_sync_deferred_frees(spa_t *spa, uint64_t txg)
}
static void
-spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
+spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
{
- nvlist_t *config;
char *packed = NULL;
size_t nvsize = 0;
dmu_buf_t *db;
- if (list_is_empty(&spa->spa_dirty_list))
- return;
-
- config = spa_config_generate(spa, NULL, dmu_tx_get_txg(tx), B_FALSE);
-
- if (spa->spa_config_syncing)
- nvlist_free(spa->spa_config_syncing);
- spa->spa_config_syncing = config;
-
- VERIFY(nvlist_size(config, &nvsize, NV_ENCODE_XDR) == 0);
+ VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);
packed = kmem_alloc(nvsize, KM_SLEEP);
- VERIFY(nvlist_pack(config, &packed, &nvsize, NV_ENCODE_XDR,
+ VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
KM_SLEEP) == 0);
- dmu_write(spa->spa_meta_objset, spa->spa_config_object, 0, nvsize,
- packed, tx);
+ dmu_write(spa->spa_meta_objset, obj, 0, nvsize, packed, tx);
kmem_free(packed, nvsize);
- VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset,
- spa->spa_config_object, FTAG, &db));
+ VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
dmu_buf_will_dirty(db, tx);
*(uint64_t *)db->db_data = nvsize;
dmu_buf_rele(db, FTAG);
}
+static void
+spa_sync_spares(spa_t *spa, dmu_tx_t *tx)
+{
+ nvlist_t *nvroot;
+ nvlist_t **spares;
+ int i;
+
+ if (!spa->spa_sync_spares)
+ return;
+
+ /*
+ * Update the MOS nvlist describing the list of available spares.
+ * spa_validate_spares() will have already made sure this nvlist is
+ * valid and the vdevs are labelled appropriately.
+ */
+ if (spa->spa_spares_object == 0) {
+ spa->spa_spares_object = dmu_object_alloc(spa->spa_meta_objset,
+ DMU_OT_PACKED_NVLIST, 1 << 14,
+ DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);
+ VERIFY(zap_update(spa->spa_meta_objset,
+ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SPARES,
+ sizeof (uint64_t), 1, &spa->spa_spares_object, tx) == 0);
+ }
+
+ VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ if (spa->spa_nspares == 0) {
+ VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ NULL, 0) == 0);
+ } else {
+ spares = kmem_alloc(spa->spa_nspares * sizeof (void *),
+ KM_SLEEP);
+ for (i = 0; i < spa->spa_nspares; i++)
+ spares[i] = vdev_config_generate(spa,
+ spa->spa_spares[i], B_FALSE, B_TRUE);
+ VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ spares, spa->spa_nspares) == 0);
+ for (i = 0; i < spa->spa_nspares; i++)
+ nvlist_free(spares[i]);
+ kmem_free(spares, spa->spa_nspares * sizeof (void *));
+ }
+
+ spa_sync_nvlist(spa, spa->spa_spares_object, nvroot, tx);
+
+ spa->spa_sync_spares = B_FALSE;
+}
+
+static void
+spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
+{
+ nvlist_t *config;
+
+ if (list_is_empty(&spa->spa_dirty_list))
+ return;
+
+ config = spa_config_generate(spa, NULL, dmu_tx_get_txg(tx), B_FALSE);
+
+ if (spa->spa_config_syncing)
+ nvlist_free(spa->spa_config_syncing);
+ spa->spa_config_syncing = config;
+
+ spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
+}
+
/*
* Sync the specified transaction group. New blocks may be dirtied as
* part of the process, so we iterate until it converges.
@@ -2109,6 +2749,29 @@ spa_sync(spa_t *spa, uint64_t txg)
VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj));
+ tx = dmu_tx_create_assigned(dp, txg);
+
+ /*
+ * If we are upgrading to ZFS_VERSION_RAIDZ_DEFLATE this txg,
+ * set spa_deflate if we have no raid-z vdevs.
+ */
+ if (spa->spa_ubsync.ub_version < ZFS_VERSION_RAIDZ_DEFLATE &&
+ spa->spa_uberblock.ub_version >= ZFS_VERSION_RAIDZ_DEFLATE) {
+ int i;
+
+ for (i = 0; i < rvd->vdev_children; i++) {
+ vd = rvd->vdev_child[i];
+ if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
+ break;
+ }
+ if (i == rvd->vdev_children) {
+ spa->spa_deflate = TRUE;
+ VERIFY(0 == zap_add(spa->spa_meta_objset,
+ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
+ sizeof (uint64_t), 1, &spa->spa_deflate, tx));
+ }
+ }
+
/*
* If anything has changed in this txg, push the deferred frees
* from the previous txg. If not, leave them alone so that we
@@ -2124,12 +2787,9 @@ spa_sync(spa_t *spa, uint64_t txg)
do {
spa->spa_sync_pass++;
- tx = dmu_tx_create_assigned(dp, txg);
spa_sync_config_object(spa, tx);
- dmu_tx_commit(tx);
-
+ spa_sync_spares(spa, tx);
spa_errlog_sync(spa, txg);
-
dsl_pool_sync(dp, txg);
dirty_vdevs = 0;
@@ -2138,10 +2798,7 @@ spa_sync(spa_t *spa, uint64_t txg)
dirty_vdevs++;
}
- tx = dmu_tx_create_assigned(dp, txg);
bplist_sync(bpl, tx);
- dmu_tx_commit(tx);
-
} while (dirty_vdevs);
bplist_close(bpl);
@@ -2175,6 +2832,8 @@ spa_sync(spa_t *spa, uint64_t txg)
VERIFY(vdev_config_sync(rvd, txg) == 0);
}
+ dmu_tx_commit(tx);
+
/*
* Clear the dirty config list.
*/
@@ -2219,7 +2878,7 @@ spa_sync(spa_t *spa, uint64_t txg)
/*
* It had better be the case that we didn't dirty anything
- * since spa_sync_labels().
+ * since vdev_config_sync().
*/
ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
@@ -2319,4 +2978,18 @@ spa_upgrade(spa_t *spa)
vdev_config_dirty(spa->spa_root_vdev);
spa_config_exit(spa, FTAG);
+
+ txg_wait_synced(spa_get_dsl(spa), 0);
+}
+
+boolean_t
+spa_has_spare(spa_t *spa, uint64_t guid)
+{
+ int i;
+
+ for (i = 0; i < spa->spa_nspares; i++)
+ if (spa->spa_spares[i]->vdev_guid == guid)
+ return (B_TRUE);
+
+ return (B_FALSE);
}
diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c
index 906f2e5470..03ba60b0e3 100644
--- a/usr/src/uts/common/fs/zfs/spa_config.c
+++ b/usr/src/uts/common/fs/zfs/spa_config.c
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -279,7 +280,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
- spa->spa_uberblock.ub_version) == 0);
+ spa_version(spa)) == 0);
VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
spa_name(spa)) == 0);
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
@@ -294,10 +295,13 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
vd->vdev_top->vdev_guid) == 0);
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
vd->vdev_guid) == 0);
+ if (vd->vdev_isspare)
+ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
+ 1ULL) == 0);
vd = vd->vdev_top; /* label contains top config */
}
- nvroot = vdev_config_generate(vd, getstats);
+ nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE);
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
nvlist_free(nvroot);
diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c
index 11267729d9..3d2ec9f0b7 100644
--- a/usr/src/uts/common/fs/zfs/spa_misc.c
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c
@@ -175,6 +175,9 @@ static kcondvar_t spa_namespace_cv;
static int spa_active_count;
static int spa_max_replication_override = SPA_DVAS_PER_BP;
+static avl_tree_t spa_spare_avl;
+static kmutex_t spa_spare_lock;
+
kmem_cache_t *spa_buffer_pool;
int spa_mode;
@@ -338,6 +341,99 @@ spa_refcount_zero(spa_t *spa)
/*
* ==========================================================================
+ * SPA spare tracking
+ * ==========================================================================
+ */
+
+/*
+ * We track spare information on a global basis. This allows us to do two
+ * things: determine when a spare is no longer referenced by any active pool,
+ * and (quickly) determine if a spare is currently in use in another pool on the
+ * system.
+ */
+typedef struct spa_spare {
+ uint64_t spare_guid;
+ avl_node_t spare_avl;
+ int spare_count;
+} spa_spare_t;
+
+static int
+spa_spare_compare(const void *a, const void *b)
+{
+ const spa_spare_t *sa = a;
+ const spa_spare_t *sb = b;
+
+ if (sa->spare_guid < sb->spare_guid)
+ return (-1);
+ else if (sa->spare_guid > sb->spare_guid)
+ return (1);
+ else
+ return (0);
+}
+
+void
+spa_spare_add(uint64_t guid)
+{
+ avl_index_t where;
+ spa_spare_t search;
+ spa_spare_t *spare;
+
+ mutex_enter(&spa_spare_lock);
+
+ search.spare_guid = guid;
+ if ((spare = avl_find(&spa_spare_avl, &search, &where)) != NULL) {
+ spare->spare_count++;
+ } else {
+ spare = kmem_alloc(sizeof (spa_spare_t), KM_SLEEP);
+ spare->spare_guid = guid;
+ spare->spare_count = 1;
+ avl_insert(&spa_spare_avl, spare, where);
+ }
+
+ mutex_exit(&spa_spare_lock);
+}
+
+void
+spa_spare_remove(uint64_t guid)
+{
+ spa_spare_t search;
+ spa_spare_t *spare;
+ avl_index_t where;
+
+ mutex_enter(&spa_spare_lock);
+
+ search.spare_guid = guid;
+ spare = avl_find(&spa_spare_avl, &search, &where);
+
+ ASSERT(spare != NULL);
+
+ if (--spare->spare_count == 0) {
+ avl_remove(&spa_spare_avl, spare);
+ kmem_free(spare, sizeof (spa_spare_t));
+ }
+
+ mutex_exit(&spa_spare_lock);
+}
+
+boolean_t
+spa_spare_inuse(uint64_t guid)
+{
+ spa_spare_t search;
+ avl_index_t where;
+ boolean_t ret;
+
+ mutex_enter(&spa_spare_lock);
+
+ search.spare_guid = guid;
+ ret = (avl_find(&spa_spare_avl, &search, &where) != NULL);
+
+ mutex_exit(&spa_spare_lock);
+
+ return (ret);
+}
+
+/*
+ * ==========================================================================
* SPA config locking
* ==========================================================================
*/
@@ -779,7 +875,7 @@ spa_metaslab_class_select(spa_t *spa)
}
/*
- * Return pool-wide allocated space.
+ * Return how much space is allocated in the pool (i.e. sum of all asize)
*/
uint64_t
spa_get_alloc(spa_t *spa)
@@ -788,7 +884,7 @@ spa_get_alloc(spa_t *spa)
}
/*
- * Return pool-wide allocated space.
+ * Return how much (raid-z inflated) space there is in the pool.
*/
uint64_t
spa_get_space(spa_t *spa)
@@ -796,6 +892,18 @@ spa_get_space(spa_t *spa)
return (spa->spa_root_vdev->vdev_stat.vs_space);
}
+/*
+ * Return the amount of raid-z-deflated space in the pool.
+ */
+uint64_t
+spa_get_dspace(spa_t *spa)
+{
+ if (spa->spa_deflate)
+ return (spa->spa_root_vdev->vdev_stat.vs_dspace);
+ else
+ return (spa->spa_root_vdev->vdev_stat.vs_space);
+}
+
/* ARGSUSED */
uint64_t
spa_get_asize(spa_t *spa, uint64_t lsize)
@@ -828,6 +936,23 @@ spa_max_replication(spa_t *spa)
return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override));
}
+uint64_t
+bp_get_dasize(spa_t *spa, const blkptr_t *bp)
+{
+ int sz = 0, i;
+
+ if (!spa->spa_deflate)
+ return (BP_GET_ASIZE(bp));
+
+ for (i = 0; i < SPA_DVAS_PER_BP; i++) {
+ vdev_t *vd =
+ vdev_lookup_top(spa, DVA_GET_VDEV(&bp->blk_dva[i]));
+ sz += (DVA_GET_ASIZE(&bp->blk_dva[i]) >> SPA_MINBLOCKSHIFT) *
+ vd->vdev_deflate_ratio;
+ }
+ return (sz);
+}
+
/*
* ==========================================================================
* Initialization and Termination
@@ -864,6 +989,9 @@ spa_init(int mode)
avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t),
offsetof(spa_t, spa_avl));
+ avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_spare_t),
+ offsetof(spa_spare_t, spare_avl));
+
spa_mode = mode;
refcount_init();
@@ -885,6 +1013,7 @@ spa_fini(void)
refcount_fini();
avl_destroy(&spa_namespace_avl);
+ avl_destroy(&spa_spare_avl);
cv_destroy(&spa_namespace_cv);
mutex_destroy(&spa_namespace_lock);
diff --git a/usr/src/uts/common/fs/zfs/sys/bplist.h b/usr/src/uts/common/fs/zfs/sys/bplist.h
index c716fe7aa6..b4c83765c8 100644
--- a/usr/src/uts/common/fs/zfs/sys/bplist.h
+++ b/usr/src/uts/common/fs/zfs/sys/bplist.h
@@ -45,8 +45,12 @@ typedef struct bplist_phys {
*/
uint64_t bpl_entries;
uint64_t bpl_bytes;
+ uint64_t bpl_comp;
+ uint64_t bpl_uncomp;
} bplist_phys_t;
+#define BPLIST_SIZE_V0 (2 * sizeof (uint64_t))
+
typedef struct bplist_q {
blkptr_t bpq_blk;
void *bpq_next;
@@ -56,8 +60,9 @@ typedef struct bplist {
kmutex_t bpl_lock;
objset_t *bpl_mos;
uint64_t bpl_object;
- int bpl_blockshift;
- int bpl_bpshift;
+ uint8_t bpl_blockshift;
+ uint8_t bpl_bpshift;
+ uint8_t bpl_havecomp;
bplist_q_t *bpl_queue;
bplist_phys_t *bpl_phys;
dmu_buf_t *bpl_dbuf;
@@ -74,6 +79,8 @@ extern int bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx);
extern void bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp);
extern void bplist_sync(bplist_t *bpl, dmu_tx_t *tx);
extern void bplist_vacate(bplist_t *bpl, dmu_tx_t *tx);
+extern int bplist_space(bplist_t *bpl,
+ uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h
index 78dd9632e6..88b59a1618 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h
@@ -177,12 +177,17 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
*/
typedef void dmu_byteswap_func_t(void *buf, size_t size);
+/*
+ * The names of zap entries in the DIRECTORY_OBJECT of the MOS.
+ */
#define DMU_POOL_DIRECTORY_OBJECT 1
#define DMU_POOL_CONFIG "config"
#define DMU_POOL_ROOT_DATASET "root_dataset"
#define DMU_POOL_SYNC_BPLIST "sync_bplist"
#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub"
#define DMU_POOL_ERRLOG_LAST "errlog_last"
+#define DMU_POOL_SPARES "spares"
+#define DMU_POOL_DEFLATE "deflate"
/*
* Allocate an object from this objset. The range of object numbers
diff --git a/usr/src/uts/common/fs/zfs/sys/dnode.h b/usr/src/uts/common/fs/zfs/sys/dnode.h
index d2c9d4f3bf..48b06a6749 100644
--- a/usr/src/uts/common/fs/zfs/sys/dnode.h
+++ b/usr/src/uts/common/fs/zfs/sys/dnode.h
@@ -75,6 +75,9 @@ extern "C" {
#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
+#define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \
+ (dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT)
+
#define EPB(blkshift, typeshift) (1 << (blkshift - typeshift))
struct dmu_buf_impl;
@@ -87,6 +90,9 @@ enum dnode_dirtycontext {
DN_DIRTY_SYNC
};
+/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
+#define DNODE_FLAG_USED_BYTES (1<<0)
+
typedef struct dnode_phys {
uint8_t dn_type; /* dmu_object_type_t */
uint8_t dn_indblkshift; /* ln2(indirect block size) */
@@ -95,14 +101,14 @@ typedef struct dnode_phys {
uint8_t dn_bonustype; /* type of data in bonus buffer */
uint8_t dn_checksum; /* ZIO_CHECKSUM type */
uint8_t dn_compress; /* ZIO_COMPRESS type */
- uint8_t dn_pad1[1];
+ uint8_t dn_flags; /* DNODE_FLAG_* */
uint16_t dn_datablkszsec; /* data block size in 512b sectors */
uint16_t dn_bonuslen; /* length of dn_bonus */
uint8_t dn_pad2[4];
/* accounting is protected by dn_dirty_mtx */
uint64_t dn_maxblkid; /* largest allocated block ID */
- uint64_t dn_secphys; /* 512b sectors of disk space used */
+ uint64_t dn_used; /* bytes (or sectors) of disk space */
uint64_t dn_pad3[4];
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
index 2a4ce242dc..912445b160 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
@@ -44,6 +44,15 @@ struct dsl_pool;
typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
+#define DS_FLAG_INCONSISTENT (1ULL<<0)
+/*
+ * NB: nopromote cannot yet be set, but we want support for it in this
+ * on-disk version, so that we don't need to upgrade for it later. It
+ * will be needed when we implement 'zfs split' (where the split off
+ * clone should not be promoted).
+ */
+#define DS_FLAG_NOPROMOTE (1ULL<<1)
+
typedef struct dsl_dataset_phys {
uint64_t ds_dir_obj;
uint64_t ds_prev_snap_obj;
@@ -65,9 +74,9 @@ typedef struct dsl_dataset_phys {
*/
uint64_t ds_fsid_guid;
uint64_t ds_guid;
- uint64_t ds_inconsistent; /* boolean */
+ uint64_t ds_flags;
blkptr_t ds_bp;
- uint64_t ds_pad[8]; /* pad out to 256 bytes for good measure */
+ uint64_t ds_pad[8]; /* pad out to 320 bytes for good measure */
} dsl_dataset_phys_t;
typedef struct dsl_dataset {
@@ -119,6 +128,7 @@ int dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx);
int dsl_dataset_rollback(const char *name);
int dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx);
int dsl_dataset_rename(const char *name, const char *newname);
+int dsl_dataset_promote(const char *name);
void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
void *p, dsl_dataset_evict_func_t func);
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
index 5c23fdc497..123d6d128f 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
@@ -121,6 +121,7 @@ int dsl_dir_sync_task(dsl_dir_t *dd,
int dsl_dir_set_quota(const char *ddname, uint64_t quota);
int dsl_dir_set_reservation(const char *ddname, uint64_t reservation);
int dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx);
+int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
#ifdef ZFS_DEBUG
#define dprintf_dd(dd, fmt, ...) do { \
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_prop.h b/usr/src/uts/common/fs/zfs/sys/dsl_prop.h
index bf03cfa799..95094641c5 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_prop.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_prop.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -42,10 +41,9 @@ struct dsl_dataset;
/* The callback func may not call into the DMU or DSL! */
typedef void (dsl_prop_changed_cb_t)(void *arg, uint64_t newval);
-#define DSL_PROP_VALUE_UNDEFINED (-1ULL)
-
typedef struct dsl_prop_cb_record {
list_node_t cbr_node; /* link on dd_prop_cbs */
+ struct dsl_dataset *cbr_ds;
const char *cbr_propname;
dsl_prop_changed_cb_t *cbr_func;
void *cbr_arg;
@@ -55,6 +53,7 @@ int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
+int dsl_prop_numcb(struct dsl_dataset *ds);
int dsl_prop_get(const char *ddname, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h
index a51cfd524f..829c025af2 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h
@@ -232,7 +232,11 @@ typedef struct blkptr {
#define BP_GET_ASIZE(bp) \
(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
- DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+ DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+
+#define BP_GET_UCSIZE(bp) \
+ ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
+ BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp));
#define BP_GET_NDVAS(bp) \
(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
@@ -326,8 +330,14 @@ extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
int replacing);
extern int spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done);
+extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
+/* spare state (which is global across all pools) */
+extern void spa_spare_add(uint64_t guid);
+extern void spa_spare_remove(uint64_t guid);
+extern boolean_t spa_spare_inuse(uint64_t guid);
+
/* scrubbing */
extern int spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force);
extern void spa_scrub_suspend(spa_t *spa);
@@ -390,12 +400,14 @@ extern char *spa_name(spa_t *spa);
extern uint64_t spa_guid(spa_t *spa);
extern uint64_t spa_last_synced_txg(spa_t *spa);
extern uint64_t spa_first_txg(spa_t *spa);
+extern uint64_t spa_version(spa_t *spa);
extern int spa_state(spa_t *spa);
extern uint64_t spa_freeze_txg(spa_t *spa);
struct metaslab_class;
extern struct metaslab_class *spa_metaslab_class_select(spa_t *spa);
extern uint64_t spa_get_alloc(spa_t *spa);
extern uint64_t spa_get_space(spa_t *spa);
+extern uint64_t spa_get_dspace(spa_t *spa);
extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
extern uint64_t spa_version(spa_t *spa);
extern int spa_max_replication(spa_t *spa);
@@ -412,6 +424,8 @@ extern void spa_freeze(spa_t *spa);
extern void spa_upgrade(spa_t *spa);
extern void spa_evict_all(void);
extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid);
+extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
+extern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp);
/* error handling */
struct zbookmark;
diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
index e4df4c9eab..9a2fea9c21 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
@@ -65,7 +65,6 @@ struct spa {
nvlist_t *spa_config; /* last synced config */
nvlist_t *spa_config_syncing; /* currently syncing config */
uint64_t spa_config_txg; /* txg of last config change */
- spa_config_lock_t spa_config_lock; /* configuration changes */
kmutex_t spa_config_cache_lock; /* for spa_config RW_READER */
int spa_sync_pass; /* iterate-to-convergence */
int spa_state; /* pool state */
@@ -84,6 +83,11 @@ struct spa {
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
vdev_t *spa_root_vdev; /* top-level vdev container */
list_t spa_dirty_list; /* vdevs with dirty labels */
+ uint64_t spa_spares_object; /* MOS object for spare list */
+ nvlist_t *spa_sparelist; /* cached spare config */
+ vdev_t **spa_spares; /* available hot spares */
+ int spa_nspares; /* number of hot spares */
+ boolean_t spa_sync_spares; /* sync the spares list */
uint64_t spa_config_object; /* MOS object for pool config */
uint64_t spa_syncing_txg; /* txg currently syncing */
uint64_t spa_sync_bplist_obj; /* object for deferred frees */
@@ -122,11 +126,13 @@ struct spa {
kmutex_t spa_errlist_lock; /* error list/ereport lock */
avl_tree_t spa_errlist_last; /* last error list */
avl_tree_t spa_errlist_scrub; /* scrub error list */
+ uint64_t spa_deflate; /* should we deflate? */
/*
* spa_refcnt must be the last element because it changes size based on
* compilation options. In order for the MDB module to function
* correctly, the other fields must remain in the same location.
*/
+ spa_config_lock_t spa_config_lock; /* configuration changes */
refcount_t spa_refcount; /* number of opens */
};
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h
index 5a2e6750a0..760aeae560 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h
@@ -60,9 +60,10 @@ typedef struct vdev_knob {
extern int vdev_open(vdev_t *);
extern int vdev_validate(vdev_t *);
extern void vdev_close(vdev_t *);
-extern int vdev_create(vdev_t *, uint64_t txg);
+extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
extern void vdev_init(vdev_t *, uint64_t txg);
extern void vdev_reopen(vdev_t *);
+extern int vdev_validate_spare(vdev_t *);
extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev);
extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid);
@@ -85,8 +86,8 @@ extern void vdev_propagate_state(vdev_t *vd);
extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state,
vdev_aux_t aux);
-extern void vdev_space_update(vdev_t *vd, uint64_t space_delta,
- uint64_t alloc_delta);
+extern void vdev_space_update(vdev_t *vd, int64_t space_delta,
+ int64_t alloc_delta);
extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
@@ -116,7 +117,8 @@ extern void vdev_config_dirty(vdev_t *vd);
extern void vdev_config_clean(vdev_t *vd);
extern int vdev_config_sync(vdev_t *vd, uint64_t txg);
-extern nvlist_t *vdev_config_generate(vdev_t *vd, int getstats);
+extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
+ boolean_t getstats, boolean_t isspare);
/*
* Label routines
@@ -125,7 +127,8 @@ struct uberblock;
extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
extern nvlist_t *vdev_label_read_config(vdev_t *vd);
extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub);
-int vdev_label_init(vdev_t *vd, uint64_t create_txg);
+int vdev_label_init(vdev_t *vd, uint64_t create_txg, boolean_t isreplacing);
+int vdev_label_spare(vdev_t *vd, uint64_t create_txg);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
index 1b18df8cda..75e642a495 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
@@ -153,6 +153,7 @@ struct vdev {
txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */
uint8_t vdev_reopen_wanted; /* async reopen wanted? */
list_node_t vdev_dirty_node; /* config dirty list */
+ uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */
/*
* Leaf vdev state.
@@ -162,6 +163,7 @@ struct vdev {
txg_node_t vdev_dtl_node; /* per-txg dirty DTL linkage */
uint64_t vdev_wholedisk; /* true if this is a whole disk */
uint64_t vdev_offline; /* device taken offline? */
+ uint64_t vdev_nparity; /* number of parity devices for raidz */
char *vdev_path; /* vdev path (if any) */
char *vdev_devid; /* vdev devid (if any) */
uint64_t vdev_fault_arg; /* fault injection paramater */
@@ -170,6 +172,7 @@ struct vdev {
uint8_t vdev_cache_active; /* vdev_cache and vdev_queue */
uint8_t vdev_tmpoffline; /* device taken offline temporarily? */
uint8_t vdev_detached; /* device detached? */
+ uint64_t vdev_isspare; /* was a hot spare */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
vdev_cache_t vdev_cache; /* physical block cache */
uint64_t vdev_not_present; /* not present during import */
@@ -245,12 +248,13 @@ typedef struct vdev_label {
#define VDEV_ALLOC_LOAD 0
#define VDEV_ALLOC_ADD 1
+#define VDEV_ALLOC_SPARE 2
/*
* Allocate or free a vdev
*/
-extern vdev_t *vdev_alloc(spa_t *spa, nvlist_t *config, vdev_t *parent,
- uint_t id, int alloctype);
+extern int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *config,
+ vdev_t *parent, uint_t id, int alloctype);
extern void vdev_free(vdev_t *vd);
/*
@@ -280,6 +284,7 @@ extern vdev_ops_t vdev_raidz_ops;
extern vdev_ops_t vdev_disk_ops;
extern vdev_ops_t vdev_file_ops;
extern vdev_ops_t vdev_missing_ops;
+extern vdev_ops_t vdev_spare_ops;
/*
* Common size functions
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index 726852cb4d..0bbd073fd7 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -49,6 +50,7 @@ static vdev_ops_t *vdev_ops_table[] = {
&vdev_raidz_ops,
&vdev_mirror_ops,
&vdev_replacing_ops,
+ &vdev_spare_ops,
&vdev_disk_ops,
&vdev_file_ops,
&vdev_missing_ops,
@@ -324,6 +326,9 @@ vdev_free_common(vdev_t *vd)
if (vd->vdev_devid)
spa_strfree(vd->vdev_devid);
+ if (vd->vdev_isspare)
+ spa_spare_remove(vd->vdev_guid);
+
txg_list_destroy(&vd->vdev_ms_list);
txg_list_destroy(&vd->vdev_dtl_list);
mutex_enter(&vd->vdev_dtl_lock);
@@ -345,8 +350,9 @@ vdev_free_common(vdev_t *vd)
* creating a new vdev or loading an existing one - the behavior is slightly
* different for each case.
*/
-vdev_t *
-vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype)
+int
+vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
+ int alloctype)
{
vdev_ops_t *ops;
char *type;
@@ -356,10 +362,10 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype)
ASSERT(spa_config_held(spa, RW_WRITER));
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
- return (NULL);
+ return (EINVAL);
if ((ops = vdev_getops(type)) == NULL)
- return (NULL);
+ return (EINVAL);
/*
* If this is a load, get the vdev guid from the nvlist.
@@ -370,12 +376,21 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype)
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &label_id) ||
label_id != id)
- return (NULL);
+ return (EINVAL);
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
- return (NULL);
+ return (EINVAL);
+ } else if (alloctype == VDEV_ALLOC_SPARE) {
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
+ return (EINVAL);
}
+ /*
+ * The first allocated vdev must be of type 'root'.
+ */
+ if (ops != &vdev_root_ops && spa->spa_root_vdev == NULL)
+ return (EINVAL);
+
vd = vdev_alloc_common(spa, id, guid, ops);
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0)
@@ -384,6 +399,41 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype)
vd->vdev_devid = spa_strdup(vd->vdev_devid);
/*
+	 * Set the nparity property for RAID-Z vdevs.
+ */
+ if (ops == &vdev_raidz_ops) {
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
+ &vd->vdev_nparity) == 0) {
+ /*
+ * Currently, we can only support 2 parity devices.
+ */
+ if (vd->vdev_nparity > 2)
+ return (EINVAL);
+ /*
+ * Older versions can only support 1 parity device.
+ */
+ if (vd->vdev_nparity == 2 &&
+ spa_version(spa) < ZFS_VERSION_RAID6)
+ return (ENOTSUP);
+
+ } else {
+ /*
+ * We require the parity to be specified for SPAs that
+ * support multiple parity levels.
+ */
+ if (spa_version(spa) >= ZFS_VERSION_RAID6)
+ return (EINVAL);
+
+ /*
+ * Otherwise, we default to 1 parity device for RAID-Z.
+ */
+ vd->vdev_nparity = 1;
+ }
+ } else {
+ vd->vdev_nparity = 0;
+ }
+
+ /*
* Set the whole_disk property. If it's not specified, leave the value
* as -1.
*/
@@ -404,6 +454,15 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype)
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift);
/*
+ * Look for the 'is_spare' flag. If this is the case, then we are a
+ * repurposed hot spare.
+ */
+ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
+ &vd->vdev_isspare);
+ if (vd->vdev_isspare)
+ spa_spare_add(vd->vdev_guid);
+
+ /*
* If we're a top-level vdev, try to load the allocation parameters.
*/
if (parent && !parent->vdev_parent && alloctype == VDEV_ALLOC_LOAD) {
@@ -430,7 +489,9 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype)
*/
vdev_add_child(parent, vd);
- return (vd);
+ *vdp = vd;
+
+ return (0);
}
void
@@ -462,6 +523,7 @@ vdev_free(vdev_t *vd)
vdev_metaslab_fini(vd);
ASSERT3U(vd->vdev_stat.vs_space, ==, 0);
+ ASSERT3U(vd->vdev_stat.vs_dspace, ==, 0);
ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0);
/*
@@ -506,9 +568,11 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
tvd->vdev_stat.vs_alloc = svd->vdev_stat.vs_alloc;
tvd->vdev_stat.vs_space = svd->vdev_stat.vs_space;
+ tvd->vdev_stat.vs_dspace = svd->vdev_stat.vs_dspace;
svd->vdev_stat.vs_alloc = 0;
svd->vdev_stat.vs_space = 0;
+ svd->vdev_stat.vs_dspace = 0;
for (t = 0; t < TXG_SIZE; t++) {
while ((msp = txg_list_remove(&svd->vdev_ms_list, t)) != NULL)
@@ -526,6 +590,9 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
tvd->vdev_reopen_wanted = svd->vdev_reopen_wanted;
svd->vdev_reopen_wanted = 0;
+
+ tvd->vdev_deflate_ratio = svd->vdev_deflate_ratio;
+ svd->vdev_deflate_ratio = 0;
}
static void
@@ -585,13 +652,28 @@ vdev_remove_parent(vdev_t *cvd)
ASSERT(mvd->vdev_children == 1);
ASSERT(mvd->vdev_ops == &vdev_mirror_ops ||
- mvd->vdev_ops == &vdev_replacing_ops);
+ mvd->vdev_ops == &vdev_replacing_ops ||
+ mvd->vdev_ops == &vdev_spare_ops);
cvd->vdev_ashift = mvd->vdev_ashift;
vdev_remove_child(mvd, cvd);
vdev_remove_child(pvd, mvd);
cvd->vdev_id = mvd->vdev_id;
vdev_add_child(pvd, cvd);
+ /*
+ * If we created a new toplevel vdev, then we need to change the child's
+ * vdev GUID to match the old toplevel vdev. Otherwise, we could have
+ * detached an offline device, and when we go to import the pool we'll
+ * think we have two toplevel vdevs, instead of a different version of
+ * the same toplevel vdev.
+ */
+ if (cvd->vdev_top == cvd) {
+ pvd->vdev_guid_sum -= cvd->vdev_guid;
+ cvd->vdev_guid_sum -= cvd->vdev_guid;
+ cvd->vdev_guid = mvd->vdev_guid;
+ cvd->vdev_guid_sum += mvd->vdev_guid;
+ pvd->vdev_guid_sum += cvd->vdev_guid;
+ }
vdev_top_update(cvd->vdev_top, cvd->vdev_top);
if (cvd == cvd->vdev_top)
@@ -801,6 +883,18 @@ vdev_open(vdev_t *vd)
}
/*
+ * If this is a top-level vdev, compute the raidz-deflation
+ * ratio. Note, we hard-code in 128k (1<<17) because it is the
+ * current "typical" blocksize. Even if SPA_MAXBLOCKSIZE
+ * changes, this algorithm must never change, or we will
+ * inconsistently account for existing bp's.
+ */
+ if (vd->vdev_top == vd) {
+ vd->vdev_deflate_ratio = (1<<17) /
+ (vdev_psize_to_asize(vd, 1<<17) >> SPA_MINBLOCKSHIFT);
+ }
+
+ /*
* This allows the ZFS DE to close cases appropriately. If a device
* goes away and later returns, we want to close the associated case.
* But it's not enough to simply post this only when a device goes from
@@ -933,7 +1027,7 @@ vdev_reopen(vdev_t *vd)
}
int
-vdev_create(vdev_t *vd, uint64_t txg)
+vdev_create(vdev_t *vd, uint64_t txg, boolean_t isreplacing)
{
int error;
@@ -952,7 +1046,7 @@ vdev_create(vdev_t *vd, uint64_t txg)
/*
* Recursively initialize all labels.
*/
- if ((error = vdev_label_init(vd, txg)) != 0) {
+ if ((error = vdev_label_init(vd, txg, isreplacing)) != 0) {
vdev_close(vd);
return (error);
}
@@ -1202,6 +1296,45 @@ vdev_load(vdev_t *vd)
VDEV_AUX_CORRUPT_DATA);
}
+/*
+ * This special case of vdev_spare() is used for hot spares. Its sole purpose
+ * is to set the vdev state for the associated vdev. To do this, we make sure
+ * that we can open the underlying device, then try to read the label, and make
+ * sure that the label is sane and that it hasn't been repurposed to another
+ * pool.
+ */
+int
+vdev_validate_spare(vdev_t *vd)
+{
+ nvlist_t *label;
+ uint64_t guid, version;
+ uint64_t state;
+
+ if ((label = vdev_label_read_config(vd)) == NULL) {
+ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+ VDEV_AUX_CORRUPT_DATA);
+ return (-1);
+ }
+
+ if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_VERSION, &version) != 0 ||
+ version > ZFS_VERSION ||
+ nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) != 0 ||
+ guid != vd->vdev_guid ||
+ nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0) {
+ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+ VDEV_AUX_CORRUPT_DATA);
+ nvlist_free(label);
+ return (-1);
+ }
+
+ /*
+ * We don't actually check the pool state here. If it's in fact in
+ * use by another pool, we update this fact on the fly when requested.
+ */
+ nvlist_free(label);
+ return (0);
+}
+
void
vdev_sync_done(vdev_t *vd, uint64_t txg)
{
@@ -1560,14 +1693,31 @@ vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type, boolean_t complete)
* Update the in-core space usage stats for this vdev and the root vdev.
*/
void
-vdev_space_update(vdev_t *vd, uint64_t space_delta, uint64_t alloc_delta)
+vdev_space_update(vdev_t *vd, int64_t space_delta, int64_t alloc_delta)
{
ASSERT(vd == vd->vdev_top);
+ int64_t dspace_delta = space_delta;
do {
+ if (vd->vdev_ms_count) {
+ /*
+ * If this is a top-level vdev, apply the
+ * inverse of its psize-to-asize (ie. RAID-Z)
+ * space-expansion factor. We must calculate
+ * this here and not at the root vdev because
+ * the root vdev's psize-to-asize is simply the
+			 * max of its children's, thus not accurate
+ * enough for us.
+ */
+ ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0);
+ dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) *
+ vd->vdev_deflate_ratio;
+ }
+
mutex_enter(&vd->vdev_stat_lock);
vd->vdev_stat.vs_space += space_delta;
vd->vdev_stat.vs_alloc += alloc_delta;
+ vd->vdev_stat.vs_dspace += dspace_delta;
mutex_exit(&vd->vdev_stat_lock);
} while ((vd = vd->vdev_parent) != NULL);
}
diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c
index 4627745067..335b3e5a36 100644
--- a/usr/src/uts/common/fs/zfs/vdev_label.c
+++ b/usr/src/uts/common/fs/zfs/vdev_label.c
@@ -187,7 +187,8 @@ vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
* Generate the nvlist representing this vdev's config.
*/
nvlist_t *
-vdev_config_generate(vdev_t *vd, int getstats)
+vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
+ boolean_t isspare)
{
nvlist_t *nv = NULL;
@@ -195,7 +196,9 @@ vdev_config_generate(vdev_t *vd, int getstats)
VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
vd->vdev_ops->vdev_op_type) == 0);
- VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id) == 0);
+ if (!isspare)
+ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id)
+ == 0);
VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0);
if (vd->vdev_path != NULL)
@@ -206,6 +209,27 @@ vdev_config_generate(vdev_t *vd, int getstats)
VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_DEVID,
vd->vdev_devid) == 0);
+ if (vd->vdev_nparity != 0) {
+ ASSERT(strcmp(vd->vdev_ops->vdev_op_type,
+ VDEV_TYPE_RAIDZ) == 0);
+
+ /*
+ * Make sure someone hasn't managed to sneak a fancy new vdev
+ * into a crufty old storage pool.
+ */
+ ASSERT(vd->vdev_nparity == 1 ||
+ (vd->vdev_nparity == 2 &&
+ spa_version(spa) >= ZFS_VERSION_RAID6));
+
+ /*
+ * Note that we'll add the nparity tag even on storage pools
+ * that only support a single parity device -- older software
+ * will just ignore it.
+ */
+ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY,
+ vd->vdev_nparity) == 0);
+ }
+
if (vd->vdev_wholedisk != -1ULL)
VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
vd->vdev_wholedisk) == 0);
@@ -213,7 +237,10 @@ vdev_config_generate(vdev_t *vd, int getstats)
if (vd->vdev_not_present)
VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1) == 0);
- if (vd == vd->vdev_top) {
+ if (vd->vdev_isspare)
+ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1) == 0);
+
+ if (!isspare && vd == vd->vdev_top) {
VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
vd->vdev_ms_array) == 0);
VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
@@ -243,8 +270,8 @@ vdev_config_generate(vdev_t *vd, int getstats)
KM_SLEEP);
for (c = 0; c < vd->vdev_children; c++)
- child[c] = vdev_config_generate(vd->vdev_child[c],
- getstats);
+ child[c] = vdev_config_generate(spa, vd->vdev_child[c],
+ getstats, isspare);
VERIFY(nvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
child, vd->vdev_children) == 0);
@@ -307,8 +334,9 @@ vdev_label_read_config(vdev_t *vd)
return (config);
}
-int
-vdev_label_init(vdev_t *vd, uint64_t crtxg)
+static int
+vdev_label_common(vdev_t *vd, uint64_t crtxg, boolean_t isspare,
+ boolean_t isreplacing)
{
spa_t *spa = vd->vdev_spa;
nvlist_t *label;
@@ -324,7 +352,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg)
ASSERT(spa_config_held(spa, RW_WRITER));
for (c = 0; c < vd->vdev_children; c++)
- if ((error = vdev_label_init(vd->vdev_child[c], crtxg)) != 0)
+ if ((error = vdev_label_common(vd->vdev_child[c],
+ crtxg, isspare, isreplacing)) != 0)
return (error);
if (!vd->vdev_ops->vdev_op_leaf)
@@ -346,7 +375,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg)
*/
if (crtxg != 0 &&
(label = vdev_label_read_config(vd)) != NULL) {
- uint64_t state, pool_guid, device_guid, txg;
+ uint64_t state, pool_guid, device_guid, txg, spare;
uint64_t mycrtxg = 0;
(void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
@@ -361,11 +390,61 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg)
spa_guid_exists(pool_guid, device_guid) &&
nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG,
&txg) == 0 && (txg != 0 || mycrtxg == crtxg)) {
- dprintf("vdev %s in use, pool_state %d\n",
- vdev_description(vd), state);
+ if (isspare && pool_guid != spa_guid(spa) &&
+ nvlist_lookup_uint64(label,
+ ZPOOL_CONFIG_IS_SPARE, &spare) == 0 &&
+ !spa_has_spare(spa, device_guid)) {
+ /*
+ * If this is a request to add a spare that
+ * is actively in use in another pool, simply
+ * return success, after updating the guid.
+ */
+ vdev_t *pvd = vd->vdev_parent;
+
+ for (; pvd != NULL; pvd = pvd->vdev_parent) {
+ pvd->vdev_guid_sum -= vd->vdev_guid;
+ pvd->vdev_guid_sum += device_guid;
+ }
+
+ vd->vdev_guid = vd->vdev_guid_sum = device_guid;
+ nvlist_free(label);
+ return (0);
+ }
nvlist_free(label);
return (EBUSY);
}
+
+ /*
+ * If this device is reserved as a hot spare for this pool,
+ * adopt its GUID, and mark it as such. This way we preserve
+ * the fact that it is a hot spare even as it is added and
+ * removed from the pool.
+ */
+ if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE,
+ &state) == 0 && state == POOL_STATE_SPARE &&
+ nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID,
+ &device_guid) == 0) {
+ vdev_t *pvd = vd->vdev_parent;
+
+ if ((isspare || !isreplacing) &&
+ spa_has_spare(spa, device_guid)) {
+ nvlist_free(label);
+ return (EBUSY);
+ }
+
+ for (; pvd != NULL; pvd = pvd->vdev_parent) {
+ pvd->vdev_guid_sum -= vd->vdev_guid;
+ pvd->vdev_guid_sum += device_guid;
+ }
+
+ vd->vdev_guid = vd->vdev_guid_sum = device_guid;
+
+ if (!isspare) {
+ vd->vdev_isspare = B_TRUE;
+ spa_spare_add(vd->vdev_guid);
+ }
+ }
+
nvlist_free(label);
}
@@ -380,14 +459,35 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg)
* We mark it as being from txg 0 to indicate that it's not
* really part of an active pool just yet. The labels will
* be written again with a meaningful txg by spa_sync().
+ *
+ * For hot spares, we generate a special label that identifies as a
+ * mutually shared hot spare. If this is being added as a hot spare,
+ * always write out the spare label. If this was a hot spare, then
+ * always label it as such. If we are adding the vdev, it will remain
+ * labelled in this state until it's really added to the config. If we
+ * are removing the vdev or destroying the pool, then it goes back to
+ * its original hot spare state.
*/
- label = spa_config_generate(spa, vd, 0ULL, B_FALSE);
-
- /*
- * Add our creation time. This allows us to detect multiple vdev
- * uses as described above, and automatically expires if we fail.
- */
- VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG, crtxg) == 0);
+ if (isspare || vd->vdev_isspare) {
+ VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+ VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION,
+ spa_version(spa)) == 0);
+ VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE,
+ POOL_STATE_SPARE) == 0);
+ VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID,
+ vd->vdev_guid) == 0);
+ } else {
+ label = spa_config_generate(spa, vd, 0ULL, B_FALSE);
+
+ /*
+ * Add our creation time. This allows us to detect multiple
+ * vdev uses as described above, and automatically expires if we
+ * fail.
+ */
+ VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
+ crtxg) == 0);
+ }
buf = vp->vp_nvlist;
buflen = sizeof (vp->vp_nvlist);
@@ -449,6 +549,22 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg)
return (error);
}
+int
+vdev_label_init(vdev_t *vd, uint64_t crtxg, boolean_t isreplacing)
+{
+ return (vdev_label_common(vd, crtxg, B_FALSE, isreplacing));
+}
+
+/*
+ * Label a disk as a hot spare. A hot spare label is a special label with only
+ * the following members: version, pool_state, and guid.
+ */
+int
+vdev_label_spare(vdev_t *vd, uint64_t crtxg)
+{
+ return (vdev_label_common(vd, crtxg, B_TRUE, B_FALSE));
+}
+
/*
* ==========================================================================
* uberblock load/sync
diff --git a/usr/src/uts/common/fs/zfs/vdev_mirror.c b/usr/src/uts/common/fs/zfs/vdev_mirror.c
index eb3f0a862d..14a6ce7e6e 100644
--- a/usr/src/uts/common/fs/zfs/vdev_mirror.c
+++ b/usr/src/uts/common/fs/zfs/vdev_mirror.c
@@ -85,6 +85,7 @@ vdev_mirror_map_alloc(zio_t *zio)
for (c = 0; c < mm->mm_children; c++) {
mc = &mm->mm_child[c];
+
mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c]));
mc->mc_offset = DVA_GET_OFFSET(&dva[c]);
}
@@ -93,7 +94,8 @@ vdev_mirror_map_alloc(zio_t *zio)
mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP);
mm->mm_children = c;
- mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops);
+ mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops ||
+ vd->vdev_ops == &vdev_spare_ops);
mm->mm_preferred = mm->mm_replacing ? 0 : spa_get_random(c);
mm->mm_root = B_FALSE;
@@ -477,3 +479,14 @@ vdev_ops_t vdev_replacing_ops = {
VDEV_TYPE_REPLACING, /* name of this vdev type */
B_FALSE /* not a leaf vdev */
};
+
+vdev_ops_t vdev_spare_ops = {
+ vdev_mirror_open,
+ vdev_mirror_close,
+ vdev_default_asize,
+ vdev_mirror_io_start,
+ vdev_mirror_io_done,
+ vdev_mirror_state_change,
+ VDEV_TYPE_SPARE, /* name of this vdev type */
+ B_FALSE /* not a leaf vdev */
+};
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c
index 33225de39b..3afeab0aef 100644
--- a/usr/src/uts/common/fs/zfs/vdev_raidz.c
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -35,49 +36,178 @@
/*
* Virtual device vector for RAID-Z.
+ *
+ * This vdev supports both single and double parity. For single parity, we
+ * use a simple XOR of all the data columns. For double parity, we use both
+ * the simple XOR as well as a technique described in "The mathematics of
+ * RAID-6" by H. Peter Anvin. This technique defines a Galois field, GF(2^8),
+ * over the integers expressable in a single byte. Briefly, the operations on
+ * the field are defined as follows:
+ *
+ * o addition (+) is represented by a bitwise XOR
+ * o subtraction (-) is therefore identical to addition: A + B = A - B
+ * o multiplication of A by 2 is defined by the following bitwise expression:
+ * (A * 2)_7 = A_6
+ * (A * 2)_6 = A_5
+ * (A * 2)_5 = A_4
+ * (A * 2)_4 = A_3 + A_7
+ * (A * 2)_3 = A_2 + A_7
+ * (A * 2)_2 = A_1 + A_7
+ * (A * 2)_1 = A_0
+ * (A * 2)_0 = A_7
+ *
+ * In C, multiplying by 2 is therefore ((a << 1) ^ ((a & 0x80) ? 0x1d : 0)).
+ *
+ * Observe that any number in the field (except for 0) can be expressed as a
+ * power of 2 -- a generator for the field. We store a table of the powers of
+ * 2 and logs base 2 for quick look ups, and exploit the fact that A * B can
+ * be rewritten as 2^(log_2(A) + log_2(B)) (where '+' is normal addition rather
+ * than field addition). The inverse of a field element A (A^-1) is A^254.
+ *
+ * The two parity columns, P and Q, over several data columns, D_0, ... D_n-1,
+ * can be expressed by field operations:
+ *
+ * P = D_0 + D_1 + ... + D_n-2 + D_n-1
+ * Q = 2^n-1 * D_0 + 2^n-2 * D_1 + ... + 2^1 * D_n-2 + 2^0 * D_n-1
+ * = ((...((D_0) * 2 + D_1) * 2 + ...) * 2 + D_n-2) * 2 + D_n-1
+ *
+ * See the reconstruction code below for how P and Q can be used individually or
+ * in concert to recover missing data columns.
*/
-/*
- * We currently allow up to two-way replication (i.e. single-fault
- * reconstruction) models in RAID-Z vdevs. The blocks in such vdevs
- * must all be multiples of two times the leaf vdev blocksize.
- */
-#define VDEV_RAIDZ_ALIGN 2ULL
-
typedef struct raidz_col {
- uint64_t rc_col;
- uint64_t rc_offset;
- uint64_t rc_size;
- void *rc_data;
- int rc_error;
- short rc_tried;
- short rc_skipped;
+ uint64_t rc_devidx; /* child device index for I/O */
+ uint64_t rc_offset; /* device offset */
+ uint64_t rc_size; /* I/O size */
+ void *rc_data; /* I/O data */
+ int rc_error; /* I/O error for this device */
+ uint8_t rc_tried; /* Did we attempt this I/O column? */
+ uint8_t rc_skipped; /* Did we skip this I/O column? */
} raidz_col_t;
typedef struct raidz_map {
- uint64_t rm_cols;
- uint64_t rm_bigcols;
- uint64_t rm_asize;
- int rm_missing_child;
- int rm_firstdatacol;
- raidz_col_t rm_col[1];
+ uint64_t rm_cols; /* Column count */
+ uint64_t rm_bigcols; /* Number of oversized columns */
+ uint64_t rm_asize; /* Actual total I/O size */
+ uint64_t rm_missingdata; /* Count of missing data devices */
+ uint64_t rm_missingparity; /* Count of missing parity devices */
+ uint64_t rm_firstdatacol; /* First data column/parity count */
+ raidz_col_t rm_col[1]; /* Flexible array of I/O columns */
} raidz_map_t;
+#define VDEV_RAIDZ_P 0
+#define VDEV_RAIDZ_Q 1
+
+#define VDEV_RAIDZ_MAXPARITY 2
+
+#define VDEV_RAIDZ_MUL_2(a) (((a) << 1) ^ (((a) & 0x80) ? 0x1d : 0))
+
+/*
+ * These two tables represent powers and logs of 2 in the Galois field defined
+ * above. These values were computed by repeatedly multiplying by 2 as above.
+ */
+static const uint8_t vdev_raidz_pow2[256] = {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+ 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
+ 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
+ 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
+ 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
+ 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
+ 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
+ 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
+ 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
+ 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
+ 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
+ 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
+ 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
+ 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
+ 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
+ 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
+ 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
+ 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
+ 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
+ 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
+ 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
+ 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
+ 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
+ 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
+ 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
+ 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
+ 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
+ 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
+ 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
+ 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
+ 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
+ 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
+};
+static const uint8_t vdev_raidz_log2[256] = {
+ 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
+ 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
+ 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
+ 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
+ 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
+ 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
+ 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
+ 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
+ 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
+ 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
+ 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
+ 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
+ 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
+ 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
+ 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
+ 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
+ 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
+ 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
+ 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
+ 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
+ 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
+ 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
+ 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
+ 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
+ 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
+ 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
+ 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
+ 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
+ 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
+ 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
+ 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
+ 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
+};
+
+/*
+ * Multiply a given number by 2 raised to the given power.
+ */
+static uint8_t
+vdev_raidz_exp2(uint_t a, int exp)
+{
+ if (a == 0)
+ return (0);
+
+ ASSERT(exp >= 0);
+ ASSERT(vdev_raidz_log2[a] > 0 || a == 1);
+
+ exp += vdev_raidz_log2[a];
+ if (exp > 255)
+ exp -= 255;
+
+ return (vdev_raidz_pow2[exp]);
+}
+
static raidz_map_t *
-vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols)
+vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols,
+ uint64_t nparity)
{
raidz_map_t *rm;
uint64_t b = zio->io_offset >> unit_shift;
uint64_t s = zio->io_size >> unit_shift;
uint64_t f = b % dcols;
uint64_t o = (b / dcols) << unit_shift;
- uint64_t q, r, c, bc, col, acols, coff;
- int firstdatacol;
+ uint64_t q, r, c, bc, col, acols, coff, devidx;
- q = s / (dcols - 1);
- r = s - q * (dcols - 1);
- bc = r + !!r;
- firstdatacol = 1;
+ q = s / (dcols - nparity);
+ r = s - q * (dcols - nparity);
+ bc = (r == 0 ? 0 : r + nparity);
acols = (q == 0 ? bc : dcols);
@@ -86,8 +216,9 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols)
rm->rm_cols = acols;
rm->rm_bigcols = bc;
rm->rm_asize = 0;
- rm->rm_missing_child = -1;
- rm->rm_firstdatacol = firstdatacol;
+ rm->rm_missingdata = 0;
+ rm->rm_missingparity = 0;
+ rm->rm_firstdatacol = nparity;
for (c = 0; c < acols; c++) {
col = f + c;
@@ -96,7 +227,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols)
col -= dcols;
coff += 1ULL << unit_shift;
}
- rm->rm_col[c].rc_col = col;
+ rm->rm_col[c].rc_devidx = col;
rm->rm_col[c].rc_offset = coff;
rm->rm_col[c].rc_size = (q + (c < bc)) << unit_shift;
rm->rm_col[c].rc_data = NULL;
@@ -106,7 +237,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols)
rm->rm_asize += rm->rm_col[c].rc_size;
}
- rm->rm_asize = P2ROUNDUP(rm->rm_asize, VDEV_RAIDZ_ALIGN << unit_shift);
+ rm->rm_asize = roundup(rm->rm_asize, (nparity + 1) << unit_shift);
for (c = 0; c < rm->rm_firstdatacol; c++)
rm->rm_col[c].rc_data = zio_buf_alloc(rm->rm_col[c].rc_size);
@@ -118,18 +249,29 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols)
rm->rm_col[c - 1].rc_size;
/*
- * To prevent hot parity disks, switch the parity and data
- * columns every 1MB.
+ * If all data stored spans all columns, there's a danger that parity
+ * will always be on the same device and, since parity isn't read
+ * during normal operation, that that device's I/O bandwidth won't be
+ * used effectively. We therefore switch the parity every 1MB.
+ *
+ * ... at least that was, ostensibly, the theory. As a practical
+ * matter unless we juggle the parity between all devices evenly, we
+ * won't see any benefit. Further, occasional writes that aren't a
+ * multiple of the LCM of the number of children and the minimum
+ * stripe width are sufficient to avoid pessimal behavior.
+ * Unfortunately, this decision created an implicit on-disk format
+ * requirement that we need to support for all eternity (but only for
+ * RAID-Z with one parity device).
*/
ASSERT(rm->rm_cols >= 2);
ASSERT(rm->rm_col[0].rc_size == rm->rm_col[1].rc_size);
- if (zio->io_offset & (1ULL << 20)) {
- col = rm->rm_col[0].rc_col;
+ if (rm->rm_firstdatacol == 1 && (zio->io_offset & (1ULL << 20))) {
+ devidx = rm->rm_col[0].rc_devidx;
o = rm->rm_col[0].rc_offset;
- rm->rm_col[0].rc_col = rm->rm_col[1].rc_col;
+ rm->rm_col[0].rc_devidx = rm->rm_col[1].rc_devidx;
rm->rm_col[0].rc_offset = rm->rm_col[1].rc_offset;
- rm->rm_col[1].rc_col = col;
+ rm->rm_col[1].rc_devidx = devidx;
rm->rm_col[1].rc_offset = o;
}
@@ -151,47 +293,284 @@ vdev_raidz_map_free(zio_t *zio)
}
static void
-vdev_raidz_reconstruct(raidz_map_t *rm, int x)
+vdev_raidz_generate_parity_p(raidz_map_t *rm)
{
- uint64_t *dst, *src, count, xsize, csize;
- int i, c;
+ uint64_t *p, *src, pcount, ccount, i;
+ int c;
+
+ pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]);
+
+ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+ src = rm->rm_col[c].rc_data;
+ p = rm->rm_col[VDEV_RAIDZ_P].rc_data;
+ ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
+
+ if (c == rm->rm_firstdatacol) {
+ ASSERT(ccount == pcount);
+ for (i = 0; i < ccount; i++, p++, src++) {
+ *p = *src;
+ }
+ } else {
+ ASSERT(ccount <= pcount);
+ for (i = 0; i < ccount; i++, p++, src++) {
+ *p ^= *src;
+ }
+ }
+ }
+}
+
+static void
+vdev_raidz_generate_parity_pq(raidz_map_t *rm)
+{
+ uint64_t *q, *p, *src, pcount, ccount, mask, i;
+ int c;
+
+ pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]);
+ ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size ==
+ rm->rm_col[VDEV_RAIDZ_Q].rc_size);
+
+ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+ src = rm->rm_col[c].rc_data;
+ p = rm->rm_col[VDEV_RAIDZ_P].rc_data;
+ q = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
+ ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
+
+ if (c == rm->rm_firstdatacol) {
+ ASSERT(ccount == pcount || ccount == 0);
+ for (i = 0; i < ccount; i++, p++, q++, src++) {
+ *q = *src;
+ *p = *src;
+ }
+ for (; i < pcount; i++, p++, q++, src++) {
+ *q = 0;
+ *p = 0;
+ }
+ } else {
+ ASSERT(ccount <= pcount);
+
+ /*
+ * Rather than multiplying each byte individually (as
+ * described above), we are able to handle 8 at once
+ * by generating a mask based on the high bit in each
+ * byte and using that to conditionally XOR in 0x1d.
+ */
+ for (i = 0; i < ccount; i++, p++, q++, src++) {
+ mask = *q & 0x8080808080808080ULL;
+ mask = (mask << 1) - (mask >> 7);
+ *q = ((*q << 1) & 0xfefefefefefefefeULL) ^
+ (mask & 0x1d1d1d1d1d1d1d1dULL);
+ *q ^= *src;
+ *p ^= *src;
+ }
+
+ /*
+ * Treat short columns as though they are full of 0s.
+ */
+ for (; i < pcount; i++, q++) {
+ mask = *q & 0x8080808080808080ULL;
+ mask = (mask << 1) - (mask >> 7);
+ *q = ((*q << 1) & 0xfefefefefefefefeULL) ^
+ (mask & 0x1d1d1d1d1d1d1d1dULL);
+ }
+ }
+ }
+}
+
+static void
+vdev_raidz_reconstruct_p(raidz_map_t *rm, int x)
+{
+ uint64_t *dst, *src, xcount, ccount, count, i;
+ int c;
+
+ xcount = rm->rm_col[x].rc_size / sizeof (src[0]);
+ ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]));
+ ASSERT(xcount > 0);
+
+ src = rm->rm_col[VDEV_RAIDZ_P].rc_data;
+ dst = rm->rm_col[x].rc_data;
+ for (i = 0; i < xcount; i++, dst++, src++) {
+ *dst = *src;
+ }
+
+ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+ src = rm->rm_col[c].rc_data;
+ dst = rm->rm_col[x].rc_data;
- for (c = 0; c < rm->rm_cols; c++) {
if (c == x)
continue;
+
+ ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
+ count = MIN(ccount, xcount);
+
+ for (i = 0; i < count; i++, dst++, src++) {
+ *dst ^= *src;
+ }
+ }
+}
+
+static void
+vdev_raidz_reconstruct_q(raidz_map_t *rm, int x)
+{
+ uint64_t *dst, *src, xcount, ccount, count, mask, i;
+ uint8_t *b;
+ int c, j, exp;
+
+ xcount = rm->rm_col[x].rc_size / sizeof (src[0]);
+ ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_Q].rc_size / sizeof (src[0]));
+
+ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
src = rm->rm_col[c].rc_data;
dst = rm->rm_col[x].rc_data;
- csize = rm->rm_col[c].rc_size;
- xsize = rm->rm_col[x].rc_size;
- count = MIN(csize, xsize) / sizeof (uint64_t);
- if (c == !x) {
+
+ if (c == x)
+ ccount = 0;
+ else
+ ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
+
+ count = MIN(ccount, xcount);
+
+ if (c == rm->rm_firstdatacol) {
+ for (i = 0; i < count; i++, dst++, src++) {
+ *dst = *src;
+ }
+ for (; i < xcount; i++, dst++) {
+ *dst = 0;
+ }
+
+ } else {
/*
- * The initial copy happens at either c == 0 or c == 1.
- * Both of these columns are 'big' columns, so we'll
- * definitely initialize all of column x.
+ * For an explanation of this, see the comment in
+ * vdev_raidz_generate_parity_pq() above.
*/
- ASSERT3U(xsize, <=, csize);
- for (i = 0; i < count; i++)
- *dst++ = *src++;
- } else {
- for (i = 0; i < count; i++)
- *dst++ ^= *src++;
+ for (i = 0; i < count; i++, dst++, src++) {
+ mask = *dst & 0x8080808080808080ULL;
+ mask = (mask << 1) - (mask >> 7);
+ *dst = ((*dst << 1) & 0xfefefefefefefefeULL) ^
+ (mask & 0x1d1d1d1d1d1d1d1dULL);
+ *dst ^= *src;
+ }
+
+ for (; i < xcount; i++, dst++) {
+ mask = *dst & 0x8080808080808080ULL;
+ mask = (mask << 1) - (mask >> 7);
+ *dst = ((*dst << 1) & 0xfefefefefefefefeULL) ^
+ (mask & 0x1d1d1d1d1d1d1d1dULL);
+ }
+ }
+ }
+
+ src = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
+ dst = rm->rm_col[x].rc_data;
+ exp = 255 - (rm->rm_cols - 1 - x);
+
+ for (i = 0; i < xcount; i++, dst++, src++) {
+ *dst ^= *src;
+ for (j = 0, b = (uint8_t *)dst; j < 8; j++, b++) {
+ *b = vdev_raidz_exp2(*b, exp);
}
}
}
+static void
+vdev_raidz_reconstruct_pq(raidz_map_t *rm, int x, int y)
+{
+ uint8_t *p, *q, *pxy, *qxy, *xd, *yd, tmp, a, b, aexp, bexp;
+ void *pdata, *qdata;
+ uint64_t xsize, ysize, i;
+
+ ASSERT(x < y);
+ ASSERT(x >= rm->rm_firstdatacol);
+ ASSERT(y < rm->rm_cols);
+
+ ASSERT(rm->rm_col[x].rc_size >= rm->rm_col[y].rc_size);
+
+ /*
+ * Move the parity data aside -- we're going to compute parity as
+ * though columns x and y were full of zeros -- Pxy and Qxy. We want to
+ * reuse the parity generation mechanism without trashing the actual
+ * parity so we make those columns appear to be full of zeros by
+ * setting their lengths to zero.
+ */
+ pdata = rm->rm_col[VDEV_RAIDZ_P].rc_data;
+ qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
+ xsize = rm->rm_col[x].rc_size;
+ ysize = rm->rm_col[y].rc_size;
+
+ rm->rm_col[VDEV_RAIDZ_P].rc_data =
+ zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_P].rc_size);
+ rm->rm_col[VDEV_RAIDZ_Q].rc_data =
+ zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_Q].rc_size);
+ rm->rm_col[x].rc_size = 0;
+ rm->rm_col[y].rc_size = 0;
+
+ vdev_raidz_generate_parity_pq(rm);
+
+ rm->rm_col[x].rc_size = xsize;
+ rm->rm_col[y].rc_size = ysize;
+
+ p = pdata;
+ q = qdata;
+ pxy = rm->rm_col[VDEV_RAIDZ_P].rc_data;
+ qxy = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
+ xd = rm->rm_col[x].rc_data;
+ yd = rm->rm_col[y].rc_data;
+
+ /*
+ * We now have:
+ * Pxy = P + D_x + D_y
+ * Qxy = Q + 2^(ndevs - 1 - x) * D_x + 2^(ndevs - 1 - y) * D_y
+ *
+ * We can then solve for D_x:
+ * D_x = A * (P + Pxy) + B * (Q + Qxy)
+ * where
+ * A = 2^(x - y) * (2^(x - y) + 1)^-1
+ * B = 2^(ndevs - 1 - x) * (2^(x - y) + 1)^-1
+ *
+ * With D_x in hand, we can easily solve for D_y:
+ * D_y = P + Pxy + D_x
+ */
+
+ a = vdev_raidz_pow2[255 + x - y];
+ b = vdev_raidz_pow2[255 - (rm->rm_cols - 1 - x)];
+ tmp = 255 - vdev_raidz_log2[a ^ 1];
+
+ aexp = vdev_raidz_log2[vdev_raidz_exp2(a, tmp)];
+ bexp = vdev_raidz_log2[vdev_raidz_exp2(b, tmp)];
+
+ for (i = 0; i < xsize; i++, p++, q++, pxy++, qxy++, xd++, yd++) {
+ *xd = vdev_raidz_exp2(*p ^ *pxy, aexp) ^
+ vdev_raidz_exp2(*q ^ *qxy, bexp);
+
+ if (i < ysize)
+ *yd = *p ^ *pxy ^ *xd;
+ }
+
+ zio_buf_free(rm->rm_col[VDEV_RAIDZ_P].rc_data,
+ rm->rm_col[VDEV_RAIDZ_P].rc_size);
+ zio_buf_free(rm->rm_col[VDEV_RAIDZ_Q].rc_data,
+ rm->rm_col[VDEV_RAIDZ_Q].rc_size);
+
+ /*
+ * Restore the saved parity data.
+ */
+ rm->rm_col[VDEV_RAIDZ_P].rc_data = pdata;
+ rm->rm_col[VDEV_RAIDZ_Q].rc_data = qdata;
+}
+
+
static int
vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
{
vdev_t *cvd;
+ uint64_t nparity = vd->vdev_nparity;
int c, error;
int lasterror = 0;
int numerrors = 0;
- /*
- * XXX -- minimum children should be raid-type-specific
- */
- if (vd->vdev_children < 2) {
+ ASSERT(nparity > 0);
+
+ if (nparity > VDEV_RAIDZ_MAXPARITY ||
+ vd->vdev_children < nparity + 1) {
vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
return (EINVAL);
}
@@ -211,7 +590,7 @@ vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
*asize *= vd->vdev_children;
- if (numerrors > 1) {
+ if (numerrors > nparity) {
vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
return (lasterror);
}
@@ -234,10 +613,11 @@ vdev_raidz_asize(vdev_t *vd, uint64_t psize)
uint64_t asize;
uint64_t ashift = vd->vdev_top->vdev_ashift;
uint64_t cols = vd->vdev_children;
+ uint64_t nparity = vd->vdev_nparity;
asize = ((psize - 1) >> ashift) + 1;
- asize += (asize + cols - 2) / (cols - 1);
- asize = P2ROUNDUP(asize, VDEV_RAIDZ_ALIGN) << ashift;
+ asize += nparity * ((asize + cols - nparity - 1) / (cols - nparity));
+ asize = roundup(asize, nparity + 1) << ashift;
return (asize);
}
@@ -270,20 +650,23 @@ vdev_raidz_io_start(zio_t *zio)
raidz_col_t *rc;
int c;
- rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children);
+ rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children,
+ vd->vdev_nparity);
ASSERT3U(rm->rm_asize, ==, vdev_psize_to_asize(vd, zio->io_size));
if (zio->io_type == ZIO_TYPE_WRITE) {
-
/*
- * Generate RAID parity in virtual column 0.
+ * Generate RAID parity in the first virtual columns.
*/
- vdev_raidz_reconstruct(rm, 0);
+ if (rm->rm_firstdatacol == 1)
+ vdev_raidz_generate_parity_p(rm);
+ else
+ vdev_raidz_generate_parity_pq(rm);
for (c = 0; c < rm->rm_cols; c++) {
rc = &rm->rm_col[c];
- cvd = vd->vdev_child[rc->rc_col];
+ cvd = vd->vdev_child[rc->rc_devidx];
zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
rc->rc_offset, rc->rc_data, rc->rc_size,
zio->io_type, zio->io_priority, ZIO_FLAG_CANFAIL,
@@ -295,23 +678,34 @@ vdev_raidz_io_start(zio_t *zio)
ASSERT(zio->io_type == ZIO_TYPE_READ);
+ /*
+ * Iterate over the columns in reverse order so that we hit the parity
+ * last -- any errors along the way will force us to read the parity
+ * data.
+ */
for (c = rm->rm_cols - 1; c >= 0; c--) {
rc = &rm->rm_col[c];
- cvd = vd->vdev_child[rc->rc_col];
+ cvd = vd->vdev_child[rc->rc_devidx];
if (vdev_is_dead(cvd)) {
- rm->rm_missing_child = c;
+ if (c >= rm->rm_firstdatacol)
+ rm->rm_missingdata++;
+ else
+ rm->rm_missingparity++;
rc->rc_error = ENXIO;
rc->rc_tried = 1; /* don't even try */
rc->rc_skipped = 1;
continue;
}
if (vdev_dtl_contains(&cvd->vdev_dtl_map, bp->blk_birth, 1)) {
- rm->rm_missing_child = c;
+ if (c >= rm->rm_firstdatacol)
+ rm->rm_missingdata++;
+ else
+ rm->rm_missingparity++;
rc->rc_error = ESTALE;
rc->rc_skipped = 1;
continue;
}
- if (c >= rm->rm_firstdatacol || rm->rm_missing_child != -1 ||
+ if (c >= rm->rm_firstdatacol || rm->rm_missingdata > 0 ||
(zio->io_flags & ZIO_FLAG_SCRUB)) {
zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
rc->rc_offset, rc->rc_data, rc->rc_size,
@@ -329,7 +723,7 @@ vdev_raidz_io_start(zio_t *zio)
static void
raidz_checksum_error(zio_t *zio, raidz_col_t *rc)
{
- vdev_t *vd = zio->io_vd->vdev_child[rc->rc_col];
+ vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx];
dprintf_bp(zio->io_bp, "imputed checksum error on %s: ",
vdev_description(vd));
@@ -344,6 +738,50 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc)
zio->io_spa, vd, zio, rc->rc_offset, rc->rc_size);
}
+/*
+ * Generate the parity from the data columns. If we tried and were able to
+ * read the parity without error, verify that the generated parity matches the
+ * data we read. If it doesn't, we fire off a checksum error. Return the
+ * number such failures.
+ */
+static int
+raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
+{
+ void *orig[VDEV_RAIDZ_MAXPARITY];
+ int c, ret = 0;
+ raidz_col_t *rc;
+
+ for (c = 0; c < rm->rm_firstdatacol; c++) {
+ rc = &rm->rm_col[c];
+ if (!rc->rc_tried || rc->rc_error != 0)
+ continue;
+ orig[c] = zio_buf_alloc(rc->rc_size);
+ bcopy(rc->rc_data, orig[c], rc->rc_size);
+ }
+
+ if (rm->rm_firstdatacol == 1)
+ vdev_raidz_generate_parity_p(rm);
+ else
+ vdev_raidz_generate_parity_pq(rm);
+
+ for (c = 0; c < rm->rm_firstdatacol; c++) {
+ rc = &rm->rm_col[c];
+ if (!rc->rc_tried || rc->rc_error != 0)
+ continue;
+ if (bcmp(orig[c], rc->rc_data, rc->rc_size) != 0) {
+ raidz_checksum_error(zio, rc);
+ rc->rc_error = ECKSUM;
+ ret++;
+ }
+ zio_buf_free(orig[c], rc->rc_size);
+ }
+
+ return (ret);
+}
+
+static uint64_t raidz_corrected_p;
+static uint64_t raidz_corrected_q;
+static uint64_t raidz_corrected_pq;
static void
vdev_raidz_io_done(zio_t *zio)
@@ -351,15 +789,20 @@ vdev_raidz_io_done(zio_t *zio)
vdev_t *vd = zio->io_vd;
vdev_t *cvd;
raidz_map_t *rm = zio->io_vsd;
- raidz_col_t *rc;
+ raidz_col_t *rc, *rc1;
int unexpected_errors = 0;
- int c;
+ int parity_errors = 0;
+ int data_errors = 0;
+ int n, c, c1;
ASSERT(zio->io_bp != NULL); /* XXX need to add code to enforce this */
zio->io_error = 0;
zio->io_numerrors = 0;
+ ASSERT(rm->rm_missingparity <= rm->rm_firstdatacol);
+ ASSERT(rm->rm_missingdata <= rm->rm_cols - rm->rm_firstdatacol);
+
for (c = 0; c < rm->rm_cols; c++) {
rc = &rm->rm_col[c];
@@ -370,8 +813,15 @@ vdev_raidz_io_done(zio_t *zio)
if (rc->rc_error) {
if (zio->io_error != EIO)
zio->io_error = rc->rc_error;
+
+ if (c < rm->rm_firstdatacol)
+ parity_errors++;
+ else
+ data_errors++;
+
if (!rc->rc_skipped)
unexpected_errors++;
+
zio->io_numerrors++;
}
}
@@ -392,149 +842,288 @@ vdev_raidz_io_done(zio_t *zio)
}
ASSERT(zio->io_type == ZIO_TYPE_READ);
+ /*
+ * There are three potential phases for a read:
+ * 1. produce valid data from the columns read
+ * 2. read all disks and try again
+ * 3. perform combinatorial reconstruction
+ *
+ * Each phase is progressively both more expensive and less likely to
+ * occur. If we encounter more errors than we can repair or all phases
+ * fail, we have no choice but to return an error.
+ */
/*
- * If there were no I/O errors, and the data checksums correctly,
- * the read is complete.
+ * If the number of errors we saw was correctable -- less than or equal
+ * to the number of parity disks -- attempt to produce data that has a
+ * valid checksum. Naturally, zero errors falls into this case.
*/
- /* XXPOLICY */
- if (zio->io_numerrors == 0 && zio_checksum_error(zio) == 0) {
- ASSERT(unexpected_errors == 0);
- ASSERT(zio->io_error == 0);
+ if (zio->io_numerrors <= rm->rm_firstdatacol) {
+ switch (data_errors) {
+ case 0:
+ if (zio_checksum_error(zio) == 0) {
+ zio->io_error = 0;
+ n = raidz_parity_verify(zio, rm);
+ unexpected_errors += n;
+ ASSERT(parity_errors + n <=
+ rm->rm_firstdatacol);
+ goto done;
+ }
+ break;
- /*
- * We know the data's good. If we read the parity,
- * verify that it's good as well. If not, fix it.
- */
- for (c = 0; c < rm->rm_firstdatacol; c++) {
- void *orig;
- rc = &rm->rm_col[c];
- if (!rc->rc_tried)
- continue;
- orig = zio_buf_alloc(rc->rc_size);
- bcopy(rc->rc_data, orig, rc->rc_size);
- vdev_raidz_reconstruct(rm, c);
- if (bcmp(orig, rc->rc_data, rc->rc_size) != 0) {
- raidz_checksum_error(zio, rc);
- rc->rc_error = ECKSUM;
- unexpected_errors++;
+ case 1:
+ ASSERT(parity_errors < rm->rm_firstdatacol);
+
+ /*
+ * Find the column that reported the error.
+ */
+ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+ rc = &rm->rm_col[c];
+ if (rc->rc_error != 0)
+ break;
+ }
+ ASSERT(c != rm->rm_cols);
+ ASSERT(!rc->rc_skipped || rc->rc_error == ENXIO ||
+ rc->rc_error == ESTALE);
+
+ if (rm->rm_col[VDEV_RAIDZ_P].rc_error == 0) {
+ vdev_raidz_reconstruct_p(rm, c);
+ } else {
+ ASSERT(rm->rm_firstdatacol > 1);
+ vdev_raidz_reconstruct_q(rm, c);
}
- zio_buf_free(orig, rc->rc_size);
- }
- goto done;
- }
- /*
- * If there was exactly one I/O error, it's the one we expected,
- * and the reconstructed data checksums, the read is complete.
- * This happens when one child is offline and vdev_fault_assess()
- * knows it, or when one child has stale data and the DTL knows it.
- */
- if (zio->io_numerrors == 1 && (c = rm->rm_missing_child) != -1) {
- rc = &rm->rm_col[c];
- ASSERT(unexpected_errors == 0);
- ASSERT(rc->rc_error == ENXIO || rc->rc_error == ESTALE);
- vdev_raidz_reconstruct(rm, c);
- if (zio_checksum_error(zio) == 0) {
- zio->io_error = 0;
- goto done;
+ if (zio_checksum_error(zio) == 0) {
+ zio->io_error = 0;
+ if (rm->rm_col[VDEV_RAIDZ_P].rc_error == 0)
+ atomic_inc_64(&raidz_corrected_p);
+ else
+ atomic_inc_64(&raidz_corrected_q);
+
+ /*
+ * If there's more than one parity disk,
+ * confirm that the parity disk not used above
+ * has the correct data.
+ */
+ if (rm->rm_firstdatacol > 1) {
+ n = raidz_parity_verify(zio, rm);
+ unexpected_errors += n;
+ ASSERT(parity_errors + n <=
+ rm->rm_firstdatacol);
+ }
+
+ goto done;
+ }
+ break;
+
+ case 2:
+ /*
+ * Find the two columns that reported errors.
+ */
+ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+ rc = &rm->rm_col[c];
+ if (rc->rc_error != 0)
+ break;
+ }
+ ASSERT(c != rm->rm_cols);
+ ASSERT(!rc->rc_skipped || rc->rc_error == ENXIO ||
+ rc->rc_error == ESTALE);
+
+ for (c1 = c++; c < rm->rm_cols; c++) {
+ rc = &rm->rm_col[c];
+ if (rc->rc_error != 0)
+ break;
+ }
+ ASSERT(c != rm->rm_cols);
+ ASSERT(!rc->rc_skipped || rc->rc_error == ENXIO ||
+ rc->rc_error == ESTALE);
+
+ vdev_raidz_reconstruct_pq(rm, c1, c);
+
+ if (zio_checksum_error(zio) == 0) {
+ zio->io_error = 0;
+ atomic_inc_64(&raidz_corrected_pq);
+
+ goto done;
+ }
+ break;
+
+ default:
+ ASSERT(rm->rm_firstdatacol <= 2);
+ ASSERT(0);
}
}
/*
- * This isn't a typical error -- either we got a read error or
- * more than one child claimed a problem. Read every block we
- * haven't already so we can try combinatorial reconstruction.
+ * This isn't a typical situation -- either we got a read error or
+ * a child silently returned bad data. Read every block so we can
+ * try again with as much data and parity as we can track down. If
+ * we've already been through once before, all children will be marked
+ * as tried so we'll proceed to combinatorial reconstruction.
*/
unexpected_errors = 1;
- rm->rm_missing_child = -1;
+ rm->rm_missingdata = 0;
+ rm->rm_missingparity = 0;
- for (c = 0; c < rm->rm_cols; c++)
- if (!rm->rm_col[c].rc_tried)
- break;
+ for (c = 0; c < rm->rm_cols; c++) {
+ if (rm->rm_col[c].rc_tried)
+ continue;
- if (c != rm->rm_cols) {
zio->io_error = 0;
zio_vdev_io_redone(zio);
- for (c = 0; c < rm->rm_cols; c++) {
+ do {
rc = &rm->rm_col[c];
if (rc->rc_tried)
continue;
zio_nowait(zio_vdev_child_io(zio, NULL,
- vd->vdev_child[rc->rc_col],
+ vd->vdev_child[rc->rc_devidx],
rc->rc_offset, rc->rc_data, rc->rc_size,
zio->io_type, zio->io_priority, ZIO_FLAG_CANFAIL,
vdev_raidz_child_done, rc));
- }
+ } while (++c < rm->rm_cols);
+ dprintf("rereading\n");
zio_wait_children_done(zio);
return;
}
/*
- * If there were more errors than parity disks, give up.
+ * At this point we've attempted to reconstruct the data given the
+ * errors we detected, and we've attempted to read all columns. There
+ * must, therefore, be one or more additional problems -- silent errors
+ * resulting in invalid data rather than explicit I/O errors resulting
+ * in absent data. Before we attempt combinatorial reconstruction make
+ * sure we have a chance of coming up with the right answer.
*/
- if (zio->io_numerrors > rm->rm_firstdatacol) {
+ if (zio->io_numerrors >= rm->rm_firstdatacol) {
ASSERT(zio->io_error != 0);
goto done;
}
- /*
- * The number of I/O errors is correctable. Correct them here.
- */
- ASSERT(zio->io_numerrors <= rm->rm_firstdatacol);
- for (c = 0; c < rm->rm_cols; c++) {
- rc = &rm->rm_col[c];
- ASSERT(rc->rc_tried);
- if (rc->rc_error) {
- vdev_raidz_reconstruct(rm, c);
- if (zio_checksum_error(zio) == 0)
+ if (rm->rm_col[VDEV_RAIDZ_P].rc_error == 0) {
+ /*
+ * Attempt to reconstruct the data from parity P.
+ */
+ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+ void *orig;
+ rc = &rm->rm_col[c];
+
+ orig = zio_buf_alloc(rc->rc_size);
+ bcopy(rc->rc_data, orig, rc->rc_size);
+ vdev_raidz_reconstruct_p(rm, c);
+
+ if (zio_checksum_error(zio) == 0) {
+ zio_buf_free(orig, rc->rc_size);
zio->io_error = 0;
- else
- zio->io_error = rc->rc_error;
- goto done;
+ atomic_inc_64(&raidz_corrected_p);
+
+ /*
+ * If this child didn't know that it returned
+ * bad data, inform it.
+ */
+ if (rc->rc_tried && rc->rc_error == 0)
+ raidz_checksum_error(zio, rc);
+ rc->rc_error = ECKSUM;
+ goto done;
+ }
+
+ bcopy(orig, rc->rc_data, rc->rc_size);
+ zio_buf_free(orig, rc->rc_size);
}
}
- /*
- * There were no I/O errors, but the data doesn't checksum.
- * Try all permutations to see if we can find one that does.
- */
- ASSERT(zio->io_numerrors == 0);
- for (c = 0; c < rm->rm_cols; c++) {
- void *orig;
- rc = &rm->rm_col[c];
+ if (rm->rm_firstdatacol > 1 && rm->rm_col[VDEV_RAIDZ_Q].rc_error == 0) {
+ /*
+ * Attempt to reconstruct the data from parity Q.
+ */
+ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+ void *orig;
+ rc = &rm->rm_col[c];
+
+ orig = zio_buf_alloc(rc->rc_size);
+ bcopy(rc->rc_data, orig, rc->rc_size);
+ vdev_raidz_reconstruct_q(rm, c);
- orig = zio_buf_alloc(rc->rc_size);
- bcopy(rc->rc_data, orig, rc->rc_size);
- vdev_raidz_reconstruct(rm, c);
+ if (zio_checksum_error(zio) == 0) {
+ zio_buf_free(orig, rc->rc_size);
+ zio->io_error = 0;
+ atomic_inc_64(&raidz_corrected_q);
+
+ /*
+ * If this child didn't know that it returned
+ * bad data, inform it.
+ */
+ if (rc->rc_tried && rc->rc_error == 0)
+ raidz_checksum_error(zio, rc);
+ rc->rc_error = ECKSUM;
+ goto done;
+ }
- if (zio_checksum_error(zio) == 0) {
+ bcopy(orig, rc->rc_data, rc->rc_size);
zio_buf_free(orig, rc->rc_size);
- zio->io_error = 0;
- /*
- * If this child didn't know that it returned bad data,
- * inform it.
- */
- if (rc->rc_tried && rc->rc_error == 0)
- raidz_checksum_error(zio, rc);
- rc->rc_error = ECKSUM;
- goto done;
}
+ }
- bcopy(orig, rc->rc_data, rc->rc_size);
- zio_buf_free(orig, rc->rc_size);
+ if (rm->rm_firstdatacol > 1 &&
+ rm->rm_col[VDEV_RAIDZ_P].rc_error == 0 &&
+ rm->rm_col[VDEV_RAIDZ_Q].rc_error == 0) {
+ /*
+ * Attempt to reconstruct the data from both P and Q.
+ */
+ for (c = rm->rm_firstdatacol; c < rm->rm_cols - 1; c++) {
+ void *orig, *orig1;
+ rc = &rm->rm_col[c];
+
+ orig = zio_buf_alloc(rc->rc_size);
+ bcopy(rc->rc_data, orig, rc->rc_size);
+
+ for (c1 = c + 1; c1 < rm->rm_cols; c1++) {
+ rc1 = &rm->rm_col[c1];
+
+ orig1 = zio_buf_alloc(rc1->rc_size);
+ bcopy(rc1->rc_data, orig1, rc1->rc_size);
+
+ vdev_raidz_reconstruct_pq(rm, c, c1);
+
+ if (zio_checksum_error(zio) == 0) {
+ zio_buf_free(orig, rc->rc_size);
+ zio_buf_free(orig1, rc1->rc_size);
+ zio->io_error = 0;
+ atomic_inc_64(&raidz_corrected_pq);
+
+ /*
+ * If these children didn't know they
+ * returned bad data, inform them.
+ */
+ if (rc->rc_tried && rc->rc_error == 0)
+ raidz_checksum_error(zio, rc);
+ if (rc1->rc_tried && rc1->rc_error == 0)
+ raidz_checksum_error(zio, rc1);
+
+ rc->rc_error = ECKSUM;
+ rc1->rc_error = ECKSUM;
+
+ goto done;
+ }
+
+ bcopy(orig1, rc1->rc_data, rc1->rc_size);
+ zio_buf_free(orig1, rc1->rc_size);
+ }
+
+ bcopy(orig, rc->rc_data, rc->rc_size);
+ zio_buf_free(orig, rc->rc_size);
+ }
}
/*
- * All combinations failed to checksum. Generate checksum ereports for
- * every one.
+ * All combinations failed to checksum. Generate checksum ereports for
+ * all children.
*/
zio->io_error = ECKSUM;
if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
for (c = 0; c < rm->rm_cols; c++) {
rc = &rm->rm_col[c];
zfs_ereport_post(FM_EREPORT_ZFS_CHECKSUM,
- zio->io_spa, vd->vdev_child[rc->rc_col], zio,
+ zio->io_spa, vd->vdev_child[rc->rc_devidx], zio,
rc->rc_offset, rc->rc_size);
}
}
@@ -558,7 +1147,7 @@ done:
for (c = 0; c < rm->rm_cols; c++) {
rc = &rm->rm_col[c];
- cvd = vd->vdev_child[rc->rc_col];
+ cvd = vd->vdev_child[rc->rc_devidx];
if (rc->rc_error == 0)
continue;
@@ -571,8 +1160,8 @@ done:
zio_nowait(zio_vdev_child_io(rio, NULL, cvd,
rc->rc_offset, rc->rc_data, rc->rc_size,
ZIO_TYPE_WRITE, zio->io_priority,
- ZIO_FLAG_IO_REPAIR | ZIO_FLAG_CANFAIL |
- ZIO_FLAG_DONT_PROPAGATE, NULL, NULL));
+ ZIO_FLAG_IO_REPAIR | ZIO_FLAG_DONT_PROPAGATE |
+ ZIO_FLAG_CANFAIL, NULL, NULL));
}
zio_nowait(rio);
@@ -587,7 +1176,7 @@ done:
static void
vdev_raidz_state_change(vdev_t *vd, int faulted, int degraded)
{
- if (faulted > 1)
+ if (faulted > vd->vdev_nparity)
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_NO_REPLICAS);
else if (degraded + faulted != 0)
diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
index 0cff445cf3..137a402538 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
@@ -564,11 +564,18 @@ zfs_ioc_vdev_add(zfs_cmd_t *zc)
return (error);
}
-/* ARGSUSED */
static int
zfs_ioc_vdev_remove(zfs_cmd_t *zc)
{
- return (ENOTSUP);
+ spa_t *spa;
+ int error;
+
+ error = spa_open(zc->zc_name, &spa, FTAG);
+ if (error != 0)
+ return (error);
+ error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
+ spa_close(spa, FTAG);
+ return (error);
}
static int
@@ -1176,6 +1183,12 @@ zfs_ioc_bookmark_name(zfs_cmd_t *zc)
return (error);
}
+static int
+zfs_ioc_promote(zfs_cmd_t *zc)
+{
+ return (dsl_dataset_promote(zc->zc_name));
+}
+
static zfs_ioc_vec_t zfs_ioc_vec[] = {
{ zfs_ioc_pool_create, zfs_secpolicy_config, pool_name },
{ zfs_ioc_pool_destroy, zfs_secpolicy_config, pool_name },
@@ -1215,7 +1228,8 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, no_name },
{ zfs_ioc_error_log, zfs_secpolicy_inject, pool_name },
{ zfs_ioc_clear, zfs_secpolicy_config, pool_name },
- { zfs_ioc_bookmark_name, zfs_secpolicy_inject, pool_name }
+ { zfs_ioc_bookmark_name, zfs_secpolicy_inject, pool_name },
+ { zfs_ioc_promote, zfs_secpolicy_write, dataset_name }
};
static int
diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c
index bf7c9791fe..640ed4e960 100644
--- a/usr/src/uts/common/fs/zfs/zio.c
+++ b/usr/src/uts/common/fs/zfs/zio.c
@@ -1392,7 +1392,6 @@ zio_vdev_io_assess(zio_t *zio)
/* XXPOLICY */
if (zio_should_retry(zio)) {
ASSERT(tvd == vd);
- ASSERT(!(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE));
zio->io_retries++;
zio->io_error = 0;
diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h
index 5aaca0662b..07ada9c30e 100644
--- a/usr/src/uts/common/sys/fs/zfs.h
+++ b/usr/src/uts/common/sys/fs/zfs.h
@@ -106,12 +106,14 @@ int zfs_prop_readonly(zfs_prop_t);
const char *zfs_prop_default_string(zfs_prop_t);
uint64_t zfs_prop_default_numeric(zfs_prop_t);
+
/*
- * On-disk format version.
+ * On-disk version number.
*/
#define ZFS_VERSION_1 1ULL
#define ZFS_VERSION_2 2ULL
-#define ZFS_VERSION ZFS_VERSION_2
+#define ZFS_VERSION_3 3ULL
+#define ZFS_VERSION ZFS_VERSION_3
/*
* Symbolic names for the changes that caused a ZFS_VERSION switch.
@@ -126,6 +128,11 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t);
*/
#define ZFS_VERSION_INITIAL ZFS_VERSION_1
#define ZFS_VERSION_DITTO_BLOCKS ZFS_VERSION_2
+#define ZFS_VERSION_SPARES ZFS_VERSION_3
+#define ZFS_VERSION_RAID6 ZFS_VERSION_3
+#define ZFS_VERSION_BPLIST_ACCOUNT ZFS_VERSION_3
+#define ZFS_VERSION_RAIDZ_DEFLATE ZFS_VERSION_3
+#define ZFS_VERSION_DNODE_BYTES ZFS_VERSION_3
/*
* The following are configuration names used in the nvlist describing a pool's
@@ -156,6 +163,9 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t);
#define ZPOOL_CONFIG_OFFLINE "offline"
#define ZPOOL_CONFIG_ERRCOUNT "error_count"
#define ZPOOL_CONFIG_NOT_PRESENT "not_present"
+#define ZPOOL_CONFIG_SPARES "spares"
+#define ZPOOL_CONFIG_IS_SPARE "is_spare"
+#define ZPOOL_CONFIG_NPARITY "nparity"
#define VDEV_TYPE_ROOT "root"
#define VDEV_TYPE_MIRROR "mirror"
@@ -164,6 +174,7 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t);
#define VDEV_TYPE_DISK "disk"
#define VDEV_TYPE_FILE "file"
#define VDEV_TYPE_MISSING "missing"
+#define VDEV_TYPE_SPARE "spare"
/*
* This is needed in userland to report the minimum necessary device size.
@@ -206,18 +217,20 @@ typedef enum vdev_aux {
VDEV_AUX_TOO_SMALL, /* vdev size is too small */
VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */
VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */
- VDEV_AUX_VERSION_OLDER /* on-disk version is too old */
+ VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */
+ VDEV_AUX_SPARED /* hot spare used in another pool */
} vdev_aux_t;
/*
* pool state. The following states are written to disk as part of the normal
- * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED. The remaining states are
+ * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE. The remaining states are
* software abstractions used at various levels to communicate pool state.
*/
typedef enum pool_state {
POOL_STATE_ACTIVE = 0, /* In active use */
POOL_STATE_EXPORTED, /* Explicitly exported */
POOL_STATE_DESTROYED, /* Explicitly destroyed */
+ POOL_STATE_SPARE, /* Reserved for hot spare use */
POOL_STATE_UNINITIALIZED, /* Internal spa_t state */
POOL_STATE_UNAVAIL, /* Internal libzfs state */
POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */
@@ -256,6 +269,7 @@ typedef struct vdev_stat {
uint64_t vs_aux; /* see vdev_aux_t */
uint64_t vs_alloc; /* space allocated */
uint64_t vs_space; /* total capacity */
+ uint64_t vs_dspace; /* deflated capacity */
uint64_t vs_rsize; /* replaceable dev size */
uint64_t vs_ops[ZIO_TYPES]; /* operation count */
uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */
@@ -335,7 +349,8 @@ typedef enum zfs_ioc {
ZFS_IOC_INJECT_LIST_NEXT,
ZFS_IOC_ERROR_LOG,
ZFS_IOC_CLEAR,
- ZFS_IOC_BOOKMARK_NAME
+ ZFS_IOC_BOOKMARK_NAME,
+ ZFS_IOC_PROMOTE
} zfs_ioc_t;
/*