summaryrefslogtreecommitdiff
path: root/usr/src/cmd
diff options
context:
space:
mode:
authoreschrock <none@none>2006-03-03 20:08:16 -0800
committereschrock <none@none>2006-03-03 20:08:16 -0800
commitea8dc4b6d2251b437950c0056bc626b311c73c27 (patch)
tree69cc1808568f2ef8fd1e21c61e186ba452ea64da /usr/src/cmd
parent5c18afbc96a46bc3a9e6f3667512daa374d6cd79 (diff)
downloadillumos-joyent-ea8dc4b6d2251b437950c0056bc626b311c73c27.tar.gz
PSARC 2006/077 zpool clear
PSARC 2006/139 FMA for ZFS 6284889 arc should replace the znode cache 6333006 DMU & DSL should not panic upon I/O error 6333092 concurrent reads to a file not scaling with number of readers 6338081 ZFS/FMA phase 1 6338386 need persistent error log 6341326 i/o error causes arc buf hash table corruption 6341639 zfs backup/restore should compute/verify checksum of backup stream 6348002 out of space due to changing properties 6354724 inaccurate error message from zfs restore 6354872 dmu_sync() blows predictive accounting 6355416 zpool scrubbing consumes all memory, system hung 6363995 df should only load libzfs when it encounters a ZFS filesystem 6366320 zfs backup/restore doesn't like signals 6368892 mount -m support needed for legacy mounts 6368902 boot archive fstat support needed for ZFS Mountroot 6369424 BFU complains when bfu'ing a ZFS root filesystem 6374062 mountroot support needed for ZFS 6376356 dirtying dbuf obj=43 lvl=0 blkid=0 but not tx_held 6378391 unused members of dmu_objset_stats_t 6378392 clean up zfs_cmd_t structure 6378685 buf_init should allocate its hash table more carefully 6378976 ziltest should be a first class citizen 6381086 zdb segfaults if there is a spa deferred-free bplist 6381203 deadlock due to i/o while assigning (tc_lock held) 6381209 freed space is not immediately available 6381344 'zpool clear' 6381345 FAULTED devices should really be UNAVAIL 6381346 import should mark devices as persistently unavailable 6383272 recursive mutex_enter() during log replay with zfs root 6386326 origin property is not displayed 6386354 libzfs does too much in its _init section, calls exit(1) 6386624 zpool should not complain about non-existent devices from libdiskmgt 6386910 spa needs to be i/o error hardened 6387735 need a mechanism to inject faults into ZFS 6387736 internal ZFS utilities should be placed in an ON-private package 6389928 libzfs should ship a lint library 6390609 malformed vdev config panics on zpool_create() 6390677 version number 
checking makes upgrades challenging 6390713 ztest hangs in zil_suspend() 6391873 metadata compression should be turned back on 6392113 ztest sometimes reports leaked blocks because ZIL isn't resilvered 6393004 minor memory leak in unique_insert()
Diffstat (limited to 'usr/src/cmd')
-rw-r--r--usr/src/cmd/Makefile6
-rw-r--r--usr/src/cmd/fm/dicts/ZFS.dict11
-rw-r--r--usr/src/cmd/fm/dicts/ZFS.po39
-rw-r--r--usr/src/cmd/fm/modules/common/Makefile2
-rw-r--r--usr/src/cmd/fm/modules/common/zfs-diagnosis/Makefile33
-rw-r--r--usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs-diagnosis.conf32
-rw-r--r--usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c423
-rw-r--r--usr/src/cmd/fm/schemes/Makefile8
-rw-r--r--usr/src/cmd/fm/schemes/zfs/Makefile32
-rw-r--r--usr/src/cmd/fm/schemes/zfs/amd64/Makefile33
-rw-r--r--usr/src/cmd/fm/schemes/zfs/i386/Makefile32
-rw-r--r--usr/src/cmd/fm/schemes/zfs/scheme.c191
-rw-r--r--usr/src/cmd/fm/schemes/zfs/sparc/Makefile32
-rw-r--r--usr/src/cmd/fm/schemes/zfs/sparcv9/Makefile33
-rw-r--r--usr/src/cmd/fs.d/df.c57
-rw-r--r--usr/src/cmd/mdb/common/modules/zfs/zfs.c9
-rw-r--r--usr/src/cmd/truss/codes.c17
-rw-r--r--usr/src/cmd/zdb/zdb.c128
-rw-r--r--usr/src/cmd/zdb/zdb_il.c19
-rw-r--r--usr/src/cmd/zfs/zfs_main.c15
-rw-r--r--usr/src/cmd/zinject/Makefile54
-rw-r--r--usr/src/cmd/zinject/Makefile.com55
-rw-r--r--usr/src/cmd/zinject/amd64/Makefile31
-rw-r--r--usr/src/cmd/zinject/i386/Makefile30
-rw-r--r--usr/src/cmd/zinject/sparcv9/Makefile31
-rw-r--r--usr/src/cmd/zinject/translate.c458
-rw-r--r--usr/src/cmd/zinject/zinject.c739
-rw-r--r--usr/src/cmd/zinject/zinject.h64
-rw-r--r--usr/src/cmd/zoneadmd/vplat.c14
-rw-r--r--usr/src/cmd/zpool/zpool_main.c189
-rw-r--r--usr/src/cmd/zpool/zpool_vdev.c12
-rw-r--r--usr/src/cmd/ztest/Makefile13
-rw-r--r--usr/src/cmd/ztest/ztest.c352
33 files changed, 2872 insertions, 322 deletions
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index c8b16c991d..e8ec1adb3b 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -408,6 +407,7 @@ COMMON_SUBDIRS= \
zdump \
zfs \
zic \
+ zinject \
zlogin \
zoneadm \
zoneadmd \
diff --git a/usr/src/cmd/fm/dicts/ZFS.dict b/usr/src/cmd/fm/dicts/ZFS.dict
index 0166183535..89b10434f5 100644
--- a/usr/src/cmd/fm/dicts/ZFS.dict
+++ b/usr/src/cmd/fm/dicts/ZFS.dict
@@ -1,13 +1,12 @@
#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -27,7 +26,7 @@
# DO NOT EDIT -- this file is generated by the Event Registry.
#
-FMDICT: name=ZFS version=1 maxkey=1
+FMDICT: name=ZFS version=1 maxkey=1 dictid=0x5a46
ereport.fs.zfs.pool.corrupt_cache=1
ereport.fs.zfs.device.missing_r=2
@@ -39,3 +38,5 @@ ereport.fs.zfs.pool.corrupt_pool=7
ereport.fs.zfs.object.corrupt_data=8
ereport.fs.zfs.device.failing=9
ereport.fs.zfs.device.version_mismatch=10
+fault.fs.zfs.pool=11
+fault.fs.zfs.device=12
diff --git a/usr/src/cmd/fm/dicts/ZFS.po b/usr/src/cmd/fm/dicts/ZFS.po
index ea5a9c6195..a1d26715be 100644
--- a/usr/src/cmd/fm/dicts/ZFS.po
+++ b/usr/src/cmd/fm/dicts/ZFS.po
@@ -1,13 +1,12 @@
#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -186,3 +185,35 @@ msgid "ZFS-8000-A5.impact"
msgstr "The pool is unavailable"
msgid "ZFS-8000-A5.action"
msgstr "\nIf this error is seen during 'zpool import', see the section below. Otherwise,\nrun 'zpool status -x' to determine which pool is faulted:\n\n\n# zpool status -x\n pool: test\n state: FAULTED\nstatus: The ZFS version for the pool is incompatible with the software running\n on this system.\naction: Destroy and re-create the pool.\n scrub: none requested\nconfig:\n\n NAME STATE READ WRITE CKSUM\n test FAULTED 0 0 0 incompatible version\n mirror ONLINE 0 0 0\n c0t0d0 ONLINE 0 0 0\n c0t0d1 ONLINE 0 0 0\n\n\nThe pool cannot be used on this system. Either move the disks to the system\nwhere they were originally created, or destroy the pool and re-create it from\nbackup.\n\n\nIf this error is seen during import, the pool cannot be imported on the current\nsystem. The disks must be attached to the system which originally created the\npool, and imported there.\n "
+#
+# code: ZFS-8000-CS
+# keys: fault.fs.zfs.pool
+#
+msgid "ZFS-8000-CS.type"
+msgstr "Fault"
+msgid "ZFS-8000-CS.severity"
+msgstr "Major"
+msgid "ZFS-8000-CS.description"
+msgstr "A ZFS pool failed to open. Refer to %s for more information."
+msgid "ZFS-8000-CS.response"
+msgstr "No automated response will occur."
+msgid "ZFS-8000-CS.impact"
+msgstr "The pool data is unavailable"
+msgid "ZFS-8000-CS.action"
+msgstr "Run 'zpool status -x' and either attach the missing device or\n restore from backup."
+#
+# code: ZFS-8000-D3
+# keys: fault.fs.zfs.device
+#
+msgid "ZFS-8000-D3.type"
+msgstr "Fault"
+msgid "ZFS-8000-D3.severity"
+msgstr "Major"
+msgid "ZFS-8000-D3.description"
+msgstr "A ZFS device failed. Refer to %s for more information."
+msgid "ZFS-8000-D3.response"
+msgstr "No automated response will occur."
+msgid "ZFS-8000-D3.impact"
+msgstr "Fault tolerance of the pool may be compromised."
+msgid "ZFS-8000-D3.action"
+msgstr "Run 'zpool status -x' and replace the bad device."
diff --git a/usr/src/cmd/fm/modules/common/Makefile b/usr/src/cmd/fm/modules/common/Makefile
index 75dd15ef9e..868a66df08 100644
--- a/usr/src/cmd/fm/modules/common/Makefile
+++ b/usr/src/cmd/fm/modules/common/Makefile
@@ -27,6 +27,6 @@
#
SUBDIRS = cpumem-retire eversholt io-retire ip-transport snmp-trapgen \
- syslog-msgs
+ syslog-msgs zfs-diagnosis
include ../../Makefile.subdirs
diff --git a/usr/src/cmd/fm/modules/common/zfs-diagnosis/Makefile b/usr/src/cmd/fm/modules/common/zfs-diagnosis/Makefile
new file mode 100644
index 0000000000..03a7a0dda4
--- /dev/null
+++ b/usr/src/cmd/fm/modules/common/zfs-diagnosis/Makefile
@@ -0,0 +1,33 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+MODULE = zfs-diagnosis
+CLASS = common
+SRCS = zfs_de.c
+
+include ../../Makefile.plugin
+
+LDLIBS += -luutil
diff --git a/usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs-diagnosis.conf b/usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs-diagnosis.conf
new file mode 100644
index 0000000000..cd493d69bc
--- /dev/null
+++ b/usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs-diagnosis.conf
@@ -0,0 +1,32 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# fmd configuration file for the zfs-diagnosis.so diagnosis engine.
+#
+subscribe ereport.fs.zfs.*
+subscribe resource.fs.zfs.*
+subscribe fault.fs.zfs.*
+dictionary ZFS
diff --git a/usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c b/usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c
new file mode 100644
index 0000000000..02c1a31e2c
--- /dev/null
+++ b/usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c
@@ -0,0 +1,423 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <assert.h>
+#include <stddef.h>
+#include <strings.h>
+#include <libuutil.h>
+#include <fm/fmd_api.h>
+#include <sys/fs/zfs.h>
+#include <sys/fm/protocol.h>
+#include <sys/fm/fs/zfs.h>
+
+/*
+ * Persistent per-case state. This structure is written to and read from the
+ * fmd case buffer named CASE_DATA as raw bytes (see zfs_case_serialize() and
+ * zfs_case_unserialize()), so its layout is part of the saved format.
+ */
+typedef struct zfs_case_data {
+ uint64_t zc_version; /* must equal CASE_DATA_VERSION to be accepted */
+ uint64_t zc_ena; /* ENA recorded when the case was opened */
+ uint64_t zc_pool_guid; /* pool GUID recorded when the case was opened */
+ uint64_t zc_vdev_guid; /* vdev GUID for the case, or 0 if none */
+ int zc_has_timer; /* non-zero while a timer is installed for the case */
+ int zc_pool_state; /* pool context of the ereport that opened the case */
+} zfs_case_data_t;
+
+/*
+ * In-core representation of an open case. One of these is allocated and
+ * linked onto the zfs_cases list by zfs_case_unserialize(), and it is also
+ * registered as the case-specific data of the fmd case.
+ */
+typedef struct zfs_case {
+ int zc_version; /* NOTE(review): not referenced anywhere in this file */
+ zfs_case_data_t zc_data; /* persisted portion, mirrored in CASE_DATA */
+ fmd_case_t *zc_case; /* the fmd case this structure describes */
+ uu_list_node_t zc_node; /* linkage on the zfs_cases list */
+ id_t zc_timer; /* timer id; only meaningful if zc_data.zc_has_timer */
+} zfs_case_t;
+
+/* Name of the fmd case buffer that holds a serialized zfs_case_data_t. */
+#define CASE_DATA "data"
+/* Layout version stamped into zfs_case_data_t.zc_version. */
+#define CASE_DATA_VERSION 1
+
+/*
+ * Interval passed to fmd_timer_install(). _fmd_init() sets it to the
+ * "case_timeout" property multiplied by NANOSEC.
+ * NOTE(review): assuming NANOSEC is 10^9, the default property value of 5
+ * yields 5 * 10^9, which does not fit in a 32-bit int -- confirm and consider
+ * widening this variable.
+ */
+static int zfs_case_timeout;
+
+/* List pool and list holding every in-core zfs_case_t. */
+uu_list_pool_t *zfs_case_pool;
+uu_list_t *zfs_cases;
+
+/*
+ * Write the persistent portion of a case (zc_data) into the CASE_DATA buffer
+ * attached to its fmd case.
+ */
+static void
+zfs_case_serialize(fmd_hdl_t *hdl, zfs_case_t *zcp)
+{
+	zfs_case_data_t *datap = &zcp->zc_data;
+
+	fmd_buf_write(hdl, zcp->zc_case, CASE_DATA, datap, sizeof (*datap));
+}
+
+/*
+ * Build the in-core zfs_case_t for an fmd case from its CASE_DATA buffer.
+ *
+ * On success the new structure is appended to zfs_cases, registered as the
+ * case-specific data of 'cp', and returned. If the saved data recorded a
+ * timer, a new timer is installed using zfs_case_timeout. If the buffer's
+ * version does not equal CASE_DATA_VERSION, the structure is freed and NULL
+ * is returned.
+ */
+static zfs_case_t *
+zfs_case_unserialize(fmd_hdl_t *hdl, fmd_case_t *cp)
+{
+	zfs_case_t *newcase;
+	zfs_case_data_t *datap;
+
+	newcase = fmd_hdl_zalloc(hdl, sizeof (zfs_case_t), FMD_SLEEP);
+	newcase->zc_case = cp;
+	datap = &newcase->zc_data;
+
+	fmd_buf_read(hdl, cp, CASE_DATA, datap, sizeof (*datap));
+
+	if (datap->zc_version == CASE_DATA_VERSION) {
+		if (datap->zc_has_timer) {
+			newcase->zc_timer = fmd_timer_install(hdl, newcase,
+			    NULL, zfs_case_timeout);
+		}
+
+		(void) uu_list_insert_before(zfs_cases, NULL, newcase);
+		fmd_case_setspecific(hdl, cp, newcase);
+
+		return (newcase);
+	}
+
+	/* Unknown buffer layout: discard the in-core copy. */
+	fmd_hdl_free(hdl, newcase, sizeof (zfs_case_t));
+	return (NULL);
+}
+
+/*
+ * Solve a case: add a 100% certainty suspect of class 'faultname' that names
+ * 'detector' as both ASRU and resource, mark the case solved, and cancel any
+ * timer still pending for it (persisting the cleared timer flag).
+ */
+static void
+zfs_case_solve(fmd_hdl_t *hdl, zfs_case_t *zcp, const char *faultname,
+    nvlist_t *detector)
+{
+	nvlist_t *fault;
+
+	fault = fmd_nvl_create_fault(hdl, faultname, 100, detector, NULL,
+	    detector);
+	fmd_case_add_suspect(hdl, zcp->zc_case, fault);
+	fmd_case_solve(hdl, zcp->zc_case);
+
+	if (zcp->zc_data.zc_has_timer) {
+		fmd_timer_remove(hdl, zcp->zc_timer);
+		zcp->zc_data.zc_has_timer = 0;
+		zfs_case_serialize(hdl, zcp);
+	}
+}
+
+/*
+ * fmdo_recv entry point: handle one event delivered to the module.
+ *
+ *   hdl   - module handle
+ *   ep    - the event, attached to the matching case for ereports
+ *   nvl   - the event payload
+ *   class - event class string (unused; class tests go through
+ *           fmd_nvl_class_match())
+ *
+ * fault.fs.zfs.* events are discarded. resource.fs.zfs.* events close the
+ * matching open case, if any. Ereports are matched to an open case (or open
+ * a new one) and may solve the case or (re)arm its timer.
+ */
+/*ARGSUSED*/
+static void
+zfs_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
+{
+	zfs_case_t *zcp;
+	int32_t pool_state;
+	uint64_t ena = 0, pool_guid = 0, vdev_guid = 0;
+	nvlist_t *detector = NULL;
+	boolean_t isresource;
+	int have_ena = 0;
+
+	/*
+	 * Without a retire agent, we subscribe to our own faults and just
+	 * discard them. Do this before touching the payload, since faults
+	 * carry none of the ereport members looked up below.
+	 */
+	if (fmd_nvl_class_match(hdl, nvl, "fault.fs.zfs.*"))
+		return;
+
+	isresource = fmd_nvl_class_match(hdl, nvl, "resource.fs.zfs.*");
+
+	if (isresource) {
+		/*
+		 * For our faked-up 'ok' resource (see below), we have no normal
+		 * payload members. The absence of a vdev GUID is taken to mean
+		 * the event refers to a pool open.
+		 */
+		if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
+		    &vdev_guid) != 0) {
+			vdev_guid = 0;
+			pool_state = SPA_LOAD_OPEN;
+		} else {
+			pool_state = SPA_LOAD_NONE;
+		}
+	} else {
+		if (nvlist_lookup_nvlist(nvl,
+		    FM_EREPORT_DETECTOR, &detector) != 0)
+			detector = NULL;
+
+		/*
+		 * Every decision below depends on the pool context. An ereport
+		 * without one cannot be classified, so ignore it rather than
+		 * act on an indeterminate value.
+		 */
+		if (nvlist_lookup_int32(nvl,
+		    FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, &pool_state) != 0)
+			return;
+	}
+
+	/*
+	 * Ignore all block level (.io and .checksum) errors not associated with
+	 * a pool open. We should really update a bean counter, and eventually
+	 * do some real predictive analysis based on these faults.
+	 */
+	if ((fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.io") ||
+	    fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.checksum")) &&
+	    pool_state == SPA_LOAD_NONE)
+		return;
+
+	/*
+	 * We also ignore all ereports generated during an import of a pool,
+	 * since the only possible fault (.pool) would result in import failure,
+	 * and hence no persistent fault. Some day we may want to do something
+	 * with these ereports, so we continue generating them internally.
+	 */
+	if (pool_state == SPA_LOAD_IMPORT)
+		return;
+
+	/*
+	 * Determine if this ereport corresponds to an open case. Cases are
+	 * indexed by ENA, since ZFS does all the work of chaining together
+	 * related ereports.
+	 *
+	 * We also detect if an ereport corresponds to an open case by context,
+	 * such as:
+	 *
+	 *	- An error occurred during an open of a pool with an existing
+	 *	  case.
+	 *
+	 *	- An error occurred for a device which already has an open
+	 *	  case.
+	 */
+	if (nvlist_lookup_uint64(nvl,
+	    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, &pool_guid) != 0)
+		pool_guid = 0;
+
+	if (!isresource) {
+		/* Resource events carry no ENA; only ereports are matched by it. */
+		if (nvlist_lookup_uint64(nvl, FM_EREPORT_ENA, &ena) == 0)
+			have_ena = 1;
+		else
+			ena = 0;
+
+		if (!fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.vdev.*") ||
+		    nvlist_lookup_uint64(nvl,
+		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
+			vdev_guid = 0;
+	}
+
+	for (zcp = uu_list_first(zfs_cases); zcp != NULL;
+	    zcp = uu_list_next(zfs_cases, zcp)) {
+		/*
+		 * Matches a known ENA.
+		 */
+		if (have_ena && zcp->zc_data.zc_ena == ena)
+			break;
+
+		/*
+		 * Matches a case involving load errors for this same pool.
+		 */
+		if (zcp->zc_data.zc_pool_guid == pool_guid &&
+		    zcp->zc_data.zc_pool_state == SPA_LOAD_OPEN &&
+		    pool_state == SPA_LOAD_OPEN)
+			break;
+
+		/*
+		 * Device errors for the same device.
+		 */
+		if (vdev_guid != 0 && zcp->zc_data.zc_vdev_guid == vdev_guid)
+			break;
+	}
+
+	if (zcp == NULL) {
+		fmd_case_t *cs;
+		zfs_case_data_t data;
+
+		/*
+		 * If this is one of our 'fake' resource ereports, and there is
+		 * no case open, simply discard it.
+		 */
+		if (isresource)
+			return;
+
+		/*
+		 * Open a new case.
+		 */
+		cs = fmd_case_open(hdl, NULL);
+
+		/*
+		 * Initialize the case buffer. To commonize code, we actually
+		 * create the buffer with existing data, and then call
+		 * zfs_case_unserialize() to instantiate the in-core structure.
+		 */
+		fmd_buf_create(hdl, cs, CASE_DATA,
+		    sizeof (zfs_case_data_t));
+
+		data.zc_version = CASE_DATA_VERSION;
+		data.zc_ena = ena;
+		data.zc_pool_guid = pool_guid;
+		data.zc_vdev_guid = vdev_guid;
+		data.zc_has_timer = 0;
+		data.zc_pool_state = (int)pool_state;
+
+		fmd_buf_write(hdl, cs, CASE_DATA, &data, sizeof (data));
+
+		zcp = zfs_case_unserialize(hdl, cs);
+		assert(zcp != NULL);
+	}
+
+	/*
+	 * The 'resource.fs.zfs.ok' event is a special internal-only event that
+	 * signifies that a pool or device that was previously faulted has now
+	 * come online (as detected by ZFS). This allows us to close the
+	 * associated case.
+	 */
+	if (isresource) {
+		fmd_case_close(hdl, zcp->zc_case);
+		return;
+	}
+
+	/*
+	 * Associate the ereport with this case.
+	 */
+	fmd_case_add_ereport(hdl, zcp->zc_case, ep);
+
+	/*
+	 * Don't do anything else if this case is already solved.
+	 */
+	if (fmd_case_solved(hdl, zcp->zc_case))
+		return;
+
+	/*
+	 * Determine if we should solve the case and generate a fault. We solve
+	 * a case if:
+	 *
+	 *	a. A pool failed to open (ereport.fs.zfs.zpool)
+	 *	b. A device failed to open (ereport.fs.zfs.vdev.*) while a pool
+	 *	   was up and running.
+	 *
+	 * We may see a series of ereports associated with a pool open, all
+	 * chained together by the same ENA. If the pool open succeeds, then
+	 * we'll see no further ereports. To detect when a pool open has
+	 * succeeded, we associate a timer with the event. When it expires, we
+	 * close the case.
+	 */
+	if (fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.zpool")) {
+		/*
+		 * Pool level fault.
+		 */
+		zfs_case_solve(hdl, zcp, "fault.fs.zfs.pool", detector);
+	} else if (fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.vdev.*") &&
+	    pool_state == SPA_LOAD_NONE) {
+		/*
+		 * Device fault.
+		 */
+		zfs_case_solve(hdl, zcp, "fault.fs.zfs.device", detector);
+	} else if (pool_state == SPA_LOAD_OPEN) {
+		/*
+		 * Error incurred during a pool open. Reset the timer
+		 * associated with this case.
+		 */
+		if (zcp->zc_data.zc_has_timer)
+			fmd_timer_remove(hdl, zcp->zc_timer);
+		zcp->zc_timer = fmd_timer_install(hdl, zcp, NULL,
+		    zfs_case_timeout);
+		if (!zcp->zc_data.zc_has_timer) {
+			zcp->zc_data.zc_has_timer = 1;
+			zfs_case_serialize(hdl, zcp);
+		}
+	}
+}
+
+/*
+ * fmdo_timeout entry point. A case timer expiring means the pool reported
+ * errors while opening but produced no further ereports before the timeout,
+ * so the open is treated as having succeeded and the case is closed.
+ * 'data' is the zfs_case_t that was passed to fmd_timer_install().
+ */
+/* ARGSUSED */
+static void
+zfs_timeout(fmd_hdl_t *hdl, id_t id, void *data)
+{
+	zfs_case_t *expired = data;
+	fmd_case_t *cs = expired->zc_case;
+
+	/* The timer has already fired; record that none is pending. */
+	expired->zc_data.zc_has_timer = 0;
+
+	fmd_case_close(hdl, cs);
+}
+
+/*
+ * fmdo_close entry point: release the in-core state attached to a case that
+ * is being closed. Any pending timer is removed, the structure is unlinked
+ * from zfs_cases, and its memory is freed.
+ */
+static void
+zfs_close(fmd_hdl_t *hdl, fmd_case_t *cs)
+{
+	zfs_case_t *zcp = fmd_case_getspecific(hdl, cs);
+
+	/*
+	 * A case whose buffer was rejected by zfs_case_unserialize() never had
+	 * case-specific data attached, so there is nothing to tear down.
+	 */
+	if (zcp == NULL)
+		return;
+
+	if (zcp->zc_data.zc_has_timer)
+		fmd_timer_remove(hdl, zcp->zc_timer);
+	uu_list_remove(zfs_cases, zcp);
+	fmd_hdl_free(hdl, zcp, sizeof (zfs_case_t));
+}
+
+/* Module entry points handed to fmd through fmd_info below. */
+static const fmd_hdl_ops_t fmd_ops = {
+ zfs_recv, /* fmdo_recv */
+ zfs_timeout, /* fmdo_timeout */
+ zfs_close, /* fmdo_close */
+ NULL, /* fmdo_stats */
+ NULL, /* fmdo_gc */
+};
+
+/*
+ * Module properties. "case_timeout" is read in _fmd_init() and multiplied by
+ * NANOSEC, so its value is expressed in seconds (default 5).
+ */
+static const fmd_prop_t fmd_props[] = {
+ { "case_timeout", FMD_TYPE_UINT32, "5" },
+ { NULL, 0, NULL }
+};
+
+/* Module description passed to fmd_hdl_register(): name, version, ops, props. */
+static const fmd_hdl_info_t fmd_info = {
+ "ZFS Diagnosis Engine", "1.0", &fmd_ops, fmd_props
+};
+
+/*
+ * Module load entry point. Creates the case list, registers the module with
+ * fmd, reads the "case_timeout" property, and rebuilds the in-core state for
+ * every case fmd still has open for this module. Any failure undoes the work
+ * done so far and returns without the module being usable.
+ */
+void
+_fmd_init(fmd_hdl_t *hdl)
+{
+	fmd_case_t *cp;
+
+	if ((zfs_case_pool = uu_list_pool_create("zfs_case_pool",
+	    sizeof (zfs_case_t), offsetof(zfs_case_t, zc_node),
+	    NULL, 0)) == NULL)
+		return;
+
+	if ((zfs_cases = uu_list_create(zfs_case_pool, NULL, 0)) == NULL) {
+		uu_list_pool_destroy(zfs_case_pool);
+		return;
+	}
+
+	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
+		uu_list_destroy(zfs_cases);
+		uu_list_pool_destroy(zfs_case_pool);
+		return;
+	}
+
+	/*
+	 * The timeout must be known before any case is unserialized:
+	 * zfs_case_unserialize() re-installs the timer of every case that had
+	 * one, using zfs_case_timeout as the interval. Reading the property
+	 * afterwards would arm those timers with the initial value of zero.
+	 *
+	 * NOTE(review): zfs_case_timeout is an int; confirm that the property
+	 * value times NANOSEC fits, or widen the variable.
+	 */
+	zfs_case_timeout = fmd_prop_get_int32(hdl, "case_timeout") * NANOSEC;
+
+	/*
+	 * Iterate over all active cases and unserialize the associated buffers,
+	 * adding them to our list of open cases.
+	 */
+	for (cp = fmd_case_next(hdl, NULL);
+	    cp != NULL; cp = fmd_case_next(hdl, cp))
+		(void) zfs_case_unserialize(hdl, cp);
+}
+
+/*
+ * Module unload entry point: free every in-core case structure, then destroy
+ * the case list and its pool.
+ */
+void
+_fmd_fini(fmd_hdl_t *hdl)
+{
+	uu_list_walk_t *iter;
+	zfs_case_t *cur;
+
+	/*
+	 * Remove all active cases. A robust walk is used because each element
+	 * is unlinked from the list while the walk is in progress.
+	 */
+	iter = uu_list_walk_start(zfs_cases, UU_WALK_ROBUST);
+	for (cur = uu_list_walk_next(iter); cur != NULL;
+	    cur = uu_list_walk_next(iter)) {
+		uu_list_remove(zfs_cases, cur);
+		fmd_hdl_free(hdl, cur, sizeof (zfs_case_t));
+	}
+	uu_list_walk_end(iter);
+
+	uu_list_destroy(zfs_cases);
+	uu_list_pool_destroy(zfs_case_pool);
+}
diff --git a/usr/src/cmd/fm/schemes/Makefile b/usr/src/cmd/fm/schemes/Makefile
index 4f1dd443df..8dfc6ff36d 100644
--- a/usr/src/cmd/fm/schemes/Makefile
+++ b/usr/src/cmd/fm/schemes/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -33,6 +32,7 @@ SUBDIRS = \
legacy-hc \
mem \
mod \
- pkg
+ pkg \
+ zfs
include ../Makefile.subdirs
diff --git a/usr/src/cmd/fm/schemes/zfs/Makefile b/usr/src/cmd/fm/schemes/zfs/Makefile
new file mode 100644
index 0000000000..0c82190bb3
--- /dev/null
+++ b/usr/src/cmd/fm/schemes/zfs/Makefile
@@ -0,0 +1,32 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+include ../../../Makefile.cmd
+
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+include ../../Makefile.subdirs
diff --git a/usr/src/cmd/fm/schemes/zfs/amd64/Makefile b/usr/src/cmd/fm/schemes/zfs/amd64/Makefile
new file mode 100644
index 0000000000..b3e2565271
--- /dev/null
+++ b/usr/src/cmd/fm/schemes/zfs/amd64/Makefile
@@ -0,0 +1,33 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+include ../../Makefile.com
+include $(SRC)/Makefile.master.64
+include ../../Makefile.targ
+
+LDLIBS += -lzfs
+
+install: all $(ROOTPROG64)
diff --git a/usr/src/cmd/fm/schemes/zfs/i386/Makefile b/usr/src/cmd/fm/schemes/zfs/i386/Makefile
new file mode 100644
index 0000000000..11a1534892
--- /dev/null
+++ b/usr/src/cmd/fm/schemes/zfs/i386/Makefile
@@ -0,0 +1,32 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+include ../../Makefile.com
+include ../../Makefile.targ
+
+LDLIBS += -lzfs
+
+install: all $(ROOTPROG)
diff --git a/usr/src/cmd/fm/schemes/zfs/scheme.c b/usr/src/cmd/fm/schemes/zfs/scheme.c
new file mode 100644
index 0000000000..7f2532a637
--- /dev/null
+++ b/usr/src/cmd/fm/schemes/zfs/scheme.c
@@ -0,0 +1,191 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <errno.h>
+#include <fm/fmd_fmri.h>
+#include <strings.h>
+#include <libzfs.h>
+
+/*
+ * Search state passed to find_pool() through zpool_iter().
+ */
+typedef struct cbdata {
+ uint64_t cb_guid; /* pool GUID being searched for */
+ zpool_handle_t *cb_pool; /* set to the matching pool's handle when found */
+} cbdata_t;
+
+/*
+ * zpool_iter() callback. If the pool's GUID equals the one being searched
+ * for, keep the handle open in cb_pool and return 1; the caller is then
+ * responsible for closing it. Otherwise close the handle and return 0.
+ */
+static int
+find_pool(zpool_handle_t *zhp, void *data)
+{
+	cbdata_t *search = data;
+
+	if (zpool_get_guid(zhp) != search->cb_guid) {
+		zpool_close(zhp);
+		return (0);
+	}
+
+	search->cb_pool = zhp;
+	return (1);
+}
+
+/*
+ * Format a zfs-scheme FMRI as a string of the form
+ * "<scheme>://pool=<name>" or "<scheme>://pool=<name>/vdev=<guid>".
+ *
+ * The pool is shown by name when a pool with the FMRI's GUID is found via
+ * zpool_iter(), and by its GUID in hex otherwise. The result of snprintf()
+ * into 'buf' is returned. An FMRI with no pool GUID is malformed: errno is
+ * set to EINVAL via fmd_fmri_set_errno() and its result is returned.
+ */
+ssize_t
+fmd_fmri_nvl2str(nvlist_t *nvl, char *buf, size_t buflen)
+{
+	uint64_t pool_guid, vdev_guid;
+	cbdata_t cb;
+	ssize_t len;
+	const char *name;
+	char guidbuf[64];
+
+	if (nvlist_lookup_uint64(nvl, FM_FMRI_ZFS_POOL, &pool_guid) != 0)
+		return (fmd_fmri_set_errno(EINVAL));
+
+	/*
+	 * Attempt to convert the pool guid to a name.
+	 */
+	cb.cb_guid = pool_guid;
+	cb.cb_pool = NULL;
+
+	if (zpool_iter(find_pool, &cb) == 1) {
+		name = zpool_get_name(cb.cb_pool);
+	} else {
+		/* Cast so the argument type matches the %llx conversion. */
+		(void) snprintf(guidbuf, sizeof (guidbuf), "%llx",
+		    (unsigned long long)pool_guid);
+		name = guidbuf;
+	}
+
+	if (nvlist_lookup_uint64(nvl, FM_FMRI_ZFS_VDEV, &vdev_guid) == 0)
+		len = snprintf(buf, buflen, "%s://pool=%s/vdev=%llx",
+		    FM_FMRI_SCHEME_ZFS, name, (unsigned long long)vdev_guid);
+	else
+		len = snprintf(buf, buflen, "%s://pool=%s",
+		    FM_FMRI_SCHEME_ZFS, name);
+
+	/* 'name' may point into the pool handle, so close it only now. */
+	if (cb.cb_pool)
+		zpool_close(cb.cb_pool);
+
+	return (len);
+}
+
+/*
+ * Depth-first search of a vdev configuration tree for the node whose
+ * ZPOOL_CONFIG_GUID equals 'search'. Returns the matching nvlist, or NULL if
+ * no node in the subtree rooted at 'nv' matches.
+ */
+static nvlist_t *
+find_vdev_iter(nvlist_t *nv, uint64_t search)
+{
+	uint_t c, children;
+	nvlist_t **child;
+	uint64_t guid;
+	nvlist_t *ret;
+
+	/* Only compare when the node actually carries a GUID. */
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
+	    search == guid)
+		return (nv);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0)
+		return (NULL);
+
+	for (c = 0; c < children; c++)
+		if ((ret = find_vdev_iter(child[c], search)) != NULL)
+			return (ret);
+
+	return (NULL);
+}
+
+/*
+ * Look up the vdev with the given GUID in a pool's configuration. Returns
+ * the vdev's nvlist, or NULL if the pool has no configuration, the
+ * configuration has no vdev tree, or no vdev in the tree has that GUID.
+ */
+static nvlist_t *
+find_vdev(zpool_handle_t *zhp, uint64_t guid)
+{
+	nvlist_t *config;
+	nvlist_t *nvroot;
+
+	config = zpool_get_config(zhp, NULL);
+
+	/* Without a vdev tree there is nothing to search. */
+	if (config == NULL || nvlist_lookup_nvlist(config,
+	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0)
+		return (NULL);
+
+	return (find_vdev_iter(nvroot, guid));
+}
+
+/*
+ * Report whether the resource named by a zfs-scheme FMRI is present.
+ *
+ * Returns 0 if no pool with the FMRI's GUID is found. For a pool-only FMRI
+ * returns 1 once the pool is found; for an FMRI that also names a vdev,
+ * returns 1 only if that vdev is in the pool's configuration. An FMRI with
+ * no pool GUID is malformed: errno is set to EINVAL via fmd_fmri_set_errno()
+ * and its result is returned.
+ */
+int
+fmd_fmri_present(nvlist_t *nvl)
+{
+	uint64_t pool_guid, vdev_guid;
+	cbdata_t cb;
+	int ret;
+
+	if (nvlist_lookup_uint64(nvl, FM_FMRI_ZFS_POOL, &pool_guid) != 0)
+		return (fmd_fmri_set_errno(EINVAL));
+
+	cb.cb_guid = pool_guid;
+	cb.cb_pool = NULL;
+
+	if (zpool_iter(find_pool, &cb) != 1)
+		return (0);
+
+	if (nvlist_lookup_uint64(nvl, FM_FMRI_ZFS_VDEV, &vdev_guid) != 0) {
+		zpool_close(cb.cb_pool);
+		return (1);
+	}
+
+	ret = (find_vdev(cb.cb_pool, vdev_guid) != NULL);
+
+	zpool_close(cb.cb_pool);
+
+	return (ret);
+}
+
+/*
+ * Report whether the resource named by a zfs-scheme FMRI is unusable.
+ *
+ * Returns 1 if no pool with the FMRI's GUID is found. For a pool-only FMRI
+ * the pool is unusable when its state is POOL_STATE_UNAVAIL. For an FMRI
+ * naming a vdev, the vdev is unusable when it is not in the pool's
+ * configuration, when its statistics cannot be read, or when its state is
+ * below VDEV_STATE_DEGRADED. An FMRI with no pool GUID is malformed: errno
+ * is set to EINVAL via fmd_fmri_set_errno() and its result is returned.
+ */
+int
+fmd_fmri_unusable(nvlist_t *nvl)
+{
+	uint64_t pool_guid, vdev_guid;
+	cbdata_t cb;
+	nvlist_t *vd;
+	int ret;
+
+	if (nvlist_lookup_uint64(nvl, FM_FMRI_ZFS_POOL, &pool_guid) != 0)
+		return (fmd_fmri_set_errno(EINVAL));
+
+	cb.cb_guid = pool_guid;
+	cb.cb_pool = NULL;
+
+	if (zpool_iter(find_pool, &cb) != 1)
+		return (1);
+
+	if (nvlist_lookup_uint64(nvl, FM_FMRI_ZFS_VDEV, &vdev_guid) != 0) {
+		ret = (zpool_get_state(cb.cb_pool) == POOL_STATE_UNAVAIL);
+		zpool_close(cb.cb_pool);
+		return (ret);
+	}
+
+	vd = find_vdev(cb.cb_pool, vdev_guid);
+	if (vd == NULL) {
+		ret = 1;
+	} else {
+		vdev_stat_t *vs = NULL;
+		uint_t c;
+
+		/*
+		 * A vdev whose statistics are missing cannot be shown to be
+		 * healthy; treat it the same as a vdev that was not found.
+		 */
+		if (nvlist_lookup_uint64_array(vd, ZPOOL_CONFIG_STATS,
+		    (uint64_t **)&vs, &c) != 0 || vs == NULL)
+			ret = 1;
+		else
+			ret = (vs->vs_state < VDEV_STATE_DEGRADED);
+	}
+
+	zpool_close(cb.cb_pool);
+
+	return (ret);
+}
diff --git a/usr/src/cmd/fm/schemes/zfs/sparc/Makefile b/usr/src/cmd/fm/schemes/zfs/sparc/Makefile
new file mode 100644
index 0000000000..11a1534892
--- /dev/null
+++ b/usr/src/cmd/fm/schemes/zfs/sparc/Makefile
@@ -0,0 +1,32 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+# Shared scheme build definitions and targets; both files live two
+# directories up and are not part of this Makefile.
+include ../../Makefile.com
+include ../../Makefile.targ
+
+# Link against libzfs in addition to whatever LDLIBS already holds.
+LDLIBS += -lzfs
+
+# install depends on a full build plus the $(ROOTPROG) target.
+install: all $(ROOTPROG)
diff --git a/usr/src/cmd/fm/schemes/zfs/sparcv9/Makefile b/usr/src/cmd/fm/schemes/zfs/sparcv9/Makefile
new file mode 100644
index 0000000000..b3e2565271
--- /dev/null
+++ b/usr/src/cmd/fm/schemes/zfs/sparcv9/Makefile
@@ -0,0 +1,33 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+# Shared scheme build definitions, with $(SRC)/Makefile.master.64 pulled in
+# between them and the shared targets.
+# NOTE(review): Makefile.master.64 presumably switches the build to 64-bit
+# settings -- confirm against that file.
+include ../../Makefile.com
+include $(SRC)/Makefile.master.64
+include ../../Makefile.targ
+
+# Link against libzfs in addition to whatever LDLIBS already holds.
+LDLIBS += -lzfs
+
+# install depends on a full build plus the $(ROOTPROG64) target.
+install: all $(ROOTPROG64)
diff --git a/usr/src/cmd/fs.d/df.c b/usr/src/cmd/fs.d/df.c
index 2650f41811..0a38f44b1a 100644
--- a/usr/src/cmd/fs.d/df.c
+++ b/usr/src/cmd/fs.d/df.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -24,7 +23,7 @@
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -251,29 +250,19 @@ static void (*_zfs_close)(zfs_handle_t *);
static uint64_t (*_zfs_prop_get_int)(zfs_handle_t *, zfs_prop_t);
static void (*_zfs_set_error_handler)(void (*)(const char *, va_list));
-int
-main(int argc, char *argv[])
+/*
+ * Dynamically check for libzfs, in case the user hasn't installed the SUNWzfs
+ * packages. A basic utility such as df shouldn't depend on optional
+ * filesystems.
+ */
+static int
+load_libzfs(void)
{
void *hdl;
- (void) setlocale(LC_ALL, "");
-
-#if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
-#define TEXT_DOMAIN "SYS_TEST"
-#endif
- (void) textdomain(TEXT_DOMAIN);
-
- program_name = basename(argv[0]);
-
-#ifdef _iBCS2
- sysv3_set = getenv("SYSV3");
-#endif /* _iBCS2 */
+ if (_zfs_open != NULL)
+ return (1);
- /*
- * Dynamically check for libzfs, in case the user hasn't installed the
- * SUNWzfs packages. A basic utility such as df shouldn't depend on
- * optional filesystems.
- */
if ((hdl = dlopen("libzfs.so", RTLD_LAZY)) != NULL) {
_zfs_set_error_handler = (void (*)())
dlsym(hdl, "zfs_set_error_handler");
@@ -292,9 +281,29 @@ main(int argc, char *argv[])
* like "can't open ..." under race conditions.
*/
_zfs_set_error_handler(dummy_error_handler);
+ return (1);
}
}
+ return (0);
+}
+
+int
+main(int argc, char *argv[])
+{
+ (void) setlocale(LC_ALL, "");
+
+#if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+ (void) textdomain(TEXT_DOMAIN);
+
+ program_name = basename(argv[0]);
+
+#ifdef _iBCS2
+ sysv3_set = getenv("SYSV3");
+#endif /* _iBCS2 */
+
if (EQ(program_name, DEVNM_CMD))
do_devnm(argc, argv);
@@ -1231,7 +1240,7 @@ adjust_total_blocks(struct df_request *dfrp, fsblkcnt64_t *total,
uint64_t quota;
if (strcmp(DFR_FSTYPE(dfrp), MNTTYPE_ZFS) != 0 ||
- _zfs_open == NULL)
+ !load_libzfs())
return;
/*
diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
index 27b0630c72..da7e87dcd2 100644
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -831,7 +830,7 @@ spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
}
if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL)
- state = "UKNNOWN";
+ state = "UNKNOWN";
else
state = statetab[spa.spa_state];
diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c
index 377430909f..3fdbace8b7 100644
--- a/usr/src/cmd/truss/codes.c
+++ b/usr/src/cmd/truss/codes.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -926,6 +925,18 @@ const struct ioc {
"zfs_cmd_t" },
{ (uint_t)ZFS_IOC_SENDBACKUP, "ZFS_IOC_SENDBACKUP",
"zfs_cmd_t" },
+ { (uint_t)ZFS_IOC_INJECT_FAULT, "ZFS_IOC_INJECT_FAULT",
+ "zfs_cmd_t" },
+ { (uint_t)ZFS_IOC_CLEAR_FAULT, "ZFS_IOC_CLEAR_FAULT",
+ "zfs_cmd_t" },
+ { (uint_t)ZFS_IOC_INJECT_LIST_NEXT, "ZFS_IOC_INJECT_LIST_NEXT",
+ "zfs_cmd_t" },
+ { (uint_t)ZFS_IOC_ERROR_LOG, "ZFS_IOC_ERROR_LOG",
+ "zfs_cmd_t" },
+ { (uint_t)ZFS_IOC_CLEAR, "ZFS_IOC_CLEAR",
+ "zfs_cmd_t" },
+ { (uint_t)ZFS_IOC_BOOKMARK_NAME, "ZFS_IOC_BOOKMARK_NAME",
+ "zfs_cmd_t" },
/* kssl ioctls */
{ (uint_t)KSSL_ADD_ENTRY, "KSSL_ADD_ENTRY",
diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c
index 545f1d0df3..990e215c46 100644
--- a/usr/src/cmd/zdb/zdb.c
+++ b/usr/src/cmd/zdb/zdb.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -193,7 +192,7 @@ dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
size_t nvsize = *(uint64_t *)data;
char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
- dmu_read(os, object, 0, nvsize, packed);
+ VERIFY(0 == dmu_read(os, object, 0, nvsize, packed));
VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
@@ -365,7 +364,8 @@ dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
*/
alloc = 0;
for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
- dmu_read(os, smo->smo_object, offset, sizeof (entry), &entry);
+ VERIFY(0 == dmu_read(os, smo->smo_object, offset,
+ sizeof (entry), &entry));
if (SM_DEBUG_DECODE(entry)) {
(void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n",
(u_longlong_t)(offset / sizeof (entry)),
@@ -434,10 +434,10 @@ dump_metaslabs(spa_t *spa)
for (c = 0; c < rvd->vdev_children; c++) {
vd = rvd->vdev_child[c];
- spa_config_enter(spa, RW_READER);
+ spa_config_enter(spa, RW_READER, FTAG);
(void) printf("\n vdev %llu = %s\n\n",
(u_longlong_t)vd->vdev_id, vdev_description(vd));
- spa_config_exit(spa);
+ spa_config_exit(spa, FTAG);
if (dump_opt['d'] <= 5) {
(void) printf("\t%10s %10s %5s\n",
@@ -463,9 +463,9 @@ dump_dtl(vdev_t *vd, int indent)
if (indent == 0)
(void) printf("\nDirty time logs:\n\n");
- spa_config_enter(spa, RW_READER);
+ spa_config_enter(spa, RW_READER, FTAG);
(void) printf("\t%*s%s\n", indent, "", vdev_description(vd));
- spa_config_exit(spa);
+ spa_config_exit(spa, FTAG);
for (ss = avl_first(t); ss; ss = AVL_NEXT(t, ss)) {
/*
@@ -523,11 +523,11 @@ zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a)
if (bc->bc_errno) {
(void) sprintf(buffer,
- "Error %d reading <%llu, %llu, %d, %llu>: ",
+ "Error %d reading <%llu, %llu, %lld, %llu>: ",
bc->bc_errno,
(u_longlong_t)zb->zb_objset,
(u_longlong_t)zb->zb_object,
- zb->zb_level,
+ (u_longlong_t)zb->zb_level,
(u_longlong_t)zb->zb_blkid);
goto out;
}
@@ -547,7 +547,6 @@ zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a)
for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx);
bpx < bpend; bpx++) {
if (bpx->blk_birth != 0) {
- ASSERT(bpx->blk_fill > 0);
fill += bpx->blk_fill;
} else {
ASSERT(bpx->blk_fill == 0);
@@ -575,8 +574,8 @@ zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a)
for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
if (l == zb->zb_level) {
- (void) sprintf(buffer + strlen(buffer), "L%x",
- zb->zb_level);
+ (void) sprintf(buffer + strlen(buffer), "L%llx",
+ (u_longlong_t)zb->zb_level);
} else {
(void) sprintf(buffer + strlen(buffer), " ");
}
@@ -730,7 +729,7 @@ dump_bplist(objset_t *mos, uint64_t object, char *name)
if (dump_opt['d'] < 3)
return;
- bplist_open(&bpl, mos, object);
+ VERIFY(0 == bplist_open(&bpl, mos, object));
if (bplist_empty(&bpl)) {
bplist_close(&bpl);
return;
@@ -776,20 +775,20 @@ znode_path(objset_t *os, uint64_t object, char *pathbuf, size_t size)
size_t complen;
char component[MAXNAMELEN + 1];
char *path;
+ int error;
path = pathbuf + size;
*--path = '\0';
for (;;) {
- db = dmu_bonus_hold(os, object);
- if (db == NULL)
+ error = dmu_bonus_hold(os, object, FTAG, &db);
+ if (error)
break;
- dmu_buf_read(db);
dmu_object_info_from_db(db, &doi);
zp = db->db_data;
parent = zp->zp_parent;
- dmu_buf_rele(db);
+ dmu_buf_rele(db, FTAG);
if (doi.doi_bonus_type != DMU_OT_ZNODE)
break;
@@ -881,7 +880,7 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
dump_none, /* ZIL intent log */
dump_dnode, /* DMU dnode */
dump_dmu_objset, /* DMU objset */
- dump_dsl_dir, /* DSL directory */
+ dump_dsl_dir, /* DSL directory */
dump_zap, /* DSL directory child map */
dump_zap, /* DSL dataset snap map */
dump_zap, /* DSL props */
@@ -897,6 +896,7 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
dump_uint8, /* other uint8[] */
dump_uint64, /* other uint64[] */
dump_zap, /* other ZAP */
+ dump_zap, /* persistent error log */
};
static void
@@ -920,10 +920,10 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
if (object == 0) {
dn = os->os->os_meta_dnode;
} else {
- db = dmu_bonus_hold(os, object);
- if (db == NULL)
- fatal("dmu_bonus_hold(%llu) failed", object);
- dmu_buf_read(db);
+ error = dmu_bonus_hold(os, object, FTAG, &db);
+ if (error)
+ fatal("dmu_bonus_hold(%llu) failed, errno %u",
+ object, error);
bonus = db->db_data;
bsize = db->db_size;
dn = ((dmu_buf_impl_t *)db)->db_dnode;
@@ -999,7 +999,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
}
if (db != NULL)
- dmu_buf_rele(db);
+ dmu_buf_rele(db, FTAG);
}
static char *objset_types[DMU_OST_NUMTYPES] = {
@@ -1214,7 +1214,7 @@ zdb_space_map_load(spa_t *spa)
}
static int
-zdb_space_map_claim(spa_t *spa, blkptr_t *bp)
+zdb_space_map_claim(spa_t *spa, blkptr_t *bp, zbookmark_t *zb)
{
dva_t *dva = &bp->blk_dva[0];
uint64_t vdev = DVA_GET_VDEV(dva);
@@ -1248,7 +1248,7 @@ zdb_space_map_claim(spa_t *spa, blkptr_t *bp)
error = zio_wait(zio_read(NULL, spa, &blk,
&gbh, SPA_GANGBLOCKSIZE, NULL, NULL,
ZIO_PRIORITY_SYNC_READ,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_CONFIG_HELD));
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_CONFIG_HELD, zb));
if (error)
return (error);
if (BP_SHOULD_BYTESWAP(&blk))
@@ -1256,7 +1256,7 @@ zdb_space_map_claim(spa_t *spa, blkptr_t *bp)
for (g = 0; g < SPA_GBH_NBLKPTRS; g++) {
if (gbh.zg_blkptr[g].blk_birth == 0)
break;
- error = zdb_space_map_claim(spa, &gbh.zg_blkptr[g]);
+ error = zdb_space_map_claim(spa, &gbh.zg_blkptr[g], zb);
if (error)
return (error);
}
@@ -1327,11 +1327,6 @@ zdb_refresh_ubsync(spa_t *spa)
zio_t *zio;
/*
- * Reopen all devices to purge zdb's vdev caches.
- */
- vdev_reopen(rvd, NULL);
-
- /*
* Reload the uberblock.
*/
zio = zio_root(spa, NULL, NULL,
@@ -1367,8 +1362,6 @@ typedef struct zdb_cb {
int zcb_haderrors;
} zdb_cb_t;
-static blkptr_cb_t zdb_blkptr_cb;
-
static void
zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type)
{
@@ -1388,7 +1381,7 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type)
if (dump_opt['L'])
return;
- error = zdb_space_map_claim(spa, bp);
+ error = zdb_space_map_claim(spa, bp, &zcb->zcb_cache->bc_bookmark);
if (error == 0)
return;
@@ -1402,22 +1395,6 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type)
(void) fatal("fatal error %d in bp %p", error, bp);
}
-static void
-zdb_log_block_cb(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t first_txg)
-{
- if (bp->blk_birth < first_txg) {
- zdb_cb_t *zcb = arg;
- traverse_blk_cache_t bc = *zcb->zcb_cache;
- zbookmark_t *zb = &bc.bc_bookmark;
-
- zb->zb_objset = bp->blk_cksum.zc_word[2];
- zb->zb_blkid = bp->blk_cksum.zc_word[3];
- bc.bc_blkptr = *bp;
-
- (void) zdb_blkptr_cb(&bc, zilog->zl_spa, arg);
- }
-}
-
static int
zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
{
@@ -1444,11 +1421,11 @@ zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
blkbuf[0] = '\0';
(void) printf("zdb_blkptr_cb: Got error %d reading "
- "<%llu, %llu, %d, %llx> %s -- %s\n",
+ "<%llu, %llu, %lld, %llx> %s -- %s\n",
bc->bc_errno,
(u_longlong_t)zb->zb_objset,
(u_longlong_t)zb->zb_object,
- zb->zb_level,
+ (u_longlong_t)zb->zb_level,
(u_longlong_t)zb->zb_blkid,
blkbuf,
error == EAGAIN ? "retrying" : "skipping");
@@ -1472,18 +1449,6 @@ zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
blkbuf);
}
- if (type == DMU_OT_OBJSET) {
- objset_phys_t *osphys = bc->bc_data;
- zilog_t zilog = { 0 };
- zilog.zl_header = &osphys->os_zil_header;
- zilog.zl_spa = spa;
-
- zcb->zcb_cache = bc;
-
- zil_parse(&zilog, zdb_log_block_cb, NULL, zcb,
- spa_first_txg(spa));
- }
-
return (0);
}
@@ -1492,6 +1457,7 @@ dump_block_stats(spa_t *spa)
{
traverse_handle_t *th;
zdb_cb_t zcb = { 0 };
+ traverse_blk_cache_t dummy_cache = { 0 };
zdb_blkstats_t *zb, *tzb;
uint64_t alloc, space;
int leaks = 0;
@@ -1499,10 +1465,12 @@ dump_block_stats(spa_t *spa)
int flags;
int e;
+ zcb.zcb_cache = &dummy_cache;
+
if (dump_opt['c'])
advance |= ADVANCE_DATA;
- advance |= ADVANCE_PRUNE;
+ advance |= ADVANCE_PRUNE | ADVANCE_ZIL;
(void) printf("\nTraversing all blocks to %sverify"
" nothing leaked ...\n",
@@ -1526,8 +1494,8 @@ dump_block_stats(spa_t *spa)
blkptr_t blk;
uint64_t itor = 0;
- bplist_open(bpl, spa->spa_meta_objset,
- spa->spa_sync_bplist_obj);
+ VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
+ spa->spa_sync_bplist_obj));
while (bplist_iterate(bpl, &itor, &blk) == 0) {
zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED);
@@ -1543,8 +1511,8 @@ dump_block_stats(spa_t *spa)
}
/*
- * Now traverse the pool. If we're read all data to verify checksums,
- * do a scrubbing read so that we validate all copies.
+ * Now traverse the pool. If we're reading all data to verify
+ * checksums, do a scrubbing read so that we validate all copies.
*/
flags = ZIO_FLAG_CANFAIL;
if (advance & ADVANCE_DATA)
@@ -1552,7 +1520,7 @@ dump_block_stats(spa_t *spa)
th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags);
th->th_noread = zdb_noread;
- traverse_add_pool(th, 0, -1ULL);
+ traverse_add_pool(th, 0, spa_first_txg(spa));
while (traverse_more(th) == EAGAIN)
continue;
@@ -1734,6 +1702,7 @@ main(int argc, char **argv)
int verbose = 0;
int error;
int flag, set;
+ vdev_knob_t *vk;
(void) setrlimit(RLIMIT_NOFILE, &rl);
@@ -1789,10 +1758,10 @@ main(int argc, char **argv)
zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0);
zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16);
(void) printf("simulating bad block "
- "<%llu, %llu, %d, %llx>\n",
+ "<%llu, %llu, %lld, %llx>\n",
(u_longlong_t)zdb_noread.zb_objset,
(u_longlong_t)zdb_noread.zb_object,
- zdb_noread.zb_level,
+ (u_longlong_t)zdb_noread.zb_level,
(u_longlong_t)zdb_noread.zb_blkid);
break;
case 'v':
@@ -1809,6 +1778,15 @@ main(int argc, char **argv)
kernel_init(FREAD);
+ /*
+ * Disable vdev caching. If we don't do this, live pool traversal
+ * won't make progress because it will never see disk updates.
+ */
+ for (vk = vdev_knob_next(NULL); vk != NULL; vk = vdev_knob_next(vk)) {
+ if (strcmp(vk->vk_name, "cache_size") == 0)
+ vk->vk_default = 0;
+ }
+
for (c = 0; c < 256; c++) {
if (dump_all && c != 'L' && c != 'l')
dump_opt[c] = 1;
diff --git a/usr/src/cmd/zdb/zdb_il.c b/usr/src/cmd/zdb/zdb_il.c
index 1006115709..ffa2471cda 100644
--- a/usr/src/cmd/zdb/zdb_il.c
+++ b/usr/src/cmd/zdb/zdb_il.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -129,9 +128,19 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
if (bp->blk_birth == 0) {
bzero(buf, sizeof (buf));
} else {
+ zbookmark_t zb;
+
+ ASSERT3U(bp->blk_cksum.zc_word[2], ==,
+ dmu_objset_id(zilog->zl_os));
+
+ zb.zb_objset = bp->blk_cksum.zc_word[2];
+ zb.zb_object = 0;
+ zb.zb_level = -1;
+ zb.zb_blkid = bp->blk_cksum.zc_word[3];
+
error = zio_wait(zio_read(NULL, zilog->zl_spa,
bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
- ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL));
+ ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
if (error)
return;
}
diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c
index a92b012744..b58a29cfa4 100644
--- a/usr/src/cmd/zfs/zfs_main.c
+++ b/usr/src/cmd/zfs/zfs_main.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -111,9 +110,8 @@ typedef struct zfs_command {
/*
* Master command table. Each ZFS command has a name, associated function, and
- * usage message. Unfortunately, the usage messages need to be
- * iternationalized, so we have to have a function to return the usage message
- * based on a command index.
+ * usage message. The usage messages need to be internationalized, so we have
+ * to have a function to return the usage message based on a command index.
*
* These commands are organized according to how they are displayed in the usage
* message. An empty command (one with a NULL name) indicates an empty line in
@@ -2569,7 +2567,7 @@ manual_mount(int argc, char **argv)
char *dataset, *path;
/* check options */
- while ((c = getopt(argc, argv, ":o:O")) != -1) {
+ while ((c = getopt(argc, argv, ":mo:O")) != -1) {
switch (c) {
case 'o':
(void) strlcpy(mntopts, optarg, sizeof (mntopts));
@@ -2577,6 +2575,9 @@ manual_mount(int argc, char **argv)
case 'O':
flags |= MS_OVERLAY;
break;
+ case 'm':
+ flags |= MS_NOMNTTAB;
+ break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
diff --git a/usr/src/cmd/zinject/Makefile b/usr/src/cmd/zinject/Makefile
new file mode 100644
index 0000000000..f646689967
--- /dev/null
+++ b/usr/src/cmd/zinject/Makefile
@@ -0,0 +1,54 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# Top-level zinject Makefile: recurses into the per-ISA subdirectories.
+# PROG is set from the shell to the name of the current directory.
+PROG:sh= basename `pwd`
+
+include ../Makefile.cmd
+
+# Subdirectories to build.  $(INTEL_BLD) and $(BUILD64) are defined outside
+# this file.
+# NOTE(review): they presumably expand to a comment character when the
+# corresponding build is disabled -- confirm.
+$(INTEL_BLD)SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+# Conditional macros: each top-level target sets TARGET to its own name so
+# the $(SUBDIRS) rule below runs the same target in each subdirectory.
+all := TARGET = all
+install := TARGET = install
+clean := TARGET = clean
+clobber := TARGET = clobber
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber lint: $(SUBDIRS)
+
+# After the subdirectories are installed, remove $(ROOTUSRSBINPROG) and
+# recreate it as a link to $(ISAEXEC).  The leading '-' makes make ignore a
+# failure of either command.
+install: $(SUBDIRS)
+ -$(RM) $(ROOTUSRSBINPROG)
+ -$(LN) $(ISAEXEC) $(ROOTUSRSBINPROG)
+
+# Run $(TARGET) in the subdirectory; the FRC dependency forces this rule to
+# run every time.
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/zinject/Makefile.com b/usr/src/cmd/zinject/Makefile.com
new file mode 100644
index 0000000000..40f1914729
--- /dev/null
+++ b/usr/src/cmd/zinject/Makefile.com
@@ -0,0 +1,55 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# Common rules for the per-ISA zinject builds.
+# PROG is set from the shell to the name of the parent directory; the
+# sources are $(PROG).c and translate.c in that parent directory.
+PROG:sh= cd ..; basename `pwd`
+SRCS= ../$(PROG).c ../translate.c
+
+include ../../Makefile.cmd
+
+# Extra header search paths: libzpool's common directory and the ZFS
+# sources under uts.
+INCS += -I../../../lib/libzpool/common
+INCS += -I../../../uts/common/fs/zfs
+
+LDLIBS += -lzpool -lzfs
+
+# Compiler and lint C99 mode flags.
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+CPPFLAGS += -D_LARGEFILE64_SOURCE=1 -D_REENTRANT $(INCS)
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+# The program is compiled and linked straight from the sources in a single
+# $(LINK.c) step, followed by $(POST_PROCESS).
+$(PROG): $(SRCS)
+ $(LINK.c) -o $(PROG) $(SRCS) $(LDLIBS)
+ $(POST_PROCESS)
+
+# clean has no commands of its own here.
+clean:
+
+lint: lint_SRCS
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/zinject/amd64/Makefile b/usr/src/cmd/zinject/amd64/Makefile
new file mode 100644
index 0000000000..8740a9f3ac
--- /dev/null
+++ b/usr/src/cmd/zinject/amd64/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# amd64 build of zinject: the common rules plus ../../Makefile.cmd.64.
+# NOTE(review): Makefile.cmd.64 presumably supplies the 64-bit settings --
+# confirm against that file.
+include ../Makefile.com
+include ../../Makefile.cmd.64
+
+# install depends on a full build plus the $(ROOTUSRSBINPROG64) target.
+install: all $(ROOTUSRSBINPROG64)
diff --git a/usr/src/cmd/zinject/i386/Makefile b/usr/src/cmd/zinject/i386/Makefile
new file mode 100644
index 0000000000..d2cb13dcd1
--- /dev/null
+++ b/usr/src/cmd/zinject/i386/Makefile
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# i386 build of zinject: uses the common rules unchanged.
+include ../Makefile.com
+
+# install depends on a full build plus the $(ROOTUSRSBINPROG32) target.
+install: all $(ROOTUSRSBINPROG32)
diff --git a/usr/src/cmd/zinject/sparcv9/Makefile b/usr/src/cmd/zinject/sparcv9/Makefile
new file mode 100644
index 0000000000..8740a9f3ac
--- /dev/null
+++ b/usr/src/cmd/zinject/sparcv9/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# sparcv9 build of zinject: the common rules plus ../../Makefile.cmd.64.
+# NOTE(review): Makefile.cmd.64 presumably supplies the 64-bit settings --
+# confirm against that file.
+include ../Makefile.com
+include ../../Makefile.cmd.64
+
+# install depends on a full build plus the $(ROOTUSRSBINPROG64) target.
+install: all $(ROOTUSRSBINPROG64)
diff --git a/usr/src/cmd/zinject/translate.c b/usr/src/cmd/zinject/translate.c
new file mode 100644
index 0000000000..882b230930
--- /dev/null
+++ b/usr/src/cmd/zinject/translate.c
@@ -0,0 +1,458 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <libzfs.h>
+
+#undef verify /* both libzfs.h and zfs_context.h want to define this */
+
+#include <sys/zfs_context.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/file.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
+#include <sys/dnode.h>
+
+#include <sys/mkdev.h>
+
+#include "zinject.h"
+
+/*
+ * Defined outside this file. translate_record() calls kernel_init() before
+ * using the dmu/dnode interfaces and kernel_fini() when it is done.
+ */
+extern void kernel_init(int);
+extern void kernel_fini(void);
+
+/*
+ * Non-zero when ZINJECT_DEBUG is present in the environment (set by
+ * translate_record()); ziprintf() prints only when this is set.
+ */
+static int debug;
+
+/*
+ * Debug-only printf(): formats the message to stdout when the file-scope
+ * 'debug' flag is set, and does nothing otherwise.
+ */
+static void
+ziprintf(const char *fmt, ...)
+{
+	va_list args;
+
+	if (debug) {
+		va_start(args, fmt);
+		(void) vprintf(fmt, args);
+		va_end(args);
+	}
+}
+
+/*
+ * Given a full path to a file, translate into a dataset name and a relative
+ * path within the dataset.  'dataset' must be at least MAXNAMELEN characters,
+ * and 'relpath' must be at least MAXPATHLEN characters.  We also pass a stat64
+ * buffer, which we need later to get the object ID.
+ *
+ * The filesystem is found by scanning MNTTAB for the entry whose device
+ * number matches the st_dev of 'fullpath'.  Returns 0 on success; on failure
+ * a diagnostic is written to stderr and -1 is returned.
+ */
+static int
+parse_pathname(const char *fullpath, char *dataset, char *relpath,
+    struct stat64 *statbuf)
+{
+	struct extmnttab mp;
+	FILE *fp;
+	int match;
+	const char *rel;
+	int ret = -1;
+
+	if (fullpath[0] != '/') {
+		(void) fprintf(stderr, "invalid object '%s': must be full "
+		    "path\n", fullpath);
+		usage();
+		return (-1);
+	}
+
+	if (strlen(fullpath) >= MAXPATHLEN) {
+		(void) fprintf(stderr, "invalid object; pathname too long\n");
+		return (-1);
+	}
+
+	if (stat64(fullpath, statbuf) != 0) {
+		(void) fprintf(stderr, "cannot open '%s': %s\n",
+		    fullpath, strerror(errno));
+		return (-1);
+	}
+
+	if ((fp = fopen(MNTTAB, "r")) == NULL) {
+		(void) fprintf(stderr, "cannot open /etc/mnttab\n");
+		return (-1);
+	}
+
+	/*
+	 * From here on the mnttab stream is open; every exit goes through
+	 * 'out' so that it is closed exactly once, after the last use of
+	 * the entry in 'mp'.
+	 */
+	match = 0;
+	while (getextmntent(fp, &mp, sizeof (mp)) == 0) {
+		if (makedev(mp.mnt_major, mp.mnt_minor) == statbuf->st_dev) {
+			match = 1;
+			break;
+		}
+	}
+
+	if (!match) {
+		(void) fprintf(stderr, "cannot find mountpoint for '%s'\n",
+		    fullpath);
+		goto out;
+	}
+
+	if (strcmp(mp.mnt_fstype, MNTTYPE_ZFS) != 0) {
+		(void) fprintf(stderr, "invalid path '%s': not a ZFS "
+		    "filesystem\n", fullpath);
+		goto out;
+	}
+
+	if (strncmp(fullpath, mp.mnt_mountp, strlen(mp.mnt_mountp)) != 0) {
+		(void) fprintf(stderr, "invalid path '%s': mountpoint "
+		    "doesn't match path\n", fullpath);
+		goto out;
+	}
+
+	(void) strcpy(dataset, mp.mnt_special);
+
+	/* The relative path is whatever follows the mountpoint prefix. */
+	rel = fullpath + strlen(mp.mnt_mountp);
+	if (rel[0] == '/')
+		rel++;
+	(void) strcpy(relpath, rel);
+
+	ret = 0;
+out:
+	(void) fclose(fp);
+	return (ret);
+}
+
+/*
+ * Turn a (dataset, path) pair into an (objset, object) pair stored in
+ * 'record'.  The object number is simply the inode number from 'statbuf';
+ * resolving the path through libzpool would be far more work.  'path' is
+ * unused.  Returns 0 on success, or -1 after printing a diagnostic.
+ */
+/* ARGSUSED */
+static int
+object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
+    zinject_record_t *record)
+{
+	objset_t *objset;
+	int error;
+
+	/*
+	 * sync() first so that the on-disk state seen through libzpool
+	 * agrees with the in-core state.
+	 */
+	sync();
+
+	error = dmu_objset_open(dataset, DMU_OST_ZFS,
+	    DS_MODE_STANDARD | DS_MODE_READONLY, &objset);
+	if (error == 0) {
+		record->zi_objset = dmu_objset_id(objset);
+		record->zi_object = statbuf->st_ino;
+
+		dmu_objset_close(objset);
+
+		return (0);
+	}
+
+	(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
+	    dataset, strerror(error));
+	return (-1);
+}
+
+/*
+ * Calculate the real range based on the type, level, and range given.
+ *
+ * 'range' is either NULL (whole object) or a decimal string of the form
+ * 'start[,end]'. On entry record->zi_object holds the object number. On
+ * success zi_start, zi_end and zi_level have been filled in (and, for
+ * TYPE_DNODE, zi_object has been reset to 0). Returns 0 on success, or -1
+ * after printing a diagnostic to stderr.
+ */
+static int
+calculate_range(const char *dataset, err_type_t type, int level, char *range,
+ zinject_record_t *record)
+{
+ objset_t *os = NULL;
+ dnode_t *dn = NULL;
+ int err;
+ int ret = -1;
+
+ /*
+ * Determine the numeric range from the string.
+ */
+ if (range == NULL) {
+ /*
+ * If range is unspecified, set the range to [0,-1], which
+ * indicates that the whole object should be treated as an
+ * error.
+ */
+ record->zi_start = 0;
+ record->zi_end = -1ULL;
+ } else {
+ char *end;
+
+ /* XXX add support for suffixes */
+ record->zi_start = strtoull(range, &end, 10);
+
+
+ /* A lone 'start' sets the end to start + 1. */
+ if (*end == '\0')
+ record->zi_end = record->zi_start + 1;
+ else if (*end == ',')
+ record->zi_end = strtoull(end + 1, &end, 10);
+
+ /* Anything left over after the number(s) is a syntax error. */
+ if (*end != '\0') {
+ (void) fprintf(stderr, "invalid range '%s': must be "
+ "a numeric range of the form 'start[,end]'\n",
+ range);
+ goto out;
+ }
+ }
+
+ switch (type) {
+ case TYPE_DATA:
+ break;
+
+ case TYPE_DNODE:
+ /*
+ * If this is a request to inject faults into the dnode, then we
+ * must translate the current (objset,object) pair into an
+ * offset within the metadnode for the objset. Specifying any
+ * kind of range with type 'dnode' is illegal.
+ */
+ if (range != NULL) {
+ (void) fprintf(stderr, "range cannot be specified when "
+ "type is 'dnode'\n");
+ goto out;
+ }
+
+ record->zi_start = record->zi_object * sizeof (dnode_phys_t);
+ record->zi_end = record->zi_start + sizeof (dnode_phys_t);
+ /* Object 0 makes the lookup below use the objset's meta dnode. */
+ record->zi_object = 0;
+ break;
+ }
+
+ /*
+ * Get the dnode associated with object, so we can calculate the block
+ * size.
+ */
+ if ((err = dmu_objset_open(dataset, DMU_OST_ANY,
+ DS_MODE_STANDARD | DS_MODE_READONLY, &os)) != 0) {
+ (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
+ dataset, strerror(err));
+ goto out;
+ }
+
+ if (record->zi_object == 0) {
+ dn = os->os->os_meta_dnode;
+ } else {
+ err = dnode_hold(os->os, record->zi_object, FTAG, &dn);
+ if (err != 0) {
+ (void) fprintf(stderr, "failed to hold dnode "
+ "for object %llu\n",
+ (u_longlong_t)record->zi_object);
+ goto out;
+ }
+ }
+
+
+ ziprintf("data shift: %d\n", (int)dn->dn_datablkshift);
+ ziprintf(" ind shift: %d\n", (int)dn->dn_indblkshift);
+
+ /*
+ * Translate range into block IDs. The whole-object range [0,-1] is
+ * left as is.
+ */
+ if (record->zi_start != 0 || record->zi_end != -1ULL) {
+ record->zi_start >>= dn->dn_datablkshift;
+ record->zi_end >>= dn->dn_datablkshift;
+ }
+
+ /*
+ * Check level, and then translate level 0 blkids into ranges
+ * appropriate for level of indirection.
+ */
+ record->zi_level = level;
+ if (level > 0) {
+ ziprintf("level 0 blkid range: [%llu, %llu]\n",
+ record->zi_start, record->zi_end);
+
+ if (level >= dn->dn_nlevels) {
+ (void) fprintf(stderr, "level %d exceeds max level "
+ "of object (%d)\n", level, dn->dn_nlevels - 1);
+ goto out;
+ }
+
+ if (record->zi_start != 0 || record->zi_end != 0) {
+ int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
+
+ /* Apply the shift once for each level of indirection. */
+ for (; level > 0; level--) {
+ record->zi_start >>= shift;
+ record->zi_end >>= shift;
+ }
+ }
+ }
+
+ ret = 0;
+out:
+ /*
+ * The meta dnode was taken straight from the objset rather than through
+ * dnode_hold(), so only other dnodes are released here.
+ */
+ if (dn) {
+ if (dn != os->os->os_meta_dnode)
+ dnode_rele(dn, FTAG);
+ }
+ if (os)
+ dmu_objset_close(os);
+
+ return (ret);
+}
+
+/*
+ * Translate a user-level (type, object, range, level) description into an
+ * injection record.
+ *
+ * For the MOS types 'object' is a pool name: only record->zi_type is set,
+ * 'poolname' receives the name and 'dataset' is set to the empty string.
+ * For all other types 'object' is a full path on a ZFS filesystem:
+ * 'dataset' receives the dataset name, 'poolname' the part of it before the
+ * first '/', and 'record' the objset, object, level and block range.
+ *
+ * 'poolname' and 'dataset' must be large enough for the names copied into
+ * them.  Returns 0 on success and -1 on failure.  kernel_init() is paired
+ * with kernel_fini() on every return path.
+ */
+int
+translate_record(err_type_t type, const char *object, const char *range,
+    int level, zinject_record_t *record, char *poolname, char *dataset)
+{
+	char path[MAXPATHLEN];
+	char *slash;
+	struct stat64 statbuf;
+	int ret = -1;
+
+	kernel_init(FREAD);
+
+	debug = (getenv("ZINJECT_DEBUG") != NULL);
+
+	ziprintf("translating: %s\n", object);
+
+	if (MOS_TYPE(type)) {
+		/*
+		 * MOS objects are treated specially.
+		 */
+		switch (type) {
+		case TYPE_MOS:
+			record->zi_type = 0;
+			break;
+		case TYPE_MOSDIR:
+			record->zi_type = DMU_OT_OBJECT_DIRECTORY;
+			break;
+		case TYPE_METASLAB:
+			record->zi_type = DMU_OT_OBJECT_ARRAY;
+			break;
+		case TYPE_CONFIG:
+			record->zi_type = DMU_OT_PACKED_NVLIST;
+			break;
+		case TYPE_BPLIST:
+			record->zi_type = DMU_OT_BPLIST;
+			break;
+		case TYPE_SPACEMAP:
+			record->zi_type = DMU_OT_SPACE_MAP;
+			break;
+		case TYPE_ERRLOG:
+			record->zi_type = DMU_OT_ERROR_LOG;
+			break;
+		default:
+			break;
+		}
+
+		dataset[0] = '\0';
+		(void) strcpy(poolname, object);
+
+		/* Leave through 'out' so that kernel_fini() still runs. */
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * Convert a full path into a (dataset, file) pair.
+	 */
+	if (parse_pathname(object, dataset, path, &statbuf) != 0)
+		goto out;
+
+	ziprintf(" dataset: %s\n", dataset);
+	ziprintf(" path: %s\n", path);
+
+	/*
+	 * Convert (dataset, file) into (objset, object)
+	 */
+	if (object_from_path(dataset, path, &statbuf, record) != 0)
+		goto out;
+
+	ziprintf("raw objset: %llu\n", (u_longlong_t)record->zi_objset);
+	ziprintf("raw object: %llu\n", (u_longlong_t)record->zi_object);
+
+	/*
+	 * For the given object, calculate the real (type, level, range)
+	 */
+	if (calculate_range(dataset, type, level, (char *)range, record) != 0)
+		goto out;
+
+	ziprintf(" objset: %llu\n", (u_longlong_t)record->zi_objset);
+	ziprintf(" object: %llu\n", (u_longlong_t)record->zi_object);
+	if (record->zi_start == 0 &&
+	    record->zi_end == -1ULL)
+		ziprintf(" range: all\n");
+	else
+		ziprintf(" range: [%llu, %llu]\n",
+		    (u_longlong_t)record->zi_start,
+		    (u_longlong_t)record->zi_end);
+
+	/*
+	 * Copy the pool name: everything in the dataset name before the
+	 * first '/'.
+	 */
+	(void) strcpy(poolname, dataset);
+	if ((slash = strchr(poolname, '/')) != NULL)
+		*slash = '\0';
+
+	ret = 0;
+
+out:
+	kernel_fini();
+	return (ret);
+}
+
+/*
+ * Parse a raw bookmark of the form objset:object:level:blkid, where each
+ * number is a hexadecimal value, into 'record'.  The record's end is set
+ * equal to its start.  Returns 0 on success, or -1 after printing a
+ * diagnostic if the string does not contain all four fields.
+ */
+int
+translate_raw(const char *str, zinject_record_t *record)
+{
+	int nfields;
+
+	nfields = sscanf(str, "%llx:%llx:%x:%llx",
+	    (u_longlong_t *)&record->zi_objset,
+	    (u_longlong_t *)&record->zi_object, &record->zi_level,
+	    (u_longlong_t *)&record->zi_start);
+
+	if (nfields == 4) {
+		record->zi_end = record->zi_start;
+		return (0);
+	}
+
+	(void) fprintf(stderr, "bad raw spec '%s': must be of the form "
+	    "'objset:object:level:blkid'\n", str);
+	return (-1);
+}
+
+/*
+ * Given a device name or GUID, create an appropriate injection record with
+ * zi_guid set.
+ *
+ * 'device' is first tried as a hexadecimal GUID; if it does not parse
+ * completely, or parses to 0, it is looked up as a vdev name in 'pool'.
+ * Returns 0 on success and -1 if the pool cannot be opened or the device
+ * cannot be found (a diagnostic is printed in the latter case).
+ */
+int
+translate_device(const char *pool, const char *device, zinject_record_t *record)
+{
+	char *end;
+	zpool_handle_t *zhp;
+
+	if ((zhp = zpool_open(pool)) == NULL)
+		return (-1);
+
+	record->zi_guid = strtoull(device, &end, 16);
+	if (record->zi_guid == 0 || *end != '\0')
+		record->zi_guid = zpool_vdev_to_guid(zhp, device);
+
+	/* The handle is only needed for the lookup above; don't leak it. */
+	zpool_close(zhp);
+
+	if (record->zi_guid == 0) {
+		(void) fprintf(stderr, "cannot find device '%s' in pool '%s'\n",
+		    device, pool);
+		return (-1);
+	}
+
+	return (0);
+}
diff --git a/usr/src/cmd/zinject/zinject.c b/usr/src/cmd/zinject/zinject.c
new file mode 100644
index 0000000000..b584fb0de5
--- /dev/null
+++ b/usr/src/cmd/zinject/zinject.c
@@ -0,0 +1,739 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * ZFS Fault Injector
+ *
+ * This userland component takes a set of options and uses libzpool to translate
+ * from a user-visible object type and name to an internal representation.
+ * There are two basic types of faults: device faults and data faults.
+ *
+ *
+ * DEVICE FAULTS
+ *
+ * Errors can be injected into a particular vdev using the '-d' option. This
+ * option takes a path or vdev GUID to uniquely identify the device within a
+ * pool. There are two types of errors that can be injected, EIO and ENXIO,
+ * that can be controlled through the '-e' option. The default is ENXIO. For
+ * EIO failures, any attempt to read data from the device will return EIO, but
+ * a subsequent attempt to reopen the device will succeed. For ENXIO failures,
+ * any attempt to read from the device will return EIO, but any attempt to
+ * reopen the device will also return ENXIO.
+ *
+ * This form of the command looks like:
+ *
+ * zinject -d device [-e errno] pool
+ *
+ *
+ * DATA FAULTS
+ *
+ * We begin with a tuple of the form:
+ *
+ * <type,level,range,object>
+ *
+ * type A string describing the type of data to target. Each type
+ * implicitly describes how to interpret 'object'. Currently,
+ * the following values are supported:
+ *
+ * data User data for a file
+ * dnode Dnode for a file or directory
+ *
+ * The following MOS objects are special. Instead of injecting
+ * errors on a particular object or blkid, we inject errors across
+ * all objects of the given type.
+ *
+ * mos Any data in the MOS
+ * mosdir object directory
+ * config pool configuration
+ * bplist blkptr list
+ * spacemap spacemap
+ * metaslab metaslab
+ * errlog persistent error log
+ *
+ * level Object level. Defaults to '0', not applicable to all types. If
+ * a range is given, this corresponds to the indirect block
+ * corresponding to the specific range.
+ *
+ * range A numerical range [start,end) within the object. Defaults to
+ * the full size of the file.
+ *
+ * object A string describing the logical location of the object. For
+ * files and directories (currently the only supported types),
+ * this is the path of the object on disk.
+ *
+ * This is translated, via libzpool, into the following internal representation:
+ *
+ * <type,objset,object,level,range>
+ *
+ * These types should be self-explanatory. This tuple is then passed to the
+ * kernel via a special ioctl() to initiate fault injection for the given
+ * object. Note that 'type' is not strictly necessary for fault injection, but
+ * is used when translating existing faults into a human-readable string.
+ *
+ *
+ * The command itself takes one of the forms:
+ *
+ * zinject
+ * zinject <-a | -u pool>
+ * zinject -c <id|all>
+ * zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
+ * [-r range] <object>
+ * zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:blkid pool
+ *
+ * With no arguments, the command prints all currently registered injection
+ * handlers, with their numeric identifiers.
+ *
+ * The '-c' option will clear the given handler, or all handlers if 'all' is
+ * specified.
+ *
+ * The '-e' option takes a string describing the errno to simulate. This must
+ * be either 'io' or 'checksum'. In most cases this will result in the same
+ * behavior, but RAID-Z will produce a different set of ereports for this
+ * situation.
+ *
+ * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is
+ * specified, then the ARC cache is flushed appropriately. If '-u' is
+ * specified, then the underlying SPA is unloaded. Either of these flags can be
+ * specified independently of any other handlers. The '-m' flag automatically
+ * does an unmount and remount of the underlying dataset to aid in flushing the
+ * cache.
+ *
+ * The '-f' flag controls the frequency of errors injected, expressed as an
+ * integer percentage between 1 and 100. The default is 100.
+ *
+ * The fourth form is responsible for actually injecting the handler into the
+ * framework. It takes the arguments described above, translates them to the
+ * internal tuple using libzpool, and then issues an ioctl() to register the
+ * handler.
+ *
+ * The final form can target a specific bookmark, regardless of whether a
+ * human-readable interface has been designed. It allows developers to specify
+ * a particular block by number.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <sys/fs/zfs.h>
+#include <sys/mount.h>
+
+#include <libzfs.h>
+
+#undef verify /* both libzfs.h and zfs_context.h want to define this */
+
+#include "zinject.h"
+
+/* Descriptor for ZFS_DEV; opened in main() and used for the ioctl()s below. */
+int zfs_fd;
+
+/* errno value selected by '-e checksum'. */
+#define ECKSUM EBADE
+
+/*
+ * Type names accepted by '-t', indexed by err_type_t. The order must match
+ * the enum in zinject.h, because name_to_type() returns the index of the
+ * matching entry as the type.
+ */
+static const char *errtable[TYPE_INVAL] = {
+ "data",
+ "dnode",
+ "mos",
+ "mosdir",
+ "metaslab",
+ "config",
+ "bplist",
+ "spacemap",
+ "errlog"
+};
+
+/*
+ * Map a '-t' argument onto its err_type_t by looking the name up in
+ * errtable.  Returns TYPE_INVAL when the name is not recognized.
+ */
+static err_type_t
+name_to_type(const char *arg)
+{
+	int idx = 0;
+
+	while (idx < TYPE_INVAL) {
+		if (strcmp(errtable[idx], arg) == 0)
+			return (idx);
+		idx++;
+	}
+
+	return (TYPE_INVAL);
+}
+
+/*
+ * Return the user-visible name for the DMU object type stored in an
+ * injection record, or "-" for any type without a name here.
+ */
+static const char *
+type_to_name(uint64_t type)
+{
+	const char *name = "-";
+
+	if (type == DMU_OT_OBJECT_DIRECTORY)
+		name = "mosdir";
+	else if (type == DMU_OT_OBJECT_ARRAY)
+		name = "metaslab";
+	else if (type == DMU_OT_PACKED_NVLIST)
+		name = "config";
+	else if (type == DMU_OT_BPLIST)
+		name = "bplist";
+	else if (type == DMU_OT_SPACE_MAP)
+		name = "spacemap";
+	else if (type == DMU_OT_ERROR_LOG)
+		name = "errlog";
+
+	return (name);
+}
+
+
+
+/*
+ * Print usage message.  The text is written to stdout as a single printf();
+ * callers decide the exit status.
+ */
+void
+usage(void)
+{
+	(void) printf(
+	    "usage:\n"
+	    "\n"
+	    "\tzinject\n"
+	    "\n"
+	    "\t\tList all active injection records.\n"
+	    "\n"
+	    "\tzinject -c <id|all>\n"
+	    "\n"
+	    "\t\tClear the particular record (if given a numeric ID), or\n"
+	    "\t\tall records if 'all' is specified.\n"
+	    "\n"
+	    "\tzinject -d device [-e errno] pool\n"
+	    "\n"
+	    "\t\tInject a fault into a particular device. 'errno' can either\n"
+	    "\t\tbe 'nxio' (the default) or 'io'.\n"
+	    "\n"
+	    "\tzinject -b objset:object:level:blkid pool\n"
+	    "\n"
+	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
+	    "\t\tspecified by the remaining tuple. Each number is in\n"
+	    "\t\thexadecimal, and only one block can be specified.\n"
+	    "\n"
+	    "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n"
+	    "\t [-a] [-m] [-u] [-f freq] <object>\n"
+	    "\n"
+	    "\t\tInject an error into the object specified by the '-t' option\n"
+	    "\t\tand the object descriptor. The 'object' parameter is\n"
+	    "\t\tinterpreted depending on the '-t' option.\n"
+	    "\n"
+	    "\t\t-q\tQuiet mode. Only print out the handler number added.\n"
+	    "\t\t-e\tInject a specific error. Must be either 'io' or\n"
+	    "\t\t\t'checksum'. Default is 'io'.\n"
+	    "\t\t-l\tInject error at a particular block level. Default is "
+	    "0.\n"
+	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
+	    "\t\t-r\tInject error over a particular logical range of an\n"
+	    "\t\t\tobject. Will be translated to the appropriate blkid\n"
+	    "\t\t\trange according to the object's properties.\n"
+	    "\t\t-a\tFlush the ARC cache. Can be specified without any\n"
+	    "\t\t\tassociated object.\n"
+	    "\t\t-u\tUnload the associated pool. Can be specified with only\n"
+	    "\t\t\ta pool object.\n"
+	    "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n"
+	    "\t\t\ta percentage between 1 and 100.\n"
+	    "\n"
+	    "\t-t data\t\tInject an error into the plain file contents of a\n"
+	    "\t\t\tfile. The object must be specified as a complete path\n"
+	    "\t\t\tto a file on a ZFS filesystem.\n"
+	    "\n"
+	    "\t-t dnode\tInject an error into the metadnode in the block\n"
+	    "\t\t\tcorresponding to the dnode for a file or directory. The\n"
+	    "\t\t\t'-r' option is incompatible with this mode. The object\n"
+	    "\t\t\tis specified as a complete path to a file or directory\n"
+	    "\t\t\ton a ZFS filesystem.\n"
+	    "\n"
+	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
+	    "\t\t\ttype. Valid types are: mos, mosdir, config, bplist,\n"
+	    "\t\t\tspacemap, metaslab, errlog\n");
+}
+
+/*
+ * Call 'func' once for every registered injection handler, passing it the
+ * handler id, pool name, injection record and the caller's 'data'.
+ *
+ * The walk repeats ZFS_IOC_INJECT_LIST_NEXT on the same zfs_cmd_t, starting
+ * with zc_guid == 0, until the ioctl() fails.  Iteration stops early if
+ * 'func' returns non-zero, and that value is returned; otherwise returns 0.
+ */
+static int
+iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
+    void *data)
+{
+	/* Zero the whole command so no stack garbage reaches the kernel. */
+	zfs_cmd_t zc = { 0 };
+	int ret;
+
+	while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0) {
+		if ((ret = func((int)zc.zc_guid, zc.zc_name,
+		    &zc.zc_inject_record, data)) != 0)
+			return (ret);
+	}
+
+	return (0);
+}
+
+/*
+ * iter_handlers() callback that prints one line for a data handler, i.e. a
+ * record whose zi_guid is zero; other records are skipped.  'data' points at
+ * an int counting the lines printed so far, and the column header is emitted
+ * before the first one.  Always returns 0.
+ */
+static int
+print_data_handler(int id, const char *pool, zinject_record_t *record,
+    void *data)
+{
+	int *printed = data;
+	int whole_object;
+
+	if (record->zi_guid != 0)
+		return (0);
+
+	if ((*printed)++ == 0) {
+		(void) printf("%3s %-15s %-6s %-6s %-8s %3s %-15s\n",
+		    "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL", "RANGE");
+		(void) printf("--- --------------- ------ "
+		    "------ -------- --- ---------------\n");
+	}
+
+	(void) printf("%3d %-15s %-6llu %-6llu %-8s %3d ", id, pool,
+	    (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object,
+	    type_to_name(record->zi_type), record->zi_level);
+
+	/* [0, -1] is the sentinel for "the whole object". */
+	whole_object = (record->zi_start == 0 && record->zi_end == -1ULL);
+	if (!whole_object) {
+		(void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start,
+		    (u_longlong_t)record->zi_end);
+	} else {
+		(void) printf("all\n");
+	}
+
+	return (0);
+}
+
+/*
+ * iter_handlers() callback that prints one line for a device handler, i.e.
+ * a record whose zi_guid is non-zero; other records are skipped.  'data'
+ * points at an int counting the lines printed so far, and the column header
+ * is emitted before the first one.  Always returns 0.
+ */
+static int
+print_device_handler(int id, const char *pool, zinject_record_t *record,
+    void *data)
+{
+	int *printed = data;
+
+	if (record->zi_guid != 0) {
+		if ((*printed)++ == 0) {
+			(void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID");
+			(void) printf("--- --------------- ----------------\n");
+		}
+
+		(void) printf("%3d %-15s %llx\n", id, pool,
+		    (u_longlong_t)record->zi_guid);
+	}
+
+	return (0);
+}
+
+/*
+ * Print all registered error handlers: device handlers first, then data
+ * handlers, separated by a blank line when any device handlers were
+ * printed.  Returns the total number of handlers printed, counting both
+ * kinds, so that a caller can tell "nothing registered" apart from "only
+ * device handlers registered".
+ */
+static int
+print_all_handlers(void)
+{
+	int count = 0;
+	int total = 0;
+
+	(void) iter_handlers(print_device_handler, &count);
+	if (count > 0) {
+		total += count;
+		(void) printf("\n");
+		count = 0;
+	}
+
+	(void) iter_handlers(print_data_handler, &count);
+
+	return (total + count);
+}
+
+/*
+ * iter_handlers() callback that removes the handler with the given id via
+ * ZFS_IOC_CLEAR_FAULT.  'pool', 'record' and 'data' are unused.  Returns 0
+ * on success, or 1 after printing a diagnostic, which also stops the
+ * iteration.
+ */
+/* ARGSUSED */
+static int
+cancel_one_handler(int id, const char *pool, zinject_record_t *record,
+    void *data)
+{
+	/* Zero the whole command so no stack garbage reaches the kernel. */
+	zfs_cmd_t zc = { 0 };
+
+	zc.zc_guid = (uint64_t)id;
+
+	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
+		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
+		    id, strerror(errno));
+		return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Remove all fault injection handlers.  The confirmation message is printed
+ * only when every removal succeeded; a failure has already been reported by
+ * cancel_one_handler().  Returns 0 on success and non-zero otherwise.
+ */
+static int
+cancel_all_handlers(void)
+{
+	int ret = iter_handlers(cancel_one_handler, NULL);
+
+	if (ret == 0)
+		(void) printf("removed all registered handlers\n");
+
+	return (ret);
+}
+
+/*
+ * Remove a specific fault injection handler, identified by its numeric id,
+ * via ZFS_IOC_CLEAR_FAULT.  Prints a confirmation and returns 0 on success;
+ * prints a diagnostic and returns 1 on failure.
+ */
+static int
+cancel_handler(int id)
+{
+	/* Zero the whole command so no stack garbage reaches the kernel. */
+	zfs_cmd_t zc = { 0 };
+
+	zc.zc_guid = (uint64_t)id;
+
+	if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
+		(void) fprintf(stderr, "failed to remove handler %d: %s\n",
+		    id, strerror(errno));
+		return (1);
+	}
+
+	(void) printf("removed handler %d\n", id);
+
+	return (0);
+}
+
+/*
+ * Register a new fault injection handler.
+ *
+ * 'pool' names the target pool, 'flags' carries the ZINJECT_* flags (passed
+ * to the kernel in zc_guid) and 'record' describes the fault.  When the
+ * ioctl() returns, zc_guid holds the id of the new handler.  With
+ * ZINJECT_NULL set nothing is printed; otherwise the id alone ('quiet') or
+ * a summary of the handler is written to stdout.
+ *
+ * Returns 0 on success, or 1 after printing a diagnostic.
+ */
+static int
+register_handler(const char *pool, int flags, zinject_record_t *record,
+    int quiet)
+{
+	/* Zero the whole command so no stack garbage reaches the kernel. */
+	zfs_cmd_t zc = { 0 };
+
+	/* Refuse names that do not fit rather than overrun zc_name. */
+	if (strlen(pool) >= sizeof (zc.zc_name)) {
+		(void) fprintf(stderr, "failed to add handler: pool name "
+		    "'%s' is too long\n", pool);
+		return (1);
+	}
+
+	(void) strcpy(zc.zc_name, pool);
+	zc.zc_inject_record = *record;
+	zc.zc_guid = flags;
+
+	if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
+		(void) fprintf(stderr, "failed to add handler: %s\n",
+		    strerror(errno));
+		return (1);
+	}
+
+	if (flags & ZINJECT_NULL)
+		return (0);
+
+	if (quiet) {
+		(void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
+	} else {
+		(void) printf("Added handler %llu with the following "
+		    "properties:\n", (u_longlong_t)zc.zc_guid);
+		(void) printf(" pool: %s\n", pool);
+		if (record->zi_guid) {
+			(void) printf(" vdev: %llx\n",
+			    (u_longlong_t)record->zi_guid);
+		} else {
+			(void) printf("objset: %llu\n",
+			    (u_longlong_t)record->zi_objset);
+			(void) printf("object: %llu\n",
+			    (u_longlong_t)record->zi_object);
+			(void) printf(" type: %llu\n",
+			    (u_longlong_t)record->zi_type);
+			(void) printf(" level: %d\n", record->zi_level);
+			if (record->zi_start == 0 &&
+			    record->zi_end == -1ULL)
+				(void) printf(" range: all\n");
+			else
+				(void) printf(" range: [%llu, %llu)\n",
+				    (u_longlong_t)record->zi_start,
+				    (u_longlong_t)record->zi_end);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Copy a pool name given on the command line into 'pool', which holds
+ * 'poollen' bytes.  Returns 0 on success, or -1 after printing a diagnostic
+ * if the name does not fit.
+ */
+static int
+copy_pool_name(char *pool, size_t poollen, const char *name)
+{
+	if (strlen(name) >= poollen) {
+		(void) fprintf(stderr, "pool name '%s' is too long\n", name);
+		return (-1);
+	}
+
+	(void) strcpy(pool, name);
+	return (0);
+}
+
+/*
+ * Entry point.  With no arguments the registered handlers are listed.
+ * Otherwise the options select one of: cancel (-c), device injection (-d),
+ * raw bookmark injection (-b), a flush-only request (-a and/or -u without
+ * -t), or object injection (-t).  The chosen form fills in 'record', 'pool'
+ * and 'dataset', and the handler is then registered with the kernel.
+ *
+ * Exit status is 0 on success, 1 on a runtime failure and (mostly) 2 on a
+ * usage error.
+ */
+int
+main(int argc, char **argv)
+{
+	int c;
+	char *range = NULL;
+	char *cancel = NULL;
+	char *end;
+	char *raw = NULL;
+	char *device = NULL;
+	int level = 0;
+	int quiet = 0;
+	int error = 0;
+	int domount = 0;
+	err_type_t type = TYPE_INVAL;
+	zinject_record_t record = { 0 };
+	/*
+	 * Both names start out empty: the flush-only form with no pool
+	 * argument (e.g. 'zinject -a') never assigns them, yet they are read
+	 * below.
+	 */
+	char pool[MAXNAMELEN] = "";
+	char dataset[MAXNAMELEN] = "";
+	zfs_handle_t *zhp = NULL;
+	int ret;
+	int flags = 0;
+
+	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
+		(void) fprintf(stderr, "failed to open ZFS device\n");
+		return (1);
+	}
+
+	if (argc == 1) {
+		/*
+		 * No arguments.  Print the available handlers.  If there are no
+		 * available handlers, direct the user to '-h' for help
+		 * information.
+		 */
+		if (print_all_handlers() == 0) {
+			(void) printf("No handlers registered.\n");
+			(void) printf("Run 'zinject -h' for usage "
+			    "information.\n");
+		}
+
+		return (0);
+	}
+
+	while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:u")) != -1) {
+		switch (c) {
+		case 'a':
+			flags |= ZINJECT_FLUSH_ARC;
+			break;
+		case 'b':
+			raw = optarg;
+			break;
+		case 'c':
+			cancel = optarg;
+			break;
+		case 'd':
+			device = optarg;
+			break;
+		case 'e':
+			if (strcasecmp(optarg, "io") == 0) {
+				error = EIO;
+			} else if (strcasecmp(optarg, "checksum") == 0) {
+				error = ECKSUM;
+			} else if (strcasecmp(optarg, "nxio") == 0) {
+				error = ENXIO;
+			} else {
+				(void) fprintf(stderr, "invalid error type "
+				    "'%s': must be 'io', 'checksum' or "
+				    "'nxio'\n", optarg);
+				usage();
+				return (1);
+			}
+			break;
+		case 'f':
+			record.zi_freq = atoi(optarg);
+			if (record.zi_freq < 1 || record.zi_freq > 100) {
+				(void) fprintf(stderr, "frequency range must "
+				    "be in the range (0, 100]\n");
+				return (1);
+			}
+			break;
+		case 'h':
+			usage();
+			return (0);
+		case 'l':
+			level = (int)strtol(optarg, &end, 10);
+			if (*end != '\0') {
+				(void) fprintf(stderr, "invalid level '%s': "
+				    "must be an integer\n", optarg);
+				usage();
+				return (1);
+			}
+			break;
+		case 'm':
+			domount = 1;
+			break;
+		case 'q':
+			quiet = 1;
+			break;
+		case 'r':
+			range = optarg;
+			break;
+		case 't':
+			if ((type = name_to_type(optarg)) == TYPE_INVAL) {
+				(void) fprintf(stderr, "invalid type '%s'\n",
+				    optarg);
+				usage();
+				return (1);
+			}
+			break;
+		case 'u':
+			flags |= ZINJECT_UNLOAD_SPA;
+			break;
+		case ':':
+			(void) fprintf(stderr, "option -%c requires an "
+			    "operand\n", optopt);
+			usage();
+			return (1);
+		case '?':
+			(void) fprintf(stderr, "invalid option '%c'\n",
+			    optopt);
+			usage();
+			return (2);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (cancel != NULL) {
+		/*
+		 * '-c' is invalid with any other options.
+		 */
+		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
+		    level != 0) {
+			(void) fprintf(stderr, "cancel (-c) incompatible with "
+			    "any other options\n");
+			usage();
+			return (2);
+		}
+		if (argc != 0) {
+			(void) fprintf(stderr, "extraneous argument to '-c'\n");
+			usage();
+			return (2);
+		}
+
+		if (strcmp(cancel, "all") == 0) {
+			return (cancel_all_handlers());
+		} else {
+			int id = (int)strtol(cancel, &end, 10);
+			if (*end != '\0') {
+				(void) fprintf(stderr, "invalid handle id '%s':"
+				    " must be an integer or 'all'\n", cancel);
+				usage();
+				return (1);
+			}
+			return (cancel_handler(id));
+		}
+	}
+
+	if (device != NULL) {
+		/*
+		 * Device (-d) injection uses a completely different mechanism
+		 * for doing injection, so handle it separately here.
+		 */
+		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
+		    level != 0) {
+			(void) fprintf(stderr, "device (-d) incompatible with "
+			    "data error injection\n");
+			usage();
+			return (2);
+		}
+
+		if (argc != 1) {
+			(void) fprintf(stderr, "device (-d) injection requires "
+			    "a single pool name\n");
+			usage();
+			return (2);
+		}
+
+		if (copy_pool_name(pool, sizeof (pool), argv[0]) != 0)
+			return (1);
+		dataset[0] = '\0';
+
+		if (error == ECKSUM) {
+			(void) fprintf(stderr, "device error type must be "
+			    "'io' or 'nxio'\n");
+			return (1);
+		}
+
+		if (translate_device(pool, device, &record) != 0)
+			return (1);
+		if (!error)
+			error = ENXIO;
+	} else if (raw != NULL) {
+		if (range != NULL || type != TYPE_INVAL || level != 0) {
+			(void) fprintf(stderr, "raw (-b) format incompatible "
+			    "with any other options\n");
+			usage();
+			return (2);
+		}
+
+		if (argc != 1) {
+			(void) fprintf(stderr, "raw (-b) format expects a "
+			    "single pool name\n");
+			usage();
+			return (2);
+		}
+
+		if (copy_pool_name(pool, sizeof (pool), argv[0]) != 0)
+			return (1);
+		dataset[0] = '\0';
+
+		if (error == ENXIO) {
+			(void) fprintf(stderr, "data error type must be "
+			    "'checksum' or 'io'\n");
+			return (1);
+		}
+
+		if (translate_raw(raw, &record) != 0)
+			return (1);
+		if (!error)
+			error = EIO;
+	} else if (type == TYPE_INVAL) {
+		/*
+		 * No injection target at all: only valid as a flush-only
+		 * request ('-a' and/or '-u').
+		 */
+		if (flags == 0) {
+			(void) fprintf(stderr, "at least one of '-b', '-d', "
+			    "'-t', '-a', or '-u' must be specified\n");
+			usage();
+			return (2);
+		}
+
+		if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
+			if (copy_pool_name(pool, sizeof (pool), argv[0]) != 0)
+				return (1);
+			dataset[0] = '\0';
+		} else if (argc != 0) {
+			(void) fprintf(stderr, "extraneous argument for "
+			    "'-f'\n");
+			usage();
+			return (2);
+		}
+
+		flags |= ZINJECT_NULL;
+	} else {
+		if (argc != 1) {
+			(void) fprintf(stderr, "missing object\n");
+			usage();
+			return (2);
+		}
+
+		if (error == ENXIO) {
+			(void) fprintf(stderr, "data error type must be "
+			    "'checksum' or 'io'\n");
+			return (1);
+		}
+
+		if (translate_record(type, argv[0], range, level, &record, pool,
+		    dataset) != 0)
+			return (1);
+		if (!error)
+			error = EIO;
+	}
+
+	/*
+	 * If '-m' was given and the target resolved to a dataset, unmount
+	 * that dataset before registering the handler and remount it
+	 * afterwards.
+	 */
+	if (dataset[0] != '\0' && domount) {
+		if ((zhp = zfs_open(dataset, ZFS_TYPE_ANY)) == NULL)
+			return (1);
+
+		if (zfs_unmount(zhp, NULL, 0) != 0)
+			return (1);
+	}
+
+	record.zi_error = error;
+
+	ret = register_handler(pool, flags, &record, quiet);
+
+	/*
+	 * A failed remount is an error in its own right, but a successful
+	 * one must not hide a failure from register_handler().
+	 */
+	if (zhp != NULL && zfs_mount(zhp, NULL, 0) != 0)
+		ret = 1;
+
+	return (ret);
+}
diff --git a/usr/src/cmd/zinject/zinject.h b/usr/src/cmd/zinject/zinject.h
new file mode 100644
index 0000000000..bdbc2454c4
--- /dev/null
+++ b/usr/src/cmd/zinject/zinject.h
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ZINJECT_H
+#define _ZINJECT_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/zfs_ioctl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * User-selectable injection types ('-t'). The order matters: zinject.c
+ * indexes its table of type names by these values, and MOS_TYPE() relies on
+ * the MOS types forming one run that ends just before TYPE_INVAL.
+ * TYPE_INVAL doubles as the number of valid types and as the "no/unknown
+ * type" value.
+ */
+typedef enum {
+ TYPE_DATA, /* plain file contents */
+ TYPE_DNODE, /* metadnode contents */
+ TYPE_MOS, /* all MOS data */
+ TYPE_MOSDIR, /* MOS object directory */
+ TYPE_METASLAB, /* metaslab objects */
+ TYPE_CONFIG, /* MOS config */
+ TYPE_BPLIST, /* block pointer list */
+ TYPE_SPACEMAP, /* space map objects */
+ TYPE_ERRLOG, /* persistent error log */
+ TYPE_INVAL
+} err_type_t;
+
+/*
+ * True when 't' is one of the MOS types, i.e. lies in the range
+ * [TYPE_MOS, TYPE_INVAL). Note that 't' is evaluated twice.
+ */
+#define MOS_TYPE(t) \
+ ((t) >= TYPE_MOS && (t) < TYPE_INVAL)
+
+/*
+ * Translators from command-line arguments to an injection record; each
+ * returns 0 on success and -1 on failure. translate_record() also fills in
+ * the caller-supplied 'poolname' and 'dataset' buffers.
+ */
+int translate_record(err_type_t type, const char *object, const char *range,
+ int level, zinject_record_t *record, char *poolname, char *dataset);
+int translate_raw(const char *raw, zinject_record_t *record);
+int translate_device(const char *pool, const char *device,
+ zinject_record_t *record);
+/* Prints the usage message to stdout. */
+void usage(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZINJECT_H */
diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c
index 98dd9e67bc..deb04138be 100644
--- a/usr/src/cmd/zoneadmd/vplat.c
+++ b/usr/src/cmd/zoneadmd/vplat.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -2552,13 +2551,6 @@ validate_datasets(zlog_t *zlogp)
zfs_set_error_handler(zfs_error_handler);
- /*
- * libzfs opens /dev/zfs during its .init routine.
- * zoneadmd automatically closes these files when it daemonizes,
- * so we cheat by re-calling the init routine.
- */
- zfs_init();
-
while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
if ((zhp = zfs_open(dstab.zone_dataset_name,
diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c
index 95857402cb..1b86fe538c 100644
--- a/usr/src/cmd/zpool/zpool_main.c
+++ b/usr/src/cmd/zpool/zpool_main.c
@@ -58,6 +58,7 @@ static int zpool_do_status(int, char **);
static int zpool_do_online(int, char **);
static int zpool_do_offline(int, char **);
+static int zpool_do_clear(int, char **);
static int zpool_do_attach(int, char **);
static int zpool_do_detach(int, char **);
@@ -87,6 +88,7 @@ _umem_logging_init(void)
typedef enum {
HELP_ADD,
HELP_ATTACH,
+ HELP_CLEAR,
HELP_CREATE,
HELP_DESTROY,
HELP_DETACH,
@@ -110,9 +112,8 @@ typedef struct zpool_command {
/*
* Master command table. Each ZFS command has a name, associated function, and
- * usage message. Unfortunately, the usage messages need to be
- * iternationalized, so we have to have a function to return the usage message
- * based on a command index.
+ * usage message. The usage messages need to be internationalized, so we have
+ * to have a function to return the usage message based on a command index.
*
* These commands are organized according to how they are displayed in the usage
* message. An empty command (one with a NULL name) indicates an empty line in
@@ -130,6 +131,7 @@ static zpool_command_t command_table[] = {
{ NULL },
{ "online", zpool_do_online, HELP_ONLINE },
{ "offline", zpool_do_offline, HELP_OFFLINE },
+ { "clear", zpool_do_clear, HELP_CLEAR },
{ NULL },
{ "attach", zpool_do_attach, HELP_ATTACH },
{ "detach", zpool_do_detach, HELP_DETACH },
@@ -153,6 +155,8 @@ get_usage(zpool_help_t idx) {
case HELP_ATTACH:
return (gettext("\tattach [-f] <pool> <device> "
"<new_device>\n"));
+ case HELP_CLEAR:
+ return (gettext("\tclear <pool> [device]\n"));
case HELP_CREATE:
return (gettext("\tcreate [-fn] [-R root] [-m mountpoint] "
"<pool> <vdev> ...\n"));
@@ -277,12 +281,15 @@ usage(int requested)
}
const char *
-state_to_name(int state)
+state_to_name(vdev_stat_t *vs)
{
- switch (state) {
+ switch (vs->vs_state) {
case VDEV_STATE_CLOSED:
case VDEV_STATE_CANT_OPEN:
- return (gettext("FAULTED"));
+ if (vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
+ return (gettext("FAULTED"));
+ else
+ return (gettext("UNAVAIL"));
case VDEV_STATE_OFFLINE:
return (gettext("OFFLINE"));
case VDEV_STATE_DEGRADED:
@@ -771,7 +778,7 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
(void) printf("\t%*s%-*s", depth, "", namewidth - depth, name);
if (vs->vs_aux != 0) {
- (void) printf(" %-8s ", state_to_name(vs->vs_state));
+ (void) printf(" %-8s ", state_to_name(vs));
switch (vs->vs_aux) {
case VDEV_AUX_OPEN_FAILED:
@@ -791,7 +798,7 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
break;
}
} else {
- (void) printf(" %s", state_to_name(vs->vs_state));
+ (void) printf(" %s", state_to_name(vs));
}
(void) printf("\n");
@@ -867,6 +874,11 @@ show_import(nvlist_t *config)
"are offlined.\n"));
break;
+ case ZPOOL_STATUS_CORRUPT_POOL:
+ (void) printf(gettext("status: The pool metadata is "
+ "corrupted.\n"));
+ break;
+
default:
/*
* No other status can be seen when importing pools.
@@ -1671,7 +1683,7 @@ list_callback(zpool_handle_t *zhp, void *data)
verify(nvlist_lookup_uint64_array(nvroot,
ZPOOL_CONFIG_STATS, (uint64_t **)&vs,
&vsc) == 0);
- (void) strlcpy(buf, state_to_name(vs->vs_state),
+ (void) strlcpy(buf, state_to_name(vs),
sizeof (buf));
}
break;
@@ -2081,6 +2093,42 @@ zpool_do_offline(int argc, char **argv)
return (ret);
}
+/*
+ * zpool clear <pool> [device]
+ *
+ * Clear all errors associated with a pool or, when a device is named,
+ * with that particular device.  Returns 1 if the pool cannot be opened
+ * or zpool_clear() fails, and 0 otherwise.
+ */
+int
+zpool_do_clear(int argc, char **argv)
+{
+	zpool_handle_t *zhp;
+	char *pool;
+	char *device = NULL;
+	int ret;
+
+	/* Exactly one pool name and at most one device are accepted. */
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing pool name\n"));
+		usage(FALSE);
+	} else if (argc > 3) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(FALSE);
+	}
+
+	pool = argv[1];
+	if (argc == 3)
+		device = argv[2];
+
+	zhp = zpool_open(pool);
+	if (zhp == NULL)
+		return (1);
+
+	ret = (zpool_clear(zhp, device) != 0) ? 1 : 0;
+
+	zpool_close(zhp);
+
+	return (ret);
+}
+
typedef struct scrub_cbdata {
int cb_type;
} scrub_cbdata_t;
@@ -2090,6 +2138,15 @@ scrub_callback(zpool_handle_t *zhp, void *data)
{
scrub_cbdata_t *cb = data;
+ /*
+ * Refuse to scrub pools that are currently unavailable.
+ */
+ if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
+ (void) fprintf(stderr, gettext("cannot scrub '%s': pool is "
+ "currently unavailable\n"), zpool_get_name(zhp));
+ return (1);
+ }
+
return (zpool_scrub(zhp, cb->cb_type) != 0);
}
@@ -2201,8 +2258,9 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
nvlist_t **child;
uint_t c, children;
vdev_stat_t *vs;
- char rbuf[6], wbuf[6], cbuf[6], repaired[6];
+ char rbuf[6], wbuf[6], cbuf[6], repaired[7];
char *vname;
+ uint64_t notpresent;
verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
(uint64_t **)&vs, &c) == 0);
@@ -2212,14 +2270,19 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
children = 0;
(void) printf("\t%*s%-*s %-8s", depth, "", namewidth - depth,
- name, state_to_name(vs->vs_state));
+ name, state_to_name(vs));
zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
(void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
- if (vs->vs_aux != 0) {
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
+ &notpresent) == 0) {
+ char *path;
+ verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
+ (void) printf(" was %s\n", path);
+ } else if (vs->vs_aux != 0) {
(void) printf(" ");
switch (vs->vs_aux) {
@@ -2259,6 +2322,60 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
}
}
+/*
+ * Print the pool's persistent error log as a table with DATASET, OBJECT
+ * and RANGE columns.  A first pass over the log finds the widest dataset
+ * and object names so that the columns line up; a second pass prints one
+ * row per log entry.  If the log cannot be fetched, a single notice is
+ * printed instead.
+ */
+static void
+print_error_log(zpool_handle_t *zhp)
+{
+	nvlist_t **log;
+	size_t nelem;
+	size_t maxdsname = sizeof ("DATASET") - 1;
+	size_t maxobjname = sizeof ("OBJECT") - 1;
+	size_t i;
+	nvlist_t *nv;
+	size_t len;
+	char *dsname, *objname, *range;
+
+	if (zpool_get_errlog(zhp, &log, &nelem) != 0) {
+		(void) printf("errors: List of errors unavailable "
+		    "(insufficient privileges)\n");
+		return;
+	}
+
+	/* Pass 1: compute the column widths, starting from the headings. */
+	for (i = 0; i < nelem; i++) {
+		nv = log[i];
+
+		verify(nvlist_lookup_string(nv, ZPOOL_ERR_DATASET,
+		    &dsname) == 0);
+		len = strlen(dsname);
+		if (len > maxdsname)
+			maxdsname = len;
+
+		verify(nvlist_lookup_string(nv, ZPOOL_ERR_OBJECT,
+		    &objname) == 0);
+		len = strlen(objname);
+		if (len > maxobjname)
+			maxobjname = len;
+	}
+
+	(void) printf("errors: The following persistent errors have been "
+	    "detected:\n\n");
+
+	/*
+	 * A '*' field width consumes an int argument, so the size_t
+	 * widths must be converted explicitly.
+	 */
+	(void) printf("%8s %-*s %-*s %s\n", "", (int)maxdsname, "DATASET",
+	    (int)maxobjname, "OBJECT", "RANGE");
+
+	/* Pass 2: print one aligned row per log entry. */
+	for (i = 0; i < nelem; i++) {
+		nv = log[i];
+
+		verify(nvlist_lookup_string(nv, ZPOOL_ERR_DATASET,
+		    &dsname) == 0);
+		verify(nvlist_lookup_string(nv, ZPOOL_ERR_OBJECT,
+		    &objname) == 0);
+		verify(nvlist_lookup_string(nv, ZPOOL_ERR_RANGE,
+		    &range) == 0);
+
+		(void) printf("%8s %-*s %-*s %s\n", "", (int)maxdsname,
+		    dsname, (int)maxobjname, objname, range);
+	}
+}
+
/*
* Display a summary of pool status. Displays a summary such as:
*
@@ -2269,7 +2386,7 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
* config:
* mirror DEGRADED
* c1t0d0 OK
- * c2t0d0 FAULTED
+ * c2t0d0 UNAVAIL
*
* When given the '-v' option, we print out the complete config. If the '-e'
* option is specified, then we print out error rate information as well.
@@ -2348,7 +2465,7 @@ status_callback(zpool_handle_t *zhp, void *data)
"unaffected.\n"));
(void) printf(gettext("action: Determine if the device needs "
"to be replaced, and clear the errors\n\tusing "
- "'zpool online' or replace the device with 'zpool "
+ "'zpool clear' or replace the device with 'zpool "
"replace'.\n"));
break;
@@ -2370,6 +2487,22 @@ status_callback(zpool_handle_t *zhp, void *data)
"complete.\n"));
break;
+ case ZPOOL_STATUS_CORRUPT_DATA:
+ (void) printf(gettext("status: One or more devices has "
+ "experienced an error resulting in data\n\tcorruption. "
+ "Applications may be affected.\n"));
+ (void) printf(gettext("action: Restore the file in question "
+ "if possible. Otherwise restore the\n\tentire pool from "
+ "backup.\n"));
+ break;
+
+ case ZPOOL_STATUS_CORRUPT_POOL:
+ (void) printf(gettext("status: The pool metadata is corrupted "
+ "and the pool cannot be opened.\n"));
+ (void) printf(gettext("action: Destroy and re-create the pool "
+ "from a backup source.\n"));
+ break;
+
default:
/*
* The remaining errors can't actually be generated, yet.
@@ -2383,6 +2516,8 @@ status_callback(zpool_handle_t *zhp, void *data)
if (config != NULL) {
int namewidth;
+ uint64_t nerr;
+ size_t realerr;
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
@@ -2399,6 +2534,28 @@ status_callback(zpool_handle_t *zhp, void *data)
"NAME", "STATE", "READ", "WRITE", "CKSUM");
print_status_config(zhp, zpool_get_name(zhp), nvroot,
namewidth, 0);
+
+		/*
+		 * Report the number of persistent data errors, if the
+		 * config carries an error count at all.
+		 */
+		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
+		    &nerr) == 0) {
+			/*
+			 * If the approximate error count is small, get a
+			 * precise count by fetching the entire log and
+			 * uniquifying the results.
+			 */
+			if (nerr < 100 && !cbp->cb_verbose &&
+			    zpool_get_errlog(zhp, NULL, &realerr) == 0)
+				nerr = realerr;
+
+			(void) printf("\n");
+			if (nerr == 0)
+				(void) printf(gettext("errors: No known data "
+				    "errors\n"));
+			else if (!cbp->cb_verbose)
+				/*
+				 * nerr is a uint64_t, so it must be printed
+				 * with a 64-bit conversion rather than %d.
+				 */
+				(void) printf(gettext("errors: %llu data "
+				    "errors, use '-v' for a list\n"),
+				    (unsigned long long)nerr);
+			else
+				print_error_log(zhp);
+		}
} else {
(void) printf(gettext("config: The configuration cannot be "
"determined.\n"));
@@ -2507,8 +2664,8 @@ main(int argc, char **argv)
* 'freeze' is a vile debugging abomination, so we treat it as such.
*/
if (strcmp(cmdname, "freeze") == 0 && argc == 3) {
- char buf[8192];
- int fd = open("/dev/zpoolctl", O_RDWR);
+ char buf[16384];
+ int fd = open(ZFS_DEV, O_RDWR);
(void) strcpy((void *)buf, argv[2]);
return (!!ioctl(fd, ZFS_IOC_POOL_FREEZE, buf));
}
diff --git a/usr/src/cmd/zpool/zpool_vdev.c b/usr/src/cmd/zpool/zpool_vdev.c
index 2dd85062be..6fba820d10 100644
--- a/usr/src/cmd/zpool/zpool_vdev.c
+++ b/usr/src/cmd/zpool/zpool_vdev.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -112,6 +111,13 @@ vdev_error(const char *fmt, ...)
static void
libdiskmgt_error(int error)
{
+ /*
+ * ENXIO is an expected error if the device doesn't live in
+ * /dev/dsk. Don't bother printing a warning in this case;
+ * every other error is reported on stderr below.
+ */
+ if (error == ENXIO)
+ return;
+
(void) fprintf(stderr, gettext("warning: device in use checking "
"failed: %s\n"), strerror(error));
}
diff --git a/usr/src/cmd/ztest/Makefile b/usr/src/cmd/ztest/Makefile
index 52e17eb413..1a34525b2d 100644
--- a/usr/src/cmd/ztest/Makefile
+++ b/usr/src/cmd/ztest/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
@@ -43,13 +42,9 @@ lint := TARGET = lint
all clean clobber lint: $(SUBDIRS)
-#
-# This should really be $(LN), but protocmp detects link inconsistencies
-# between isaexec (which we ship) and ztest (which we do not ship).
-#
install: $(SUBDIRS)
-$(RM) $(ROOTPROG)
- -$(CP) $(ISAEXEC) $(ROOTPROG)
+ -$(LN) $(ISAEXEC) $(ROOTPROG)
$(SUBDIRS): FRC
@cd $@; pwd; $(MAKE) $(TARGET)
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index 07cda80045..13d8b81f36 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -132,6 +132,7 @@ typedef struct ztest_args {
uint64_t za_random;
uint64_t za_diroff;
uint64_t za_diroff_shared;
+ uint64_t za_zil_seq;
hrtime_t za_start;
hrtime_t za_stop;
hrtime_t za_kill;
@@ -183,7 +184,7 @@ ztest_info_t ztest_info[] = {
{ ztest_traverse, &zopt_often },
{ ztest_dsl_prop_get_set, &zopt_sometimes },
{ ztest_dmu_objset_create_destroy, &zopt_sometimes },
- { ztest_dmu_snapshot_create_destroy, &zopt_sometimes },
+ { ztest_dmu_snapshot_create_destroy, &zopt_rarely },
{ ztest_spa_create_destroy, &zopt_sometimes },
{ ztest_fault_inject, &zopt_sometimes },
{ ztest_spa_rename, &zopt_rarely },
@@ -777,12 +778,12 @@ ztest_vdev_add_remove(ztest_args_t *za)
(void) mutex_lock(&ztest_shared->zs_vdev_lock);
- spa_config_enter(spa, RW_READER);
+ spa_config_enter(spa, RW_READER, FTAG);
ztest_shared->zs_vdev_primaries =
spa->spa_root_vdev->vdev_children * leaves;
- spa_config_exit(spa);
+ spa_config_exit(spa, FTAG);
nvroot = make_vdev_root(zopt_vdev_size, zopt_raidz, zopt_mirrors, 1);
error = spa_vdev_add(spa, nvroot);
@@ -799,6 +800,35 @@ ztest_vdev_add_remove(ztest_args_t *za)
(void) printf("spa_vdev_add = %d, as expected\n", error);
}
+/*
+ * Search the vdev tree rooted at 'vd', depth first, for a vdev whose
+ * path matches 'path'.  Returns the first match, or NULL if no vdev in
+ * the tree matches.
+ */
+static vdev_t *
+vdev_lookup_by_path(vdev_t *vd, const char *path)
+{
+	vdev_t *found;
+	int i;
+
+	if (vd->vdev_path != NULL) {
+		if (vd->vdev_wholedisk != 1) {
+			if (strcmp(path, vd->vdev_path) == 0)
+				return (vd);
+		} else {
+			size_t pathlen = strlen(path);
+
+			/*
+			 * For whole disks, the internal path has 's0', but
+			 * the path passed in by the user doesn't, so match
+			 * on everything except those last two characters.
+			 */
+			if (pathlen == strlen(vd->vdev_path) - 2 &&
+			    strncmp(path, vd->vdev_path, pathlen) == 0)
+				return (vd);
+		}
+	}
+
+	/* No match at this level; try each child subtree in turn. */
+	for (i = 0; i < vd->vdev_children; i++) {
+		found = vdev_lookup_by_path(vd->vdev_child[i], path);
+		if (found != NULL)
+			return (found);
+	}
+
+	return (NULL);
+}
+
+
/*
* Verify that we can attach and detach devices.
*/
@@ -807,19 +837,19 @@ ztest_vdev_attach_detach(ztest_args_t *za)
{
spa_t *spa = dmu_objset_spa(za->za_os);
vdev_t *rvd = spa->spa_root_vdev;
- vdev_t *vd0, *vd1, *pvd;
+ vdev_t *oldvd, *newvd, *pvd;
nvlist_t *root, *file;
uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
uint64_t leaf, top;
- size_t size0, size1;
- char path0[MAXPATHLEN], path1[MAXPATHLEN];
+ size_t oldsize, newsize;
+ char oldpath[MAXPATHLEN], newpath[MAXPATHLEN];
int replacing;
int error, expected_error;
int fd;
(void) mutex_lock(&ztest_shared->zs_vdev_lock);
- spa_config_enter(spa, RW_READER);
+ spa_config_enter(spa, RW_READER, FTAG);
/*
* Decide whether to do an attach or a replace.
@@ -840,84 +870,83 @@ ztest_vdev_attach_detach(ztest_args_t *za)
* Generate the path to this leaf. The filename will end with 'a'.
* We'll alternate replacements with a filename that ends with 'b'.
*/
- (void) snprintf(path0, sizeof (path0),
+ (void) snprintf(oldpath, sizeof (oldpath),
ztest_dev_template, zopt_dir, zopt_pool, top * leaves + leaf);
- bcopy(path0, path1, MAXPATHLEN);
+ bcopy(oldpath, newpath, MAXPATHLEN);
/*
* If the 'a' file isn't part of the pool, the 'b' file must be.
*/
- if (vdev_lookup_by_path(rvd, path0) == NULL)
- path0[strlen(path0) - 1] = 'b';
+ if (vdev_lookup_by_path(rvd, oldpath) == NULL)
+ oldpath[strlen(oldpath) - 1] = 'b';
else
- path1[strlen(path1) - 1] = 'b';
+ newpath[strlen(newpath) - 1] = 'b';
/*
- * Now path0 represents something that's already in the pool,
- * and path1 is the thing we'll try to attach.
+ * Now oldpath represents something that's already in the pool,
+ * and newpath is the thing we'll try to attach.
*/
- vd0 = vdev_lookup_by_path(rvd, path0);
- vd1 = vdev_lookup_by_path(rvd, path1);
- ASSERT(vd0 != NULL);
- pvd = vd0->vdev_parent;
-
+ oldvd = vdev_lookup_by_path(rvd, oldpath);
+ newvd = vdev_lookup_by_path(rvd, newpath);
+ ASSERT(oldvd != NULL);
+ pvd = oldvd->vdev_parent;
/*
- * Make size1 a little bigger or smaller than size0.
+ * Make newsize a little bigger or smaller than oldsize.
* If it's smaller, the attach should fail.
* If it's larger, and we're doing a replace,
* we should get dynamic LUN growth when we're done.
*/
- size0 = vd0->vdev_psize;
- size1 = 10 * size0 / (9 + ztest_random(3));
+ oldsize = vdev_get_rsize(oldvd);
+ newsize = 10 * oldsize / (9 + ztest_random(3));
/*
* If pvd is not a mirror or root, the attach should fail with ENOTSUP,
* unless it's a replace; in that case any non-replacing parent is OK.
*
- * If vd1 is already part of the pool, it should fail with EBUSY.
+ * If newvd is already part of the pool, it should fail with EBUSY.
*
- * If vd1 is too small, it should fail with EOVERFLOW.
+ * If newvd is too small, it should fail with EOVERFLOW.
*/
if (pvd->vdev_ops != &vdev_mirror_ops &&
pvd->vdev_ops != &vdev_root_ops &&
(!replacing || pvd->vdev_ops == &vdev_replacing_ops))
expected_error = ENOTSUP;
- else if (vd1 != NULL)
+ else if (newvd != NULL)
expected_error = EBUSY;
- else if (size1 < size0)
+ else if (newsize < oldsize)
expected_error = EOVERFLOW;
else
expected_error = 0;
/*
- * If vd1 isn't already part of the pool, create it.
+ * If newvd isn't already part of the pool, create it.
*/
- if (vd1 == NULL) {
- fd = open(path1, O_RDWR | O_CREAT | O_TRUNC, 0666);
+ if (newvd == NULL) {
+ fd = open(newpath, O_RDWR | O_CREAT | O_TRUNC, 0666);
if (fd == -1)
- fatal(1, "can't open %s", path1);
- if (ftruncate(fd, size1) != 0)
- fatal(1, "can't ftruncate %s", path1);
+ fatal(1, "can't open %s", newpath);
+ if (ftruncate(fd, newsize) != 0)
+ fatal(1, "can't ftruncate %s", newpath);
(void) close(fd);
}
- spa_config_exit(spa);
+ spa_config_exit(spa, FTAG);
/*
- * Build the nvlist describing path1.
+ * Build the nvlist describing newpath.
*/
VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
- VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path1) == 0);
+ VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, newpath) == 0);
VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
&file, 1) == 0);
- error = spa_vdev_attach(spa, path0, root, replacing);
+ error = spa_vdev_attach(spa, oldvd->vdev_guid, root, replacing);
nvlist_free(file);
nvlist_free(root);
@@ -939,7 +968,7 @@ ztest_vdev_attach_detach(ztest_args_t *za)
if (error != expected_error) {
fatal(0, "attach (%s, %s, %d) returned %d, expected %d",
- path0, path1, replacing, error, expected_error);
+ oldpath, newpath, replacing, error, expected_error);
}
(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
@@ -964,9 +993,9 @@ ztest_vdev_LUN_growth(ztest_args_t *za)
/*
* Pick a random leaf vdev.
*/
- spa_config_enter(spa, RW_READER);
+ spa_config_enter(spa, RW_READER, FTAG);
vdev = ztest_random(spa->spa_root_vdev->vdev_children * leaves);
- spa_config_exit(spa);
+ spa_config_exit(spa, FTAG);
(void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
@@ -1219,6 +1248,7 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za)
dmu_objset_close(os2);
}
+ txg_wait_synced(dmu_objset_pool(os), 0);
zil_close(zilog);
dmu_objset_close(os);
@@ -1268,6 +1298,26 @@ ztest_blk_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
traverse_handle_t *th = za->za_th;
uint64_t size = BP_GET_LSIZE(bp);
+ /*
+ * Level -1 indicates the objset_phys_t or something in its intent log.
+ */
+ if (zb->zb_level == -1) {
+ if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
+ ASSERT3U(zb->zb_object, ==, 0);
+ ASSERT3U(zb->zb_blkid, ==, 0);
+ ASSERT3U(size, ==, sizeof (objset_phys_t));
+ za->za_zil_seq = 0;
+ } else if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) {
+ ASSERT3U(zb->zb_object, ==, 0);
+ ASSERT3U(zb->zb_blkid, >, za->za_zil_seq);
+ za->za_zil_seq = zb->zb_blkid;
+ } else {
+ ASSERT3U(zb->zb_object, !=, 0); /* lr_write_t */
+ }
+
+ return (0);
+ }
+
ASSERT(dnp != NULL);
if (bc->bc_errno)
@@ -1309,11 +1359,6 @@ ztest_blk_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
return (0);
}
- if (zb->zb_level == -1) {
- ASSERT3U(size, ==, sizeof (objset_phys_t));
- return (0);
- }
-
ASSERT(zb->zb_level == 0);
ASSERT3U(size, ==, dnp->dn_datablkszsec << DEV_BSHIFT);
@@ -1346,6 +1391,9 @@ ztest_traverse(ztest_args_t *za)
if (ztest_random(2) == 0)
advance |= ADVANCE_HOLES;
+ if (ztest_random(2) == 0)
+ advance |= ADVANCE_ZIL;
+
th = za->za_th = traverse_init(spa, ztest_blk_cb, za, advance,
ZIO_FLAG_CANFAIL);
@@ -1361,7 +1409,7 @@ ztest_traverse(ztest_args_t *za)
if (zopt_verbose >= 5)
(void) printf("traverse %s%s%s%s %llu blocks to "
- "<%llu, %llu, %d, %llx>%s\n",
+ "<%llu, %llu, %lld, %llx>%s\n",
(advance & ADVANCE_PRE) ? "pre" : "post",
(advance & ADVANCE_PRUNE) ? "|prune" : "",
(advance & ADVANCE_DATA) ? "|data" : "",
@@ -1369,7 +1417,7 @@ ztest_traverse(ztest_args_t *za)
(u_longlong_t)(th->th_callbacks - cbstart),
(u_longlong_t)th->th_lastcb.zb_objset,
(u_longlong_t)th->th_lastcb.zb_object,
- th->th_lastcb.zb_level,
+ (u_longlong_t)th->th_lastcb.zb_level,
(u_longlong_t)th->th_lastcb.zb_blkid,
rc == 0 ? " [done]" :
rc == EINTR ? " [aborted]" :
@@ -1406,7 +1454,8 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
/*
* Create a batch object if necessary, and record it in the directory.
*/
- dmu_read(os, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t), &batchobj);
+ VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
+ sizeof (uint64_t), &batchobj));
if (batchobj == 0) {
tx = dmu_tx_create(os);
dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
@@ -1430,23 +1479,21 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
* Destroy the previous batch of objects.
*/
for (b = 0; b < batchsize; b++) {
- dmu_read(os, batchobj, b * sizeof (uint64_t),
- sizeof (uint64_t), &object);
+ VERIFY(0 == dmu_read(os, batchobj, b * sizeof (uint64_t),
+ sizeof (uint64_t), &object));
if (object == 0)
continue;
/*
* Read and validate contents.
* We expect the nth byte of the bonus buffer to be n.
*/
- db = dmu_bonus_hold(os, object);
+ VERIFY(0 == dmu_bonus_hold(os, object, FTAG, &db));
dmu_object_info_from_db(db, &doi);
ASSERT(doi.doi_type == DMU_OT_UINT64_OTHER);
ASSERT(doi.doi_bonus_type == DMU_OT_PLAIN_OTHER);
ASSERT3S(doi.doi_physical_blks, >=, 0);
- dmu_buf_read(db);
-
bonuslen = db->db_size;
for (c = 0; c < bonuslen; c++) {
@@ -1460,12 +1507,13 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
}
}
- dmu_buf_rele(db);
+ dmu_buf_rele(db, FTAG);
/*
* We expect the word at endoff to be our object number.
*/
- dmu_read(os, object, endoff, sizeof (uint64_t), &temp);
+ VERIFY(0 == dmu_read(os, object, endoff,
+ sizeof (uint64_t), &temp));
if (temp != object) {
fatal(0, "bad data in %s, got %llu, expected %llu",
@@ -1564,7 +1612,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
/*
* Write to both the bonus buffer and the regular data.
*/
- db = dmu_bonus_hold(os, object);
+ VERIFY(0 == dmu_bonus_hold(os, object, FTAG, &db));
ASSERT3U(bonuslen, ==, db->db_size);
dmu_object_size_from_db(db, &va_blksize, &va_nblocks);
@@ -1579,7 +1627,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
for (c = 0; c < db->db_size; c++)
((uint8_t *)db->db_data)[c] = (uint8_t)(c + bonuslen);
- dmu_buf_rele(db);
+ dmu_buf_rele(db, FTAG);
/*
* Write to a large offset to increase indirection.
@@ -1647,7 +1695,8 @@ ztest_dmu_read_write(ztest_args_t *za)
/*
* Read the directory info. If it's the first time, set things up.
*/
- dmu_read(os, ZTEST_DIROBJ, za->za_diroff, sizeof (dd), &dd);
+ VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
+ sizeof (dd), &dd));
if (dd.dd_chunk == 0) {
ASSERT(dd.dd_packobj == 0);
ASSERT(dd.dd_bigobj == 0);
@@ -1709,8 +1758,10 @@ ztest_dmu_read_write(ztest_args_t *za)
/*
* Read the current contents of our objects.
*/
- dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf);
- dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf);
+ error = dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf);
+ ASSERT3U(error, ==, 0);
+ error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf);
+ ASSERT3U(error, ==, 0);
/*
* Get a tx for the mods to both packobj and bigobj.
@@ -1792,7 +1843,8 @@ ztest_dmu_read_write(ztest_args_t *za)
(u_longlong_t)bigsize,
(u_longlong_t)txg);
}
- dmu_free_range(os, dd.dd_bigobj, bigoff, bigsize, tx);
+ VERIFY(0 == dmu_free_range(os, dd.dd_bigobj, bigoff,
+ bigsize, tx));
} else {
if (zopt_verbose >= 6) {
(void) printf("writing offset %llx size %llx"
@@ -1813,8 +1865,10 @@ ztest_dmu_read_write(ztest_args_t *za)
void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
- dmu_read(os, dd.dd_packobj, packoff, packsize, packcheck);
- dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigcheck);
+ VERIFY(0 == dmu_read(os, dd.dd_packobj, packoff,
+ packsize, packcheck));
+ VERIFY(0 == dmu_read(os, dd.dd_bigobj, bigoff,
+ bigsize, bigcheck));
ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);
@@ -1890,7 +1944,8 @@ ztest_dmu_write_parallel(ztest_args_t *za)
if (do_free) {
(void) mutex_lock(lp);
- dmu_free_range(os, ZTEST_DIROBJ, off, bs, tx);
+ VERIFY(0 == dmu_free_range(os, ZTEST_DIROBJ, off,
+ bs, tx));
(void) mutex_unlock(lp);
dmu_tx_commit(tx);
continue;
@@ -1904,9 +1959,9 @@ ztest_dmu_write_parallel(ztest_args_t *za)
if (off == -1ULL) {
wbt.bt_seq = 0;
- db = dmu_bonus_hold(os, ZTEST_DIROBJ);
+ VERIFY(0 == dmu_bonus_hold(os, ZTEST_DIROBJ,
+ FTAG, &db));
ASSERT3U(db->db_size, ==, sizeof (wbt));
- dmu_buf_read(db);
bcopy(db->db_data, &rbt, db->db_size);
if (rbt.bt_objset != 0) {
ASSERT3U(rbt.bt_objset, ==, wbt.bt_objset);
@@ -1916,7 +1971,7 @@ ztest_dmu_write_parallel(ztest_args_t *za)
}
dmu_buf_will_dirty(db, tx);
bcopy(&wbt, db->db_data, db->db_size);
- dmu_buf_rele(db);
+ dmu_buf_rele(db, FTAG);
dmu_tx_commit(tx);
continue;
}
@@ -1940,6 +1995,7 @@ ztest_dmu_write_parallel(ztest_args_t *za)
if (ztest_random(2) == 0) {
blkptr_t blk = { 0 };
uint64_t blkoff;
+ zbookmark_t zb;
txg_suspend(dmu_objset_pool(os));
(void) mutex_lock(lp);
@@ -1969,9 +2025,13 @@ ztest_dmu_write_parallel(ztest_args_t *za)
* We do this while still txg_suspend()ed to ensure
* that the block can't be reused before we read it.
*/
+ zb.zb_objset = dmu_objset_id(os);
+ zb.zb_object = ZTEST_DIROBJ;
+ zb.zb_level = 0;
+ zb.zb_blkid = off / bs;
error = zio_wait(zio_read(NULL, dmu_objset_spa(os),
&blk, iobuf, bs, NULL, NULL,
- ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_MUSTSUCCEED));
+ ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_MUSTSUCCEED, &zb));
ASSERT(error == 0);
txg_resume(dmu_objset_pool(os));
@@ -2025,13 +2085,14 @@ ztest_zap(ztest_args_t *za)
/*
* Create a new object if necessary, and record it in the directory.
*/
- dmu_read(os, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t), &object);
+ VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
+ sizeof (uint64_t), &object));
if (object == 0) {
tx = dmu_tx_create(os);
dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
sizeof (uint64_t));
- dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, 2);
+ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
ztest_record_enospc("create zap test obj");
@@ -2123,7 +2184,7 @@ ztest_zap(ztest_args_t *za)
* should be txg + object + n.
*/
tx = dmu_tx_create(os);
- dmu_tx_hold_zap(tx, object, 2);
+ dmu_tx_hold_zap(tx, object, TRUE, NULL);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
ztest_record_enospc("create zap entry");
@@ -2168,7 +2229,7 @@ ztest_zap(ztest_args_t *za)
ASSERT3U(error, ==, 0);
tx = dmu_tx_create(os);
- dmu_tx_hold_zap(tx, object, 2);
+ dmu_tx_hold_zap(tx, object, TRUE, NULL);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
ztest_record_enospc("remove zap entry");
@@ -2265,7 +2326,7 @@ ztest_zap_parallel(ztest_args_t *za)
if (i >= 2) {
tx = dmu_tx_create(os);
- dmu_tx_hold_zap(tx, object, 1);
+ dmu_tx_hold_zap(tx, object, TRUE, NULL);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
ztest_record_enospc("zap parallel");
@@ -2334,6 +2395,7 @@ ztest_dsl_prop_get_set(ztest_args_t *za)
const char *prop, *valname;
char setpoint[MAXPATHLEN];
char osname[MAXNAMELEN];
+ int error;
(void) rw_rdlock(&ztest_shared->zs_name_lock);
@@ -2350,8 +2412,15 @@ ztest_dsl_prop_get_set(ztest_args_t *za)
inherit = (value == ZIO_COMPRESS_INHERIT);
}
- VERIFY3U(dsl_prop_set(osname, prop, sizeof (value),
- !inherit, &value), ==, 0);
+ error = dsl_prop_set(osname, prop, sizeof (value),
+ !inherit, &value);
+
+ if (error == ENOSPC) {
+ ztest_record_enospc("dsl_prop_set");
+ break;
+ }
+
+ ASSERT3U(error, ==, 0);
VERIFY3U(dsl_prop_get(osname, prop, sizeof (value),
1, &value, setpoint), ==, 0);
@@ -2370,6 +2439,21 @@ ztest_dsl_prop_get_set(ztest_args_t *za)
(void) rw_unlock(&ztest_shared->zs_name_lock);
}
+/*
+ * Recursively apply the given fault settings to the vdev tree rooted at
+ * 'vd'.  Children are visited first; the mode, mask and arg are then
+ * stored on 'vd' itself, but only if it has a path.
+ */
+static void
+ztest_error_setup(vdev_t *vd, int mode, int mask, uint64_t arg)
+{
+	int i;
+
+	for (i = 0; i < vd->vdev_children; i++)
+		ztest_error_setup(vd->vdev_child[i], mode, mask, arg);
+
+	/* Vdevs without a path are left untouched. */
+	if (vd->vdev_path == NULL)
+		return;
+
+	vd->vdev_fault_mode = mode;
+	vd->vdev_fault_mask = mask;
+	vd->vdev_fault_arg = arg;
+}
+
/*
* Inject random faults into the on-disk data.
*/
@@ -2382,20 +2466,28 @@ ztest_fault_inject(ztest_args_t *za)
uint64_t bad = 0x1990c0ffeedecade;
uint64_t top, leaf;
char path0[MAXPATHLEN];
- char path1[MAXPATHLEN];
char pathrand[MAXPATHLEN];
size_t fsize;
spa_t *spa = dmu_objset_spa(za->za_os);
int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */
int iters = 1000;
- int ftype;
+ vdev_t *vd0;
+ uint64_t guid0 = 0;
+
+ /*
+ * We can't inject faults when we have no fault tolerance.
+ */
+ if (zopt_maxfaults == 0)
+ return;
+
+ ASSERT(leaves >= 2);
/*
* Pick a random top-level vdev.
*/
- spa_config_enter(spa, RW_READER);
+ spa_config_enter(spa, RW_READER, FTAG);
top = ztest_random(spa->spa_root_vdev->vdev_children);
- spa_config_exit(spa);
+ spa_config_exit(spa, FTAG);
/*
* Pick a random leaf.
@@ -2403,73 +2495,45 @@ ztest_fault_inject(ztest_args_t *za)
leaf = ztest_random(leaves);
/*
- * Generate paths to the first to leaves in this top-level vdev,
+ * Generate paths to the first leaf in this top-level vdev,
* and to the random leaf we selected. We'll induce transient
- * faults on leaves 0 and 1, we'll online/offline leaf 1,
+ * I/O errors and random online/offline activity on leaf 0,
* and we'll write random garbage to the randomly chosen leaf.
*/
(void) snprintf(path0, sizeof (path0),
ztest_dev_template, zopt_dir, zopt_pool, top * leaves + 0);
- (void) snprintf(path1, sizeof (path1),
- ztest_dev_template, zopt_dir, zopt_pool, top * leaves + 1);
(void) snprintf(pathrand, sizeof (pathrand),
ztest_dev_template, zopt_dir, zopt_pool, top * leaves + leaf);
- if (leaves < 2) /* there is no second leaf */
- path1[0] = '\0';
+ dprintf("damaging %s and %s\n", path0, pathrand);
- dprintf("damaging %s, %s, and %s\n", path0, path1, pathrand);
+ spa_config_enter(spa, RW_READER, FTAG);
/*
- * If we have exactly one-fault tolerance, just randomly offline
- * and online one device.
+ * If we can tolerate two or more faults, make vd0 fail randomly.
*/
- if (zopt_maxfaults == 1 && path1[0] != '\0') {
- if (ztest_random(10) < 6)
- (void) vdev_offline(spa, path1, B_TRUE);
- else
- (void) vdev_online(spa, path1);
- return;
+ vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
+ if (vd0 != NULL && zopt_maxfaults >= 2) {
+ guid0 = vd0->vdev_guid;
+ ztest_error_setup(vd0, VDEV_FAULT_COUNT,
+ (1U << ZIO_TYPE_READ) | (1U << ZIO_TYPE_WRITE), 100);
}
- /*
- * Always inject a little random device failure, regardless of
- * the replication level. The I/Os should be retried successfully.
- * If we only have single-fault tolerance, don't inject write
- * faults, because then we'll be doing partial writes and won't
- * be able to recover when we inject data corruption.
- */
- if (zopt_maxfaults <= 1)
- ftype = (1U << ZIO_TYPE_READ);
- else
- ftype = (1U << ZIO_TYPE_READ) | (1U << ZIO_TYPE_WRITE);
-
- (void) vdev_error_setup(spa, path0, VDEV_FAULT_COUNT, ftype, 10);
+ spa_config_exit(spa, FTAG);
/*
- * If we can tolerate three or more faults, make one of the
- * devices fail quite a lot.
+ * If we can tolerate two or more faults, randomly online/offline vd0.
*/
- if (zopt_maxfaults >= 3 && path1[0] != '\0')
- (void) vdev_error_setup(spa, path1, VDEV_FAULT_COUNT,
- ftype, 100);
-
- /*
- * If we can tolerate four or more faults, offline one of the devices.
- */
- if (zopt_maxfaults >= 4 && path1[0] != '\0') {
+ if (zopt_maxfaults >= 2 && guid0 != 0) {
if (ztest_random(10) < 6)
- (void) vdev_offline(spa, path1, B_TRUE);
+ (void) vdev_offline(spa, guid0, B_TRUE);
else
- (void) vdev_online(spa, path1);
+ (void) vdev_online(spa, guid0);
}
/*
- * If we have at least single-fault tolerance, inject data corruption.
+ * We have at least single-fault tolerance, so inject data corruption.
*/
- if (zopt_maxfaults < 1)
- return;
-
fd = open(pathrand, O_RDWR);
if (fd == -1) /* we hit a gap in the device namespace */
@@ -2497,19 +2561,6 @@ ztest_fault_inject(ztest_args_t *za)
(void) close(fd);
}
-static void
-ztest_error_setup(vdev_t *vd, int mode, int mask, uint64_t arg)
-{
- int c;
-
- for (c = 0; c < vd->vdev_children; c++)
- ztest_error_setup(vd->vdev_child[c], mode, mask, arg);
-
- if (vd->vdev_path != NULL)
- (void) vdev_error_setup(vd->vdev_spa, vd->vdev_path,
- mode, mask, arg);
-}
-
/*
* Scrub the pool.
*/
@@ -2634,6 +2685,8 @@ ztest_replace_one_disk(spa_t *spa, uint64_t vdev)
char dev_name[MAXPATHLEN];
nvlist_t *file, *root;
int error;
+ uint64_t guid;
+ vdev_t *vd;
(void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
@@ -2649,7 +2702,13 @@ ztest_replace_one_disk(spa_t *spa, uint64_t vdev)
VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
&file, 1) == 0);
- error = spa_vdev_attach(spa, dev_name, root, B_TRUE);
+ spa_config_enter(spa, RW_READER, FTAG);
+ if ((vd = vdev_lookup_by_path(spa->spa_root_vdev, dev_name)) == NULL)
+ guid = 0;
+ else
+ guid = vd->vdev_guid;
+ spa_config_exit(spa, FTAG);
+ error = spa_vdev_attach(spa, guid, root, B_TRUE);
if (error != 0 && error != EBUSY && error != ENOTSUP && error != ENODEV)
fatal(0, "spa_vdev_attach(in-place) = %d", error);
@@ -2943,7 +3002,8 @@ ztest_run(char *pool)
for (d = -5; d <= 5; d++) {
error = dmu_object_info(spa->spa_meta_objset,
(1ULL << t) + d, NULL);
- ASSERT(error == 0 || error == ENOENT);
+ ASSERT(error == 0 || error == ENOENT ||
+ error == EINVAL);
}
}
@@ -3016,6 +3076,7 @@ ztest_run(char *pool)
if (za[t].za_th)
traverse_fini(za[t].za_th);
if (t < zopt_dirs) {
+ txg_wait_synced(spa_get_dsl(spa), 0);
zil_close(za[t].za_zilog);
dmu_objset_close(za[t].za_os);
}
@@ -3046,11 +3107,7 @@ ztest_run(char *pool)
(void) rw_unlock(&ztest_shared->zs_name_lock);
}
- /*
- * Prepare every leaf device to inject a few random read faults.
- */
- ztest_error_setup(spa->spa_root_vdev, VDEV_FAULT_COUNT,
- (1U << ZIO_TYPE_READ), 10);
+ txg_wait_synced(spa_get_dsl(spa), 0);
/*
* Right before closing the pool, kick off a bunch of async I/O;
@@ -3141,11 +3198,6 @@ main(int argc, char **argv)
/* Override location of zpool.cache */
spa_config_dir = "/tmp";
- /*
- * Blow away any existing copy of zpool.cache
- */
- (void) remove("/tmp/zpool.cache");
-
ztest_random_fd = open("/dev/urandom", O_RDONLY);
process_options(argc, argv);
@@ -3155,6 +3207,12 @@ main(int argc, char **argv)
dprintf_setup(&argc, argv);
+ /*
+ * Blow away any existing copy of zpool.cache
+ */
+ if (zopt_init != 0)
+ (void) remove("/tmp/zpool.cache");
+
zs = ztest_shared = (void *)mmap(0,
P2ROUNDUP(sizeof (ztest_shared_t), getpagesize()),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);