summary | refs | log | tree | commit | diff
path: root/usr/src/uts/common/fs/zfs/zvol.c
diff options
context:
space:
mode:
author: Bill Pijewski <wdp@joyent.com> 2013-08-22 19:03:54 +0000
committer: Robert Mustacchi <rm@joyent.com> 2013-08-23 11:43:15 -0700
commit: 810e43b2eb0e320833671a403fdda51917e8b036 (patch)
tree: a774526dde88456d4e42ed5c9cf54a2cb77538d2 /usr/src/uts/common/fs/zfs/zvol.c
parent: 1b3b16f35bee1ffc210591d82bca6adf247954b0 (diff)
download: illumos-gate-810e43b2eb0e320833671a403fdda51917e8b036.tar.gz
2932 support crash dumps to raidz, etc. pools
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Dan McDonald <danmcd@nexenta.com>
Diffstat (limited to 'usr/src/uts/common/fs/zfs/zvol.c')
-rw-r--r-- usr/src/uts/common/fs/zfs/zvol.c 119
1 files changed, 94 insertions, 25 deletions
diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c
index 5fad8cfdaf..d53b5b28c6 100644
--- a/usr/src/uts/common/fs/zfs/zvol.c
+++ b/usr/src/uts/common/fs/zfs/zvol.c
@@ -25,6 +25,7 @@
*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
/*
@@ -54,6 +55,7 @@
#include <sys/stat.h>
#include <sys/zap.h>
#include <sys/spa.h>
+#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/dmu_traverse.h>
#include <sys/dnode.h>
@@ -77,10 +79,14 @@
#include <sys/zfs_rlock.h>
#include <sys/vdev_disk.h>
#include <sys/vdev_impl.h>
+#include <sys/vdev_raidz.h>
#include <sys/zvol.h>
#include <sys/dumphdr.h>
#include <sys/zil_impl.h>
#include <sys/dbuf.h>
+#include <sys/dmu_tx.h>
+#include <sys/zfeature.h>
+#include <sys/zio_checksum.h>
#include "zfs_namecheck.h"
@@ -1101,27 +1107,28 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
}
static int
-zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t size,
- boolean_t doread, boolean_t isdump)
+zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset,
+ uint64_t size, boolean_t doread, boolean_t isdump)
{
vdev_disk_t *dvd;
int c;
int numerrors = 0;
- for (c = 0; c < vd->vdev_children; c++) {
- ASSERT(vd->vdev_ops == &vdev_mirror_ops ||
- vd->vdev_ops == &vdev_replacing_ops ||
- vd->vdev_ops == &vdev_spare_ops);
- int err = zvol_dumpio_vdev(vd->vdev_child[c],
- addr, offset, size, doread, isdump);
- if (err != 0) {
- numerrors++;
- } else if (doread) {
- break;
+ if (vd->vdev_ops == &vdev_mirror_ops ||
+ vd->vdev_ops == &vdev_replacing_ops ||
+ vd->vdev_ops == &vdev_spare_ops) {
+ for (c = 0; c < vd->vdev_children; c++) {
+ int err = zvol_dumpio_vdev(vd->vdev_child[c],
+ addr, offset, origoffset, size, doread, isdump);
+ if (err != 0) {
+ numerrors++;
+ } else if (doread) {
+ break;
+ }
}
}
- if (!vd->vdev_ops->vdev_op_leaf)
+ if (!vd->vdev_ops->vdev_op_leaf && vd->vdev_ops != &vdev_raidz_ops)
return (numerrors < vd->vdev_children ? 0 : EIO);
if (doread && !vdev_readable(vd))
@@ -1129,19 +1136,26 @@ zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t size,
else if (!doread && !vdev_writeable(vd))
return (SET_ERROR(EIO));
- dvd = vd->vdev_tsd;
- ASSERT3P(dvd, !=, NULL);
+ if (vd->vdev_ops == &vdev_raidz_ops) {
+ return (vdev_raidz_physio(vd,
+ addr, size, offset, origoffset, doread, isdump));
+ }
+
offset += VDEV_LABEL_START_SIZE;
if (ddi_in_panic() || isdump) {
ASSERT(!doread);
if (doread)
return (SET_ERROR(EIO));
+ dvd = vd->vdev_tsd;
+ ASSERT3P(dvd, !=, NULL);
return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset),
lbtodb(size)));
} else {
- return (vdev_disk_physio(dvd->vd_lh, addr, size, offset,
- doread ? B_READ : B_WRITE));
+ dvd = vd->vdev_tsd;
+ ASSERT3P(dvd, !=, NULL);
+ return (vdev_disk_ldi_physio(dvd->vd_lh, addr, size,
+ offset, doread ? B_READ : B_WRITE));
}
}
@@ -1176,7 +1190,8 @@ zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size,
vd = vdev_lookup_top(spa, DVA_GET_VDEV(&ze->ze_dva));
offset += DVA_GET_OFFSET(&ze->ze_dva);
- error = zvol_dumpio_vdev(vd, addr, offset, size, doread, isdump);
+ error = zvol_dumpio_vdev(vd, addr, offset, DVA_GET_OFFSET(&ze->ze_dva),
+ size, doread, isdump);
if (!ddi_in_panic())
spa_config_exit(spa, SCL_STATE, FTAG);
@@ -1196,7 +1211,7 @@ zvol_strategy(buf_t *bp)
rl_t *rl;
int error = 0;
boolean_t doread = bp->b_flags & B_READ;
- boolean_t is_dump;
+ boolean_t is_dumpified;
boolean_t sync;
if (getminor(bp->b_edev) == 0) {
@@ -1239,11 +1254,11 @@ zvol_strategy(buf_t *bp)
return (0);
}
- is_dump = zv->zv_flags & ZVOL_DUMPIFIED;
+ is_dumpified = zv->zv_flags & ZVOL_DUMPIFIED;
sync = ((!(bp->b_flags & B_ASYNC) &&
!(zv->zv_flags & ZVOL_WCE)) ||
(zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) &&
- !doread && !is_dump;
+ !doread && !is_dumpified;
/*
* There must be no buffer changes when doing a dmu_sync() because
@@ -1254,7 +1269,7 @@ zvol_strategy(buf_t *bp)
while (resid != 0 && off < volsize) {
size_t size = MIN(resid, zvol_maxphys);
- if (is_dump) {
+ if (is_dumpified) {
size = MIN(size, P2END(off, zv->zv_volblocksize) - off);
error = zvol_dumpio(zv, addr, off, size,
doread, B_FALSE);
@@ -1813,21 +1828,67 @@ zvol_fini(void)
ddi_soft_state_fini(&zfsdev_state);
}
+/*ARGSUSED*/
+static int
+zfs_mvdev_dump_feature_check(void *arg, dmu_tx_t *tx)
+{
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+
+ if (spa_feature_is_active(spa,
+ &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP]))
+ return (1);
+ return (0);
+}
+
+/*ARGSUSED*/
+static void
+zfs_mvdev_dump_activate_feature_sync(void *arg, dmu_tx_t *tx)
+{
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+
+ spa_feature_incr(spa,
+ &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP], tx);
+}
+
static int
zvol_dump_init(zvol_state_t *zv, boolean_t resize)
{
dmu_tx_t *tx;
- int error = 0;
+ int error;
objset_t *os = zv->zv_objset;
+ spa_t *spa = dmu_objset_spa(os);
+ vdev_t *vd = spa->spa_root_vdev;
nvlist_t *nv = NULL;
- uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset));
+ uint64_t version = spa_version(spa);
+ enum zio_checksum checksum;
ASSERT(MUTEX_HELD(&zfsdev_state_lock));
+ ASSERT(vd->vdev_ops == &vdev_root_ops);
+
error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0,
DMU_OBJECT_END);
/* wait for dmu_free_long_range to actually free the blocks */
txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
+ /*
+ * If the pool on which the dump device is being initialized has more
+ * than one child vdev, check that the MULTI_VDEV_CRASH_DUMP feature is
+ * enabled. If so, bump that feature's counter to indicate that the
+ * feature is active. We also check the vdev type to handle the
+ * following case:
+ * # zpool create test raidz disk1 disk2 disk3
+ * Now have spa_root_vdev->vdev_children == 1 (the raidz vdev),
+ * the raidz vdev itself has 3 children.
+ */
+ if (vd->vdev_children > 1 || vd->vdev_ops == &vdev_raidz_ops) {
+ if (!spa_feature_is_enabled(spa,
+ &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP]))
+ return (SET_ERROR(ENOTSUP));
+ (void) dsl_sync_task(spa_name(spa),
+ zfs_mvdev_dump_feature_check,
+ zfs_mvdev_dump_activate_feature_sync, NULL, 2);
+ }
+
tx = dmu_tx_create(os);
dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
dmu_tx_hold_bonus(tx, ZVOL_OBJ);
@@ -1838,6 +1899,14 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize)
}
/*
+ * If MULTI_VDEV_CRASH_DUMP is active, use the NOPARITY checksum
+ * function. Otherwise, use the old default -- OFF.
+ */
+ checksum = spa_feature_is_active(spa,
+ &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP]) ?
+ ZIO_CHECKSUM_NOPARITY : ZIO_CHECKSUM_OFF;
+
+ /*
* If we are resizing the dump device then we only need to
* update the refreservation to match the newly updated
* zvolsize. Otherwise, we save off the original state of the
@@ -1900,7 +1969,7 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize)
ZIO_COMPRESS_OFF) == 0);
VERIFY(nvlist_add_uint64(nv,
zfs_prop_to_name(ZFS_PROP_CHECKSUM),
- ZIO_CHECKSUM_OFF) == 0);
+ checksum) == 0);
if (version >= SPA_VERSION_DEDUP) {
VERIFY(nvlist_add_uint64(nv,
zfs_prop_to_name(ZFS_PROP_DEDUP),