diff options
author | Bill Pijewski <wdp@joyent.com> | 2013-08-22 19:03:54 +0000 |
---|---|---|
committer | Robert Mustacchi <rm@joyent.com> | 2013-08-23 11:43:15 -0700 |
commit | 810e43b2eb0e320833671a403fdda51917e8b036 (patch) | |
tree | a774526dde88456d4e42ed5c9cf54a2cb77538d2 /usr/src/uts/common/fs/zfs/zvol.c | |
parent | 1b3b16f35bee1ffc210591d82bca6adf247954b0 (diff) | |
download | illumos-gate-810e43b2eb0e320833671a403fdda51917e8b036.tar.gz |
2932 support crash dumps to raidz, etc. pools
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Dan McDonald <danmcd@nexenta.com>
Diffstat (limited to 'usr/src/uts/common/fs/zfs/zvol.c')
-rw-r--r-- | usr/src/uts/common/fs/zfs/zvol.c | 119 |
1 files changed, 94 insertions, 25 deletions
diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c index 5fad8cfdaf..d53b5b28c6 100644 --- a/usr/src/uts/common/fs/zfs/zvol.c +++ b/usr/src/uts/common/fs/zfs/zvol.c @@ -25,6 +25,7 @@ * * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ /* @@ -54,6 +55,7 @@ #include <sys/stat.h> #include <sys/zap.h> #include <sys/spa.h> +#include <sys/spa_impl.h> #include <sys/zio.h> #include <sys/dmu_traverse.h> #include <sys/dnode.h> @@ -77,10 +79,14 @@ #include <sys/zfs_rlock.h> #include <sys/vdev_disk.h> #include <sys/vdev_impl.h> +#include <sys/vdev_raidz.h> #include <sys/zvol.h> #include <sys/dumphdr.h> #include <sys/zil_impl.h> #include <sys/dbuf.h> +#include <sys/dmu_tx.h> +#include <sys/zfeature.h> +#include <sys/zio_checksum.h> #include "zfs_namecheck.h" @@ -1101,27 +1107,28 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, } static int -zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t size, - boolean_t doread, boolean_t isdump) +zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset, + uint64_t size, boolean_t doread, boolean_t isdump) { vdev_disk_t *dvd; int c; int numerrors = 0; - for (c = 0; c < vd->vdev_children; c++) { - ASSERT(vd->vdev_ops == &vdev_mirror_ops || - vd->vdev_ops == &vdev_replacing_ops || - vd->vdev_ops == &vdev_spare_ops); - int err = zvol_dumpio_vdev(vd->vdev_child[c], - addr, offset, size, doread, isdump); - if (err != 0) { - numerrors++; - } else if (doread) { - break; + if (vd->vdev_ops == &vdev_mirror_ops || + vd->vdev_ops == &vdev_replacing_ops || + vd->vdev_ops == &vdev_spare_ops) { + for (c = 0; c < vd->vdev_children; c++) { + int err = zvol_dumpio_vdev(vd->vdev_child[c], + addr, offset, origoffset, size, doread, isdump); + if (err != 0) { + numerrors++; + } else if (doread) { + break; + } } } - if (!vd->vdev_ops->vdev_op_leaf) + if (!vd->vdev_ops->vdev_op_leaf && vd->vdev_ops != &vdev_raidz_ops) return (numerrors < vd->vdev_children ? 0 : EIO); if (doread && !vdev_readable(vd)) @@ -1129,19 +1136,26 @@ zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t size, else if (!doread && !vdev_writeable(vd)) return (SET_ERROR(EIO)); - dvd = vd->vdev_tsd; - ASSERT3P(dvd, !=, NULL); + if (vd->vdev_ops == &vdev_raidz_ops) { + return (vdev_raidz_physio(vd, + addr, size, offset, origoffset, doread, isdump)); + } + offset += VDEV_LABEL_START_SIZE; if (ddi_in_panic() || isdump) { ASSERT(!doread); if (doread) return (SET_ERROR(EIO)); + dvd = vd->vdev_tsd; + ASSERT3P(dvd, !=, NULL); return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset), lbtodb(size))); } else { - return (vdev_disk_physio(dvd->vd_lh, addr, size, offset, - doread ? B_READ : B_WRITE)); + dvd = vd->vdev_tsd; + ASSERT3P(dvd, !=, NULL); + return (vdev_disk_ldi_physio(dvd->vd_lh, addr, size, + offset, doread ? B_READ : B_WRITE)); } } @@ -1176,7 +1190,8 @@ zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, vd = vdev_lookup_top(spa, DVA_GET_VDEV(&ze->ze_dva)); offset += DVA_GET_OFFSET(&ze->ze_dva); - error = zvol_dumpio_vdev(vd, addr, offset, size, doread, isdump); + error = zvol_dumpio_vdev(vd, addr, offset, DVA_GET_OFFSET(&ze->ze_dva), + size, doread, isdump); if (!ddi_in_panic()) spa_config_exit(spa, SCL_STATE, FTAG); @@ -1196,7 +1211,7 @@ zvol_strategy(buf_t *bp) rl_t *rl; int error = 0; boolean_t doread = bp->b_flags & B_READ; - boolean_t is_dump; + boolean_t is_dumpified; boolean_t sync; if (getminor(bp->b_edev) == 0) { @@ -1239,11 +1254,11 @@ zvol_strategy(buf_t *bp) return (0); } - is_dump = zv->zv_flags & ZVOL_DUMPIFIED; + is_dumpified = zv->zv_flags & ZVOL_DUMPIFIED; sync = ((!(bp->b_flags & B_ASYNC) && !(zv->zv_flags & ZVOL_WCE)) || (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) && - !doread && !is_dump; + !doread && !is_dumpified; /* * There must be no buffer changes when doing a dmu_sync() because @@ -1254,7 +1269,7 @@ zvol_strategy(buf_t *bp) while (resid != 0 && off < volsize) { size_t size = MIN(resid, zvol_maxphys); - if (is_dump) { + if (is_dumpified) { size = MIN(size, P2END(off, zv->zv_volblocksize) - off); error = zvol_dumpio(zv, addr, off, size, doread, B_FALSE); @@ -1813,21 +1828,67 @@ zvol_fini(void) ddi_soft_state_fini(&zfsdev_state); } +/*ARGSUSED*/ +static int +zfs_mvdev_dump_feature_check(void *arg, dmu_tx_t *tx) +{ + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + + if (spa_feature_is_active(spa, + &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP])) + return (1); + return (0); +} + +/*ARGSUSED*/ +static void +zfs_mvdev_dump_activate_feature_sync(void *arg, dmu_tx_t *tx) +{ + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + + spa_feature_incr(spa, + &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP], tx); +} + static int zvol_dump_init(zvol_state_t *zv, boolean_t resize) { dmu_tx_t *tx; - int error = 0; + int error; objset_t *os = zv->zv_objset; + spa_t *spa = dmu_objset_spa(os); + vdev_t *vd = spa->spa_root_vdev; nvlist_t *nv = NULL; - uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset)); + uint64_t version = spa_version(spa); + enum zio_checksum checksum; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + ASSERT(vd->vdev_ops == &vdev_root_ops); + error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0, DMU_OBJECT_END); /* wait for dmu_free_long_range to actually free the blocks */ txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); + /* + * If the pool on which the dump device is being initialized has more + * than one child vdev, check that the MULTI_VDEV_CRASH_DUMP feature is + * enabled. If so, bump that feature's counter to indicate that the + * feature is active. We also check the vdev type to handle the + * following case: + * # zpool create test raidz disk1 disk2 disk3 + * Now have spa_root_vdev->vdev_children == 1 (the raidz vdev), + * the raidz vdev itself has 3 children. + */ + if (vd->vdev_children > 1 || vd->vdev_ops == &vdev_raidz_ops) { + if (!spa_feature_is_enabled(spa, + &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP])) + return (SET_ERROR(ENOTSUP)); + (void) dsl_sync_task(spa_name(spa), + zfs_mvdev_dump_feature_check, + zfs_mvdev_dump_activate_feature_sync, NULL, 2); + } + tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); dmu_tx_hold_bonus(tx, ZVOL_OBJ); @@ -1838,6 +1899,14 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize) } /* + * If MULTI_VDEV_CRASH_DUMP is active, use the NOPARITY checksum + * function. Otherwise, use the old default -- OFF. + */ + checksum = spa_feature_is_active(spa, + &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP]) ? + ZIO_CHECKSUM_NOPARITY : ZIO_CHECKSUM_OFF; + + /* * If we are resizing the dump device then we only need to * update the refreservation to match the newly updated * zvolsize. Otherwise, we save off the original state of the @@ -1900,7 +1969,7 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize) ZIO_COMPRESS_OFF) == 0); VERIFY(nvlist_add_uint64(nv, zfs_prop_to_name(ZFS_PROP_CHECKSUM), - ZIO_CHECKSUM_OFF) == 0); + checksum) == 0); if (version >= SPA_VERSION_DEDUP) { VERIFY(nvlist_add_uint64(nv, zfs_prop_to_name(ZFS_PROP_DEDUP), |