diff options
author | Bill Pijewski <wdp@joyent.com> | 2013-07-29 10:02:39 -0700 |
---|---|---|
committer | Bill Pijewski <wdp@joyent.com> | 2013-07-29 10:02:39 -0700 |
commit | 4d48a1076df6f2309f4faf65ee88126c63e00477 (patch) | |
tree | 8f2f6d492de3aced8ca398c7fc546cc9ae511f2b | |
parent | 40ce447c4107dfa47fa3b3c7515037aa1a7b63d9 (diff) | |
download | illumos-joyent-4d48a1076df6f2309f4faf65ee88126c63e00477.tar.gz |
STOR-111 RAID-Z crash dump improvements from code review comments
-rw-r--r-- | usr/src/common/zfs/zfeature_common.c | 3 |
-rw-r--r-- | usr/src/common/zfs/zfeature_common.h | 1 |
-rw-r--r-- | usr/src/man/man5/zpool-features.5 | 21 |
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/vdev_disk.h | 3 |
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/vdev_raidz.h | 4 |
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/zio.h | 1 |
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_disk.c | 13 |
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_raidz.c | 4 |
-rw-r--r-- | usr/src/uts/common/fs/zfs/zio_checksum.c | 8 |
-rw-r--r-- | usr/src/uts/common/fs/zfs/zvol.c | 55 |
10 files changed, 90 insertions, 23 deletions
diff --git a/usr/src/common/zfs/zfeature_common.c b/usr/src/common/zfs/zfeature_common.c index 19afefe90f..f467b2f91c 100644 --- a/usr/src/common/zfs/zfeature_common.c +++ b/usr/src/common/zfs/zfeature_common.c @@ -164,4 +164,7 @@ zpool_feature_init(void) zfeature_register(SPA_FEATURE_FS_SS_LIMIT, "com.joyent:filesystem_limits", "filesystem_limits", "Filesystem and snapshot limits.", B_TRUE, B_FALSE, NULL); + zfeature_register(SPA_FEATURE_MULTI_VDEV_CRASH_DUMP, + "com.joyent:multi_vdev_crash_dump", "multi_vdev_crash_dump", + "Crash dumps to multiple vdev pools.", B_TRUE, B_FALSE, NULL); } diff --git a/usr/src/common/zfs/zfeature_common.h b/usr/src/common/zfs/zfeature_common.h index 2223f48c66..b056fb29f8 100644 --- a/usr/src/common/zfs/zfeature_common.h +++ b/usr/src/common/zfs/zfeature_common.h @@ -56,6 +56,7 @@ enum spa_feature { SPA_FEATURE_EMPTY_BPOBJ, SPA_FEATURE_LZ4_COMPRESS, SPA_FEATURE_FS_SS_LIMIT, + SPA_FEATURE_MULTI_VDEV_CRASH_DUMP, SPA_FEATURES } spa_feature_t; diff --git a/usr/src/man/man5/zpool-features.5 b/usr/src/man/man5/zpool-features.5 index 8d521c7ee2..b67c84f720 100644 --- a/usr/src/man/man5/zpool-features.5 +++ b/usr/src/man/man5/zpool-features.5 @@ -230,5 +230,26 @@ moment, this operation cannot be reversed. Booting off of \fBlz4\fR-compressed root pools is supported. .RE +.sp +.ne 2 +.na +\fB\fBmulti_vdev_crash_dump\fR\fR +.ad +.RS 4n +.TS +l l . +GUID com.joyent:multi_vdev_crash_dump +READ\-ONLY COMPATIBLE yes +DEPENDENCIES none +.TE + +This feature allows a dump device to be configured with a pool comprised +of multiple vdevs. Those vdevs may be arranged in any mirrored or raidz +configuration. + +When the \fBmulti_vdev_crash_dump\fR feature is set to \fBenabled\fR, +the administrator can use the \fBdumpadm\fR(1M) command to configure a +dump device. 
+ .SH "SEE ALSO" \fBzpool\fR(1M) diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_disk.h b/usr/src/uts/common/fs/zfs/sys/vdev_disk.h index 1f68b47e84..b2fa0a0da4 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_disk.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_disk.h @@ -50,7 +50,8 @@ typedef struct vdev_disk { } vdev_disk_t; #endif -extern int vdev_disk_physio(vdev_t *, caddr_t, size_t, uint64_t, int); +extern int vdev_disk_physio(vdev_t *, + caddr_t, size_t, uint64_t, int, boolean_t); /* * Since vdev_disk.c is not compiled into libzpool, this function should only be diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_raidz.h b/usr/src/uts/common/fs/zfs/sys/vdev_raidz.h index 40616280b8..7e75050f3e 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_raidz.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_raidz.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2011 Joyent, Inc. All rights reserved. + * Copyright 2013 Joyent, Inc. All rights reserved. */ #ifndef _SYS_VDEV_RAIDZ_H @@ -39,7 +39,7 @@ extern "C" { #ifdef _KERNEL extern int vdev_raidz_physio(vdev_t *, - caddr_t, size_t, uint64_t, uint64_t, boolean_t); + caddr_t, size_t, uint64_t, uint64_t, boolean_t, boolean_t); #endif #ifdef __cplusplus } diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h index a8fb9f9c4d..6bd9830687 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio.h +++ b/usr/src/uts/common/fs/zfs/sys/zio.h @@ -80,7 +80,6 @@ enum zio_checksum { ZIO_CHECKSUM_FLETCHER_4, ZIO_CHECKSUM_SHA256, ZIO_CHECKSUM_ZILOG2, - ZIO_CHECKSUM_SHA256_MAC, ZIO_CHECKSUM_NOPARITY, ZIO_CHECKSUM_FUNCTIONS }; diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c index a0a151db71..23ab2e0420 100644 --- a/usr/src/uts/common/fs/zfs/vdev_disk.c +++ b/usr/src/uts/common/fs/zfs/vdev_disk.c @@ -591,7 +591,7 @@ vdev_disk_close(vdev_t *vd) int vdev_disk_physio(vdev_t *vd, caddr_t data, - size_t size, uint64_t offset, int flags) + size_t size, uint64_t offset, int flags, 
boolean_t isdump) { vdev_disk_t *dvd = vd->vdev_tsd; @@ -603,6 +603,17 @@ vdev_disk_physio(vdev_t *vd, caddr_t data, return (EIO); ASSERT(vd->vdev_ops == &vdev_disk_ops); + + /* + * If in the context of an active crash dump, use the ldi_dump(9F) + * call instead of ldi_strategy(9F) as usual. + */ + if (isdump) { + ASSERT3P(dvd, !=, NULL); + return (ldi_dump(dvd->vd_lh, data, lbtodb(offset), + lbtodb(size))); + } + return (vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags)); } diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c index b22bcd2b2f..e47632ea70 100644 --- a/usr/src/uts/common/fs/zfs/vdev_raidz.c +++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c @@ -1586,7 +1586,7 @@ vdev_raidz_close(vdev_t *vd) */ int vdev_raidz_physio(vdev_t *vd, caddr_t data, size_t size, - uint64_t offset, uint64_t origoffset, boolean_t doread) + uint64_t offset, uint64_t origoffset, boolean_t doread, boolean_t isdump) { vdev_t *tvd = vd->vdev_top; vdev_t *cvd; @@ -1659,7 +1659,7 @@ vdev_raidz_physio(vdev_t *vd, caddr_t data, size_t size, if ((err = vdev_disk_physio(cvd, ((char *)rc->rc_data) + colskip, colsize, VDEV_LABEL_OFFSET(rc->rc_offset) + colskip, - flags)) != 0) + flags, isdump)) != 0) break; } diff --git a/usr/src/uts/common/fs/zfs/zio_checksum.c b/usr/src/uts/common/fs/zfs/zio_checksum.c index 1a3b51030a..72bf9ac949 100644 --- a/usr/src/uts/common/fs/zfs/zio_checksum.c +++ b/usr/src/uts/common/fs/zfs/zio_checksum.c @@ -68,13 +68,6 @@ zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp) ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); } -/* - * The sha256_mac checksum algorithm was added to try to maintain on-disk - * compatibility with ZFS on other platforms. That effort didn't work for other - * reasons. As a result, the sha256_mac algorithm is unused except in the rare - * case of an older platform interpreting noparity as sha256_mac -- which is why - * they both are no-ops. 
- */ zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { {{NULL, NULL}, 0, 0, 0, "inherit"}, {{NULL, NULL}, 0, 0, 0, "on"}, @@ -86,7 +79,6 @@ zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, 0, "fletcher4"}, {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, 1, "sha256"}, {{fletcher_4_native, fletcher_4_byteswap}, 0, 1, 0, "zilog2"}, - {{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "sha256_mac"}, {{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "noparity"}, }; diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c index 3659a87746..0e6319fb6b 100644 --- a/usr/src/uts/common/fs/zfs/zvol.c +++ b/usr/src/uts/common/fs/zfs/zvol.c @@ -54,6 +54,7 @@ #include <sys/stat.h> #include <sys/zap.h> #include <sys/spa.h> +#include <sys/spa_impl.h> #include <sys/zio.h> #include <sys/dmu_traverse.h> #include <sys/dnode.h> @@ -83,6 +84,7 @@ #include <sys/zil_impl.h> #include <sys/sdt.h> #include <sys/dbuf.h> +#include <sys/zfeature.h> #include "zfs_namecheck.h" @@ -1134,7 +1136,7 @@ zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset, if (vd->vdev_ops == &vdev_raidz_ops) { return (vdev_raidz_physio(vd, - addr, size, offset, origoffset, doread)); + addr, size, offset, origoffset, doread, isdump)); } offset += VDEV_LABEL_START_SIZE; @@ -1208,7 +1210,7 @@ zvol_strategy(buf_t *bp) rl_t *rl; int error = 0; boolean_t doread = bp->b_flags & B_READ; - boolean_t is_dump; + boolean_t is_dumpified; boolean_t sync; if (getminor(bp->b_edev) == 0) { @@ -1251,11 +1253,11 @@ zvol_strategy(buf_t *bp) return (0); } - is_dump = zv->zv_flags & ZVOL_DUMPIFIED; + is_dumpified = zv->zv_flags & ZVOL_DUMPIFIED; sync = ((!(bp->b_flags & B_ASYNC) && !(zv->zv_flags & ZVOL_WCE)) || (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) && - !doread && !is_dump; + !doread && !is_dumpified; /* * There must be no buffer changes when doing a dmu_sync() because @@ -1266,7 +1268,7 @@ 
zvol_strategy(buf_t *bp) while (resid != 0 && off < volsize) { size_t size = MIN(resid, zvol_maxphys); - if (is_dump) { + if (is_dumpified) { size = MIN(size, P2END(off, zv->zv_volblocksize) - off); error = zvol_dumpio(zv, addr, off, size, doread, B_FALSE); @@ -1840,11 +1842,14 @@ zvol_fini(void) static int zvol_dump_init(zvol_state_t *zv, boolean_t resize) { - dmu_tx_t *tx; - int error = 0; objset_t *os = zv->zv_objset; + spa_t *spa = dmu_objset_spa(os); + vdev_t *vd = spa->spa_root_vdev; + uint64_t version = spa_version(spa); + + dmu_tx_t *tx; nvlist_t *nv = NULL; - uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset)); + int error; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0, @@ -1862,6 +1867,24 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize) } /* + * If the pool on which the dump device is being initialized has more + * than one child vdev, check that the MULTI_VDEV_CRASH_DUMP feature is + * enabled. If so, bump that feature's counter to indicate that the + * feature is active. Only bump the counter if the feature was + * previously inactive. + */ + ASSERT(vd->vdev_ops == &vdev_root_ops); + if (vd->vdev_children > 1) { + zfeature_info_t *feature = + &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP]; + + if (!spa_feature_is_enabled(spa, feature)) + return (SET_ERROR(ENOTSUP)); + if (!spa_feature_is_active(spa, feature)) + spa_feature_incr(spa, feature, tx); + } + + /* * If we are resizing the dump device then we only need to * update the refreservation to match the newly updated * zvolsize. 
Otherwise, we save off the original state of the @@ -2002,6 +2025,9 @@ zvol_dump_fini(zvol_state_t *zv) { dmu_tx_t *tx; objset_t *os = zv->zv_objset; + spa_t *spa = dmu_objset_spa(os); + vdev_t *vd = spa->spa_root_vdev; + nvlist_t *nv; int error = 0; uint64_t checksum, compress, refresrv, vbs, dedup; @@ -2021,6 +2047,19 @@ zvol_dump_fini(zvol_state_t *zv) dmu_tx_abort(tx); return (error); } + + /* + * As in zvol_dump_init(), decrement the MULTI_VDEV_CRASH_DUMP feature's + * refcount if this vdev has more than one child. + */ + if (vd->vdev_children > 1) { + zfeature_info_t *feature = + &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP]; + + if (spa_feature_is_active(spa, feature)) + spa_feature_decr(spa, feature, tx); + } + (void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx); dmu_tx_commit(tx); |