summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Bill Pijewski <wdp@joyent.com> 2013-07-29 10:02:39 -0700
committer Bill Pijewski <wdp@joyent.com> 2013-07-29 10:02:39 -0700
commit 4d48a1076df6f2309f4faf65ee88126c63e00477 (patch)
tree 8f2f6d492de3aced8ca398c7fc546cc9ae511f2b
parent 40ce447c4107dfa47fa3b3c7515037aa1a7b63d9 (diff)
download illumos-joyent-4d48a1076df6f2309f4faf65ee88126c63e00477.tar.gz
STOR-111 RAID-Z crash dump improvements from code review comments
-rw-r--r-- usr/src/common/zfs/zfeature_common.c 3
-rw-r--r-- usr/src/common/zfs/zfeature_common.h 1
-rw-r--r-- usr/src/man/man5/zpool-features.5 21
-rw-r--r-- usr/src/uts/common/fs/zfs/sys/vdev_disk.h 3
-rw-r--r-- usr/src/uts/common/fs/zfs/sys/vdev_raidz.h 4
-rw-r--r-- usr/src/uts/common/fs/zfs/sys/zio.h 1
-rw-r--r-- usr/src/uts/common/fs/zfs/vdev_disk.c 13
-rw-r--r-- usr/src/uts/common/fs/zfs/vdev_raidz.c 4
-rw-r--r-- usr/src/uts/common/fs/zfs/zio_checksum.c 8
-rw-r--r-- usr/src/uts/common/fs/zfs/zvol.c 55
10 files changed, 90 insertions, 23 deletions
diff --git a/usr/src/common/zfs/zfeature_common.c b/usr/src/common/zfs/zfeature_common.c
index 19afefe90f..f467b2f91c 100644
--- a/usr/src/common/zfs/zfeature_common.c
+++ b/usr/src/common/zfs/zfeature_common.c
@@ -164,4 +164,7 @@ zpool_feature_init(void)
zfeature_register(SPA_FEATURE_FS_SS_LIMIT,
"com.joyent:filesystem_limits", "filesystem_limits",
"Filesystem and snapshot limits.", B_TRUE, B_FALSE, NULL);
+ zfeature_register(SPA_FEATURE_MULTI_VDEV_CRASH_DUMP,
+ "com.joyent:multi_vdev_crash_dump", "multi_vdev_crash_dump",
+ "Crash dumps to multiple vdev pools.", B_TRUE, B_FALSE, NULL);
}
diff --git a/usr/src/common/zfs/zfeature_common.h b/usr/src/common/zfs/zfeature_common.h
index 2223f48c66..b056fb29f8 100644
--- a/usr/src/common/zfs/zfeature_common.h
+++ b/usr/src/common/zfs/zfeature_common.h
@@ -56,6 +56,7 @@ enum spa_feature {
SPA_FEATURE_EMPTY_BPOBJ,
SPA_FEATURE_LZ4_COMPRESS,
SPA_FEATURE_FS_SS_LIMIT,
+ SPA_FEATURE_MULTI_VDEV_CRASH_DUMP,
SPA_FEATURES
} spa_feature_t;
diff --git a/usr/src/man/man5/zpool-features.5 b/usr/src/man/man5/zpool-features.5
index 8d521c7ee2..b67c84f720 100644
--- a/usr/src/man/man5/zpool-features.5
+++ b/usr/src/man/man5/zpool-features.5
@@ -230,5 +230,26 @@ moment, this operation cannot be reversed. Booting off of
\fBlz4\fR-compressed root pools is supported.
.RE
+.sp
+.ne 2
+.na
+\fB\fBmulti_vdev_crash_dump\fR\fR
+.ad
+.RS 4n
+.TS
+l l .
+GUID com.joyent:multi_vdev_crash_dump
+READ\-ONLY COMPATIBLE yes
+DEPENDENCIES none
+.TE
+
+This feature allows a dump device to be configured with a pool comprised
+of multiple vdevs. Those vdevs may be arranged in any mirrored or raidz
+configuration.
+
+When the \fBmulti_vdev_crash_dump\fR feature is set to \fBenabled\fR,
+the administrator can use the \fBdumpadm\fR(1M) command to configure a
+dump device.
+
.SH "SEE ALSO"
\fBzpool\fR(1M)
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_disk.h b/usr/src/uts/common/fs/zfs/sys/vdev_disk.h
index 1f68b47e84..b2fa0a0da4 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_disk.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_disk.h
@@ -50,7 +50,8 @@ typedef struct vdev_disk {
} vdev_disk_t;
#endif
-extern int vdev_disk_physio(vdev_t *, caddr_t, size_t, uint64_t, int);
+extern int vdev_disk_physio(vdev_t *,
+ caddr_t, size_t, uint64_t, int, boolean_t);
/*
* Since vdev_disk.c is not compiled into libzpool, this function should only be
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_raidz.h b/usr/src/uts/common/fs/zfs/sys/vdev_raidz.h
index 40616280b8..7e75050f3e 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_raidz.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_raidz.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Copyright 2013 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_VDEV_RAIDZ_H
@@ -39,7 +39,7 @@ extern "C" {
#ifdef _KERNEL
extern int vdev_raidz_physio(vdev_t *,
- caddr_t, size_t, uint64_t, uint64_t, boolean_t);
+ caddr_t, size_t, uint64_t, uint64_t, boolean_t, boolean_t);
#endif
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h
index a8fb9f9c4d..6bd9830687 100644
--- a/usr/src/uts/common/fs/zfs/sys/zio.h
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h
@@ -80,7 +80,6 @@ enum zio_checksum {
ZIO_CHECKSUM_FLETCHER_4,
ZIO_CHECKSUM_SHA256,
ZIO_CHECKSUM_ZILOG2,
- ZIO_CHECKSUM_SHA256_MAC,
ZIO_CHECKSUM_NOPARITY,
ZIO_CHECKSUM_FUNCTIONS
};
diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c
index a0a151db71..23ab2e0420 100644
--- a/usr/src/uts/common/fs/zfs/vdev_disk.c
+++ b/usr/src/uts/common/fs/zfs/vdev_disk.c
@@ -591,7 +591,7 @@ vdev_disk_close(vdev_t *vd)
int
vdev_disk_physio(vdev_t *vd, caddr_t data,
- size_t size, uint64_t offset, int flags)
+ size_t size, uint64_t offset, int flags, boolean_t isdump)
{
vdev_disk_t *dvd = vd->vdev_tsd;
@@ -603,6 +603,17 @@ vdev_disk_physio(vdev_t *vd, caddr_t data,
return (EIO);
ASSERT(vd->vdev_ops == &vdev_disk_ops);
+
+ /*
+ * If in the context of an active crash dump, use the ldi_dump(9F)
+ * call instead of ldi_strategy(9F) as usual.
+ */
+ if (isdump) {
+ ASSERT3P(dvd, !=, NULL);
+ return (ldi_dump(dvd->vd_lh, data, lbtodb(offset),
+ lbtodb(size)));
+ }
+
return (vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags));
}
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c
index b22bcd2b2f..e47632ea70 100644
--- a/usr/src/uts/common/fs/zfs/vdev_raidz.c
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c
@@ -1586,7 +1586,7 @@ vdev_raidz_close(vdev_t *vd)
*/
int
vdev_raidz_physio(vdev_t *vd, caddr_t data, size_t size,
- uint64_t offset, uint64_t origoffset, boolean_t doread)
+ uint64_t offset, uint64_t origoffset, boolean_t doread, boolean_t isdump)
{
vdev_t *tvd = vd->vdev_top;
vdev_t *cvd;
@@ -1659,7 +1659,7 @@ vdev_raidz_physio(vdev_t *vd, caddr_t data, size_t size,
if ((err = vdev_disk_physio(cvd,
((char *)rc->rc_data) + colskip, colsize,
VDEV_LABEL_OFFSET(rc->rc_offset) + colskip,
- flags)) != 0)
+ flags, isdump)) != 0)
break;
}
diff --git a/usr/src/uts/common/fs/zfs/zio_checksum.c b/usr/src/uts/common/fs/zfs/zio_checksum.c
index 1a3b51030a..72bf9ac949 100644
--- a/usr/src/uts/common/fs/zfs/zio_checksum.c
+++ b/usr/src/uts/common/fs/zfs/zio_checksum.c
@@ -68,13 +68,6 @@ zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp)
ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
}
-/*
- * The sha256_mac checksum algorithm was added to try to maintain on-disk
- * compatibility with ZFS on other platforms. That effort didn't work for other
- * reasons. As a result, the sha256_mac algorithm is unused except in the rare
- * case of an older platform interpreting noparity as sha256_mac -- which is why
- * they both are no-ops.
- */
zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
{{NULL, NULL}, 0, 0, 0, "inherit"},
{{NULL, NULL}, 0, 0, 0, "on"},
@@ -86,7 +79,6 @@ zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
{{fletcher_4_native, fletcher_4_byteswap}, 1, 0, 0, "fletcher4"},
{{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, 1, "sha256"},
{{fletcher_4_native, fletcher_4_byteswap}, 0, 1, 0, "zilog2"},
- {{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "sha256_mac"},
{{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "noparity"},
};
diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c
index 3659a87746..0e6319fb6b 100644
--- a/usr/src/uts/common/fs/zfs/zvol.c
+++ b/usr/src/uts/common/fs/zfs/zvol.c
@@ -54,6 +54,7 @@
#include <sys/stat.h>
#include <sys/zap.h>
#include <sys/spa.h>
+#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/dmu_traverse.h>
#include <sys/dnode.h>
@@ -83,6 +84,7 @@
#include <sys/zil_impl.h>
#include <sys/sdt.h>
#include <sys/dbuf.h>
+#include <sys/zfeature.h>
#include "zfs_namecheck.h"
@@ -1134,7 +1136,7 @@ zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset,
if (vd->vdev_ops == &vdev_raidz_ops) {
return (vdev_raidz_physio(vd,
- addr, size, offset, origoffset, doread));
+ addr, size, offset, origoffset, doread, isdump));
}
offset += VDEV_LABEL_START_SIZE;
@@ -1208,7 +1210,7 @@ zvol_strategy(buf_t *bp)
rl_t *rl;
int error = 0;
boolean_t doread = bp->b_flags & B_READ;
- boolean_t is_dump;
+ boolean_t is_dumpified;
boolean_t sync;
if (getminor(bp->b_edev) == 0) {
@@ -1251,11 +1253,11 @@ zvol_strategy(buf_t *bp)
return (0);
}
- is_dump = zv->zv_flags & ZVOL_DUMPIFIED;
+ is_dumpified = zv->zv_flags & ZVOL_DUMPIFIED;
sync = ((!(bp->b_flags & B_ASYNC) &&
!(zv->zv_flags & ZVOL_WCE)) ||
(zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) &&
- !doread && !is_dump;
+ !doread && !is_dumpified;
/*
* There must be no buffer changes when doing a dmu_sync() because
@@ -1266,7 +1268,7 @@ zvol_strategy(buf_t *bp)
while (resid != 0 && off < volsize) {
size_t size = MIN(resid, zvol_maxphys);
- if (is_dump) {
+ if (is_dumpified) {
size = MIN(size, P2END(off, zv->zv_volblocksize) - off);
error = zvol_dumpio(zv, addr, off, size,
doread, B_FALSE);
@@ -1840,11 +1842,14 @@ zvol_fini(void)
static int
zvol_dump_init(zvol_state_t *zv, boolean_t resize)
{
- dmu_tx_t *tx;
- int error = 0;
objset_t *os = zv->zv_objset;
+ spa_t *spa = dmu_objset_spa(os);
+ vdev_t *vd = spa->spa_root_vdev;
+ uint64_t version = spa_version(spa);
+
+ dmu_tx_t *tx;
nvlist_t *nv = NULL;
- uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset));
+ int error;
ASSERT(MUTEX_HELD(&zfsdev_state_lock));
error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0,
@@ -1862,6 +1867,24 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize)
}
/*
+ * If the pool on which the dump device is being initialized has more
+ * than one child vdev, check that the MULTI_VDEV_CRASH_DUMP feature is
+ * enabled. If so, bump that feature's counter to indicate that the
+ * feature is active. Only bump the counter if the feature was
+ * previously inactive.
+ */
+ ASSERT(vd->vdev_ops == &vdev_root_ops);
+ if (vd->vdev_children > 1) {
+ zfeature_info_t *feature =
+ &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP];
+
+ if (!spa_feature_is_enabled(spa, feature))
+ return (SET_ERROR(ENOTSUP));
+ if (!spa_feature_is_active(spa, feature))
+ spa_feature_incr(spa, feature, tx);
+ }
+
+ /*
* If we are resizing the dump device then we only need to
* update the refreservation to match the newly updated
* zvolsize. Otherwise, we save off the original state of the
@@ -2002,6 +2025,9 @@ zvol_dump_fini(zvol_state_t *zv)
{
dmu_tx_t *tx;
objset_t *os = zv->zv_objset;
+ spa_t *spa = dmu_objset_spa(os);
+ vdev_t *vd = spa->spa_root_vdev;
+
nvlist_t *nv;
int error = 0;
uint64_t checksum, compress, refresrv, vbs, dedup;
@@ -2021,6 +2047,19 @@ zvol_dump_fini(zvol_state_t *zv)
dmu_tx_abort(tx);
return (error);
}
+
+ /*
+ * As in zvol_dump_init(), decrement the MULTI_VDEV_CRASH_DUMP feature's
+ * refcount if this vdev has more than one child.
+ */
+ if (vd->vdev_children > 1) {
+ zfeature_info_t *feature =
+ &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP];
+
+ if (spa_feature_is_active(spa, feature))
+ spa_feature_decr(spa, feature, tx);
+ }
+
(void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx);
dmu_tx_commit(tx);