summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mike Zeller <mike@mikezeller.net> 2018-10-24 12:27:04 -0400
committer Trent Mick <trentm@gmail.com> 2018-10-24 12:27:04 -0400
commit 3bd1e76265e7655e601372e51bc2413198378122 (patch)
tree 738a2278205087fe3444161c0208748a2f925d72
parent f5903b691655d2bffd8dfb3bcee76486d7d59240 (diff)
download illumos-joyent-cr4997-OS-4718.tar.gz
OS-4718 ZFS actively hostile to 512e drive replacements [cr4997-OS-4718]
-rw-r--r-- usr/src/uts/common/fs/zfs/spa.c 10
-rw-r--r-- usr/src/uts/common/fs/zfs/sys/vdev_impl.h 1
-rw-r--r-- usr/src/uts/common/fs/zfs/vdev_disk.c 22
3 files changed, 29 insertions, 4 deletions
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index 599ae9fade..ffc1363ced 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -5733,11 +5733,15 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));
/*
- * The new device cannot have a higher alignment requirement
- * than the top-level vdev.
+ * The new device cannot have a higher alignment requirement than the
+ * top-level vdev. If this is an Advanced Format (e.g. 512e) disk, we
+ * also need to check the fallback logical ashift value.
*/
- if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
+ if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift &&
+ (newvd->vdev_ashift_af == 0 ||
+ newvd->vdev_ashift_af > oldvd->vdev_top->vdev_ashift)) {
return (spa_vdev_exit(spa, newrootvd, txg, EDOM));
+ }
/*
* If this is an in-place replacement, update oldvd's path and devid
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
index 71753cf24f..76acba5b32 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
@@ -201,6 +201,7 @@ struct vdev {
uint64_t vdev_min_asize; /* min acceptable asize */
uint64_t vdev_max_asize; /* max acceptable asize */
uint64_t vdev_ashift; /* block alignment shift */
+ uint64_t vdev_ashift_af; /* adv. format fallback shift */
uint64_t vdev_state; /* see VDEV_STATE_* #defines */
uint64_t vdev_prevstate; /* used when reopening a vdev */
vdev_ops_t *vdev_ops; /* vdev operations */
diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c
index e4b86b419b..b7bbc48aff 100644
--- a/usr/src/uts/common/fs/zfs/vdev_disk.c
+++ b/usr/src/uts/common/fs/zfs/vdev_disk.c
@@ -530,11 +530,31 @@ skip_open:
VDEV_DEBUG("vdev_disk_open(\"%s\"): "
"both DKIOCGMEDIAINFO{,EXT} calls failed, %d\n",
vd->vdev_path, error);
- pbsize = DEV_BSIZE;
+ blksz = pbsize = DEV_BSIZE;
}
*ashift = highbit64(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1;
+ /*
+ * Advanced Format (512e) disks have a 4KB physical sector size, but
+ * also report a 512 byte logical sector size (through emulation in the
+ * firmware) to better support legacy operating systems. While we
+ * generally wish to create new pools with a 4KB block size, we also
+ * need to allow people to use AF disks in their existing 512 byte
+ * pools, even if not completely optimal.
+ */
+ if (blksz != 0 && blksz < pbsize) {
+ /*
+ * The logical block size is smaller than the reported physical
+ * block size. Record the logical ashift so that
+ * spa_vdev_attach() can use it as a fallback.
+ */
+ vd->vdev_ashift_af = highbit64(MAX(blksz,
+ SPA_MINBLOCKSIZE)) - 1;
+ } else {
+ vd->vdev_ashift_af = 0;
+ }
+
if (vd->vdev_wholedisk == 1) {
int wce = 1;