diff options
author | Joe Stein <joe.stein@delphix.com> | 2016-04-12 13:44:02 -0700 |
---|---|---|
committer | Matthew Ahrens <mahrens@delphix.com> | 2016-04-14 09:19:36 -0700 |
commit | 215198a6ad15cf4832370e2f19247abeb36b951a (patch) | |
tree | 79e75d9a0dfe0931c1ea52fbb8b2a8f4505e2cf1 | |
parent | 11d8e1e018d047bd620f057f0fef71a61f7992a7 (diff) | |
download | illumos-joyent-215198a6ad15cf4832370e2f19247abeb36b951a.tar.gz |
6736 ZFS per-vdev ZAPs
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: John Kennedy <john.kennedy@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Don Brady <don.brady@intel.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
26 files changed, 1028 insertions, 69 deletions
diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf index 457fe05e29..43d4b57d15 100644 --- a/usr/src/pkg/manifests/system-test-zfstest.mf +++ b/usr/src/pkg/manifests/system-test-zfstest.mf @@ -135,6 +135,7 @@ dir path=opt/zfs-tests/tests/functional/threadsappend dir path=opt/zfs-tests/tests/functional/truncate dir path=opt/zfs-tests/tests/functional/userquota dir path=opt/zfs-tests/tests/functional/utils_test +dir path=opt/zfs-tests/tests/functional/vdev_zaps dir path=opt/zfs-tests/tests/functional/write_dirs dir path=opt/zfs-tests/tests/functional/xattr dir path=opt/zfs-tests/tests/functional/zvol @@ -2169,6 +2170,16 @@ file path=opt/zfs-tests/tests/functional/utils_test/utils_test_008_pos \ mode=0555 file path=opt/zfs-tests/tests/functional/utils_test/utils_test_009_pos \ mode=0555 +file path=opt/zfs-tests/tests/functional/vdev_zaps/cleanup mode=0555 +file path=opt/zfs-tests/tests/functional/vdev_zaps/setup mode=0555 +file path=opt/zfs-tests/tests/functional/vdev_zaps/vdev_zaps.kshlib mode=0555 +file path=opt/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_001_pos mode=0555 +file path=opt/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_002_pos mode=0555 +file path=opt/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_003_pos mode=0555 +file path=opt/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_004_pos mode=0555 +file path=opt/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_005_pos mode=0555 +file path=opt/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_006_pos mode=0555 +file path=opt/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_007_pos mode=0555 file path=opt/zfs-tests/tests/functional/write_dirs/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/write_dirs/setup mode=0555 file path=opt/zfs-tests/tests/functional/write_dirs/write_dirs_001_pos \ diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run index bcd19ae686..e8c27d21cb 100644 --- 
a/usr/src/test/zfs-tests/runfiles/delphix.run +++ b/usr/src/test/zfs-tests/runfiles/delphix.run @@ -523,6 +523,11 @@ tests = ['utils_test_001_pos', 'utils_test_002_pos', 'utils_test_003_pos', 'utils_test_004_pos', 'utils_test_005_pos', 'utils_test_006_pos', 'utils_test_007_pos', 'utils_test_008_pos', 'utils_test_009_pos'] +[/opt/zfs-tests/tests/functional/vdev_zaps] +tests = ['vdev_zaps_001_pos', 'vdev_zaps_002_pos', 'vdev_zaps_003_pos', + 'vdev_zaps_004_pos', 'vdev_zaps_005_pos', 'vdev_zaps_006_pos', + 'vdev_zaps_007_pos'] + [/opt/zfs-tests/tests/functional/write_dirs] tests = ['write_dirs_001_pos', 'write_dirs_002_pos'] diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run index 2aadc501c1..72f3242ad2 100644 --- a/usr/src/test/zfs-tests/runfiles/omnios.run +++ b/usr/src/test/zfs-tests/runfiles/omnios.run @@ -522,6 +522,11 @@ tests = ['utils_test_001_pos', 'utils_test_002_pos', 'utils_test_003_pos', 'utils_test_004_pos', 'utils_test_005_pos', 'utils_test_006_pos', 'utils_test_007_pos', 'utils_test_008_pos', 'utils_test_009_pos'] +[/opt/zfs-tests/tests/functional/vdev_zaps] +tests = ['vdev_zaps_001_pos', 'vdev_zaps_002_pos', 'vdev_zaps_003_pos', + 'vdev_zaps_004_pos', 'vdev_zaps_005_pos', 'vdev_zaps_006_pos', + 'vdev_zaps_007_pos'] + [/opt/zfs-tests/tests/functional/write_dirs] tests = ['write_dirs_001_pos', 'write_dirs_002_pos'] diff --git a/usr/src/test/zfs-tests/runfiles/openindiana.run b/usr/src/test/zfs-tests/runfiles/openindiana.run index bc6c80d491..5244544e78 100644 --- a/usr/src/test/zfs-tests/runfiles/openindiana.run +++ b/usr/src/test/zfs-tests/runfiles/openindiana.run @@ -522,6 +522,11 @@ tests = ['utils_test_001_pos', 'utils_test_002_pos', 'utils_test_003_pos', 'utils_test_004_pos', 'utils_test_005_pos', 'utils_test_006_pos', 'utils_test_007_pos', 'utils_test_008_pos', 'utils_test_009_pos'] +[/opt/zfs-tests/tests/functional/vdev_zaps] +tests = ['vdev_zaps_001_pos', 'vdev_zaps_002_pos', 'vdev_zaps_003_pos', 
+ 'vdev_zaps_004_pos', 'vdev_zaps_005_pos', 'vdev_zaps_006_pos', + 'vdev_zaps_007_pos'] + [/opt/zfs-tests/tests/functional/write_dirs] tests = ['write_dirs_001_pos', 'write_dirs_002_pos'] diff --git a/usr/src/test/zfs-tests/tests/functional/Makefile b/usr/src/test/zfs-tests/tests/functional/Makefile index 6f3638a7dd..f2247b0219 100644 --- a/usr/src/test/zfs-tests/tests/functional/Makefile +++ b/usr/src/test/zfs-tests/tests/functional/Makefile @@ -72,6 +72,7 @@ SUBDIRS = acl \ truncate \ userquota \ utils_test \ + vdev_zaps \ write_dirs \ xattr \ zvol diff --git a/usr/src/test/zfs-tests/tests/functional/vdev_zaps/Makefile b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/Makefile new file mode 100644 index 0000000000..904ee33767 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/Makefile @@ -0,0 +1,49 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 by Delphix. All rights reserved. 
+# + +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/zfs-tests +TESTDIR = $(ROOTOPTPKG)/tests/functional/vdev_zaps + +PROGS = setup \ + vdev_zaps_001_pos \ + vdev_zaps_002_pos \ + vdev_zaps_003_pos \ + vdev_zaps_004_pos \ + vdev_zaps_005_pos \ + vdev_zaps_006_pos \ + vdev_zaps_007_pos \ + cleanup + +FILES = vdev_zaps.kshlib + +CMDS = $(PROGS:%=$(TESTDIR)/%) $(FILES:%=$(TESTDIR)/%) +$(CMDS) := FILEMODE = 0555 + +all lint clean clobber: + +install: $(CMDS) + +$(CMDS): $(TESTDIR) + +$(TESTDIR): + $(INS.dir) + +$(TESTDIR)/%: %.ksh + $(INS.rename) + +$(TESTDIR)/%: % + $(INS.file) diff --git a/usr/src/test/zfs-tests/tests/functional/vdev_zaps/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/cleanup.ksh new file mode 100644 index 0000000000..04cd8b375c --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/cleanup.ksh @@ -0,0 +1,20 @@ +#!/usr/bin/ksh -p + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/usr/src/test/zfs-tests/tests/functional/vdev_zaps/setup.ksh b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/setup.ksh new file mode 100644 index 0000000000..fea9671c68 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/setup.ksh @@ -0,0 +1,21 @@ +#!/usr/bin/ksh -p + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +rm -rf $TESTDIR || log_fail Could not remove $TESTDIR +mkdir -p $TESTDIR || log_fail Could not create $TESTDIR diff --git a/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps.kshlib b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps.kshlib new file mode 100644 index 0000000000..f783eefee3 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps.kshlib @@ -0,0 +1,114 @@ +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2015 by Delphix. All rights reserved. 
+# + +function get_conf_section # regex conf +{ + typeset dsk_line next_vd_line conf section + typeset regex="$1" + typeset conf="$2" + + dsk_line=$(grep -n "$regex" "$conf" | awk -F: '{print $1}') + if [[ -z "$dsk_line" ]]; then + return + fi + next_vd_line=$(tail -n +$dsk_line "$conf" | \ + grep -n "children\[" | awk -F: '{print $1}' | head -n 1) + + if [[ -n "$next_vd_line" ]]; then + section=$(cat "$conf" | sed "1,${dsk_line}d" | head -n \ + $(($next_vd_line - 2))) + + else + section=$(tail -n +$dsk_line "$conf") + fi + echo "$section" +} + +function get_leaf_vd_zap # dsk conf +{ + typeset section=$(get_conf_section "$1" "$2") + echo "$section" | egrep \ + "com.delphix:vdev_zap_leaf: [0-9]+" | awk '{print $2}' +} + +function get_top_vd_zap # dsk conf +{ + typeset section=$(get_conf_section "$1" "$2") + echo "$section" | egrep \ + "com.delphix:vdev_zap_top: [0-9]+" | awk '{print $2}' +} + +function assert_has_sentinel # conf +{ + res=$(grep "com.delphix:has_per_vdev_zaps" "$1") + [[ -z "$res" ]] && log_fail "Pool missing ZAP feature sentinel value" +} + +function assert_zap_common # pool vd lvl zapobj +{ + typeset pool=$1 + typeset vd="$2" + typeset lvl=$3 + typeset zapobj=$4 + + if [[ -z "$zapobj" ]]; then + log_fail "$vd on $pool has no $lvl ZAP in config" + elif [[ -z "$(zdb -d $pool $zapobj | grep 'zap')" ]]; then + log_fail "$vd on $pool has no $lvl ZAP in MOS" + fi +} + +function assert_top_zap # pool vd conf +{ + typeset pool=$1 + typeset vd="$2" + typeset conf=$3 + + top_zap=$(get_top_vd_zap "$vd" $conf) + assert_zap_common $pool "$vd" "top" $top_zap +} + +function assert_leaf_zap # pool vd conf +{ + typeset pool=$1 + typeset vd="$2" + typeset conf=$3 + + leaf_zap=$(get_leaf_vd_zap "$vd" $conf) + assert_zap_common $pool "$vd" "leaf" $leaf_zap +} + +# +# Code common to setup/teardown for each test. 
+# + +function cleanup +{ + if datasetexists $TESTPOOL ; then + log_must zpool destroy -f $TESTPOOL + fi + if [[ -e $conf ]]; then + log_must $RM -f "$conf" + fi + if [[ -n $POOL2 ]] && datasetexists $POOL2 ; then # POOL2 is a pool name, not a path + log_must zpool destroy -f $POOL2 + fi +} + +log_onexit cleanup diff --git a/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_001_pos.ksh b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_001_pos.ksh new file mode 100644 index 0000000000..fe7dff6570 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_001_pos.ksh @@ -0,0 +1,42 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 by Delphix. All rights reserved. +# + +# +# Description: +# Verify that per-vdev ZAPs are created with one vdev. +# +# Strategy: +# 1. Create a pool with one disk. +# 2. Verify that the disk has a top and leaf ZAP in its config and the MOS. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/vdev_zaps/vdev_zaps.kshlib + +log_assert "Per-vdev ZAPs are created on pool creation with one disk." + +DISK=${DISKS%% *} + +log_must zpool create -f $TESTPOOL $DISK +conf="$TESTDIR/vz001" +log_must zdb -PC $TESTPOOL > $conf + +assert_top_zap $TESTPOOL $DISK "$conf" +assert_leaf_zap $TESTPOOL $DISK "$conf" +assert_has_sentinel "$conf" + +log_pass "Per-vdev ZAPs are created in a one-disk pool." 
diff --git a/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_002_pos.ksh b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_002_pos.ksh new file mode 100644 index 0000000000..01d04ab156 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_002_pos.ksh @@ -0,0 +1,44 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 by Delphix. All rights reserved. +# + +# +# Description: +# Verify that per-vdev ZAPs are created with multiple vdevs. +# +# Strategy: +# 1. Create a pool with multiple disks. +# 2. Verify that each has both a top and leaf zap. +# 3. Verify that each of those ZAPs exists in the MOS. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/vdev_zaps/vdev_zaps.kshlib + +log_assert "Per-vdev ZAPs are created on pool creation with many disks." + +log_must zpool create -f $TESTPOOL $DISKS + +conf="$TESTDIR/vz002" +log_must zdb -PC $TESTPOOL > $conf + +assert_has_sentinel "$conf" +for DISK in $DISKS; do + assert_top_zap $TESTPOOL $DISK "$conf" + assert_leaf_zap $TESTPOOL $DISK "$conf" +done + +log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_003_pos.ksh b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_003_pos.ksh new file mode 100644 index 0000000000..b6d9ffbda1 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_003_pos.ksh @@ -0,0 +1,47 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 by Delphix. All rights reserved. +# + +# +# Description: +# Verify that per-vdev ZAPs are created with multi-level vdev tree. +# +# Strategy: +# 1. Create a pool with a multi-disk mirror. +# 2. Verify that mirror has top ZAP but no leaf ZAP. +# 3. Verify that each disk has a leaf ZAP but no top ZAP. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/vdev_zaps/vdev_zaps.kshlib + +log_assert "Per-vdev ZAPs are created on pool creation with multi-level vdev "\ + "trees." + +log_must zpool create -f $TESTPOOL mirror $DISKS + +conf="$TESTDIR/vz003" +log_must zdb -PC $TESTPOOL > $conf + +assert_has_sentinel "$conf" +assert_top_zap $TESTPOOL "type: 'mirror'" "$conf" +for DISK in $DISKS; do + assert_leaf_zap $TESTPOOL $DISK "$conf" + top_zap=$(get_top_vd_zap $DISK "$conf") + [[ -n "$top_zap" ]] && log_fail "Leaf vdev $DISK has top-level ZAP." +done + +log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_004_pos.ksh b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_004_pos.ksh new file mode 100644 index 0000000000..a84de65777 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_004_pos.ksh @@ -0,0 +1,94 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 by Delphix. All rights reserved. 
+# + +# +# Description: +# Verify that per-vdev ZAPs are properly transferred on attach/detach. +# +# Strategy: +# 1. Create a pool with one disk. Verify that it has a top and leaf ZAP. +# 2. Attach a disk. +# 3. Verify that top-level and leaf-level ZAPs were transferred properly. +# 4. Verify that the newly-attached disk has a leaf ZAP. +# 5. Detach the original disk. +# 6. Verify that top-level and leaf-level ZAPs were transferred properly. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/vdev_zaps/vdev_zaps.kshlib + +log_assert "Per-vdev ZAPs are transferred properly on attach/detach" + +DISK=${DISKS%% *} +log_must zpool create -f $TESTPOOL $DISK + +# Make the pool. +conf="$TESTDIR/vz004" +log_must zdb -PC $TESTPOOL > $conf +assert_has_sentinel "$conf" +orig_top=$(get_top_vd_zap $DISK $conf) +orig_leaf=$(get_leaf_vd_zap $DISK $conf) +assert_zap_common $TESTPOOL $DISK "top" $orig_top + +# +# Attach a disk. +# + +disk2=$(echo $DISKS | awk '{print $2}') +log_must zpool attach $TESTPOOL $DISK $disk2 +log_must zdb -PC $TESTPOOL > $conf + +# Ensure top-level ZAP was transferred successfully. +new_top=$(get_top_vd_zap "type: 'mirror'" $conf) +if [[ "$new_top" -ne "$orig_top" ]]; then + log_fail "Top-level ZAP wasn't transferred successfully on attach." +fi + +# Ensure leaf ZAP of original disk was transferred successfully. +new_leaf=$(get_leaf_vd_zap $DISK $conf) +if [[ "$new_leaf" -ne "$orig_leaf" ]]; then + log_fail "$DISK used to have leaf-level ZAP $orig_leaf, now has "\ + "$new_leaf" +fi +# Ensure original disk no longer has top-level ZAP. +dsk1_top=$(get_top_vd_zap $DISK $conf) +[[ -n "$dsk1_top" ]] && log_fail "$DISK has top-level ZAP, but is only leaf." + +# Ensure attached disk got a leaf-level ZAP but not a top-level ZAP. +dsk2_top=$(get_top_vd_zap $disk2 $conf) +dsk2_leaf=$(get_leaf_vd_zap $disk2 $conf) +[[ -n "$dsk2_top" ]] && log_fail "Attached disk $disk2 has top ZAP." 
+[[ -z "$dsk2_leaf" ]] && log_fail "Attached disk $disk2 has no leaf ZAP." + +# +# Detach original disk. +# + +log_must zpool detach $TESTPOOL $DISK +log_must zdb -PC $TESTPOOL > $conf + +final_top=$(get_top_vd_zap $disk2 $conf) +final_leaf=$(get_leaf_vd_zap $disk2 $conf) +# Make sure top ZAP was successfully transferred. +[[ "$final_top" -ne "$orig_top" ]] && log_fail "Lost top-level ZAP when "\ + "promoting $disk2 (expected $orig_top, found $final_top)" + +# Make sure leaf ZAP was successfully transferred. +[[ "$final_leaf" -ne "$dsk2_leaf" ]] && log_fail "$disk2 lost its leaf ZAP "\ + "on promotion (expected $dsk2_leaf, got $final_leaf)" + +log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_005_pos.ksh b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_005_pos.ksh new file mode 100644 index 0000000000..8cf8e6d405 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_005_pos.ksh @@ -0,0 +1,62 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 by Delphix. All rights reserved. +# + +# +# Description: +# Verify that per-vdev ZAPs persist when the pool is exported and imported. +# +# Strategy: +# 1. Create a pool with a disk. +# 2. Export the pool and re-import it. +# 3. Verify that the ZAPs aren't different. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/vdev_zaps/vdev_zaps.kshlib + +log_assert "Per-vdev ZAPs persist across export/import." + +DISK=${DISKS%% *} +log_must zpool create -f $TESTPOOL $DISK + +# Make the pool. 
+conf="$TESTDIR/vz005" +log_must zdb -PC $TESTPOOL > $conf +assert_has_sentinel "$conf" +orig_top=$(get_top_vd_zap $DISK $conf) +orig_leaf=$(get_leaf_vd_zap $DISK $conf) +assert_zap_common $TESTPOOL $DISK "top" $orig_top +assert_zap_common $TESTPOOL $DISK "leaf" $orig_leaf + +# Export the pool. +log_must zpool export $TESTPOOL + +# Import the pool. +log_must zpool import $TESTPOOL + +# Verify that ZAPs persisted. +log_must zdb -PC $TESTPOOL > $conf + +new_top=$(get_top_vd_zap $DISK $conf) +new_leaf=$(get_leaf_vd_zap $DISK $conf) + +[[ "$new_top" -ne "$orig_top" ]] && log_fail "Top ZAP ($new_top) after "\ + "import does not match top ZAP before export ($orig_top)" +[[ "$new_leaf" -ne "$orig_leaf" ]] && log_fail "Leaf ZAP ($new_leaf) after "\ + "import does not match leaf ZAP before export ($orig_leaf)" + +log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_006_pos.ksh b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_006_pos.ksh new file mode 100644 index 0000000000..0476bcda91 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_006_pos.ksh @@ -0,0 +1,46 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015, 2016 by Delphix. All rights reserved. +# + +# +# Description: +# Verify that top-level per-vdev ZAPs are created for added devices +# +# Strategy: +# 1. Create a pool with one disk. +# 2. Add a disk. +# 3. Verify its ZAPs were created. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/vdev_zaps/vdev_zaps.kshlib + +DISK_ARR=($DISKS) +DISK=${DISK_ARR[0]} +log_must zpool create -f $TESTPOOL $DISK + +log_assert "Per-vdev ZAPs are created for added vdevs." + +log_must zpool add -f $TESTPOOL ${DISK_ARR[1]} +conf="$TESTDIR/vz006" +log_must zdb -PC $TESTPOOL > $conf + +assert_has_sentinel "$conf" +orig_top=$(get_top_vd_zap ${DISK_ARR[1]} $conf) +assert_zap_common $TESTPOOL ${DISK_ARR[1]} "top" $orig_top +assert_leaf_zap $TESTPOOL ${DISK_ARR[1]} "$conf" + +log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_007_pos.ksh b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_007_pos.ksh new file mode 100644 index 0000000000..1f71b11ee5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_007_pos.ksh @@ -0,0 +1,74 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 by Delphix. All rights reserved. +# + +# +# Description: +# Verify that ZAPs are handled properly during mirror pool splitting. +# +# Strategy: +# 1. Create a pool with a two-way mirror. +# 2. Split the pool. +# 3. Verify that the ZAPs in the old pool persisted. +# 4. Import the new pool. +# 5. Verify that the ZAPs in the new pool persisted. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/vdev_zaps/vdev_zaps.kshlib + +DISK_ARR=($DISKS) +POOL2=${TESTPOOL}2 +log_must zpool create -f $TESTPOOL mirror ${DISK_ARR[0]} ${DISK_ARR[1]} + +log_assert "Per-vdev ZAPs persist correctly on the original pool after split." 
+conf="$TESTDIR/vz007" +log_must zdb -PC $TESTPOOL > $conf + +assert_has_sentinel "$conf" +orig_top=$(get_top_vd_zap "type: 'mirror'" $conf) +orig_leaf0=$(get_leaf_vd_zap ${DISK_ARR[0]} $conf) +orig_leaf1=$(get_leaf_vd_zap ${DISK_ARR[1]} $conf) +assert_zap_common $TESTPOOL "type: 'mirror'" "top" $orig_top +assert_zap_common $TESTPOOL ${DISK_ARR[0]} "leaf" $orig_leaf0 +assert_zap_common $TESTPOOL ${DISK_ARR[1]} "leaf" $orig_leaf1 + +log_must zpool split $TESTPOOL $POOL2 ${DISK_ARR[1]} + +# Make sure old pool's ZAPs are consistent. +log_must zdb -PC $TESTPOOL > $conf +new_leaf0=$(get_leaf_vd_zap ${DISK_ARR[0]} $conf) +new_top_s0=$(get_top_vd_zap ${DISK_ARR[0]} $conf) + +[[ "$new_leaf0" -ne "$orig_leaf0" ]] && log_fail "Leaf ZAP in original pool "\ + "didn't persist (expected $orig_leaf0, got $new_leaf0)" +[[ "$new_top_s0" -ne "$orig_top" ]] && log_fail "Top ZAP in original pool "\ + "didn't persist (expected $orig_top, got $new_top_s0)" + +log_assert "Per-vdev ZAPs persist on the new pool after import." + +# Import the split pool. 
+log_must zpool import $POOL2 +log_must zdb -PC $POOL2 > $conf # inspect the imported split-off pool, not the original + +new_leaf1=$(get_leaf_vd_zap ${DISK_ARR[1]} $conf) +new_top_s1=$(get_top_vd_zap ${DISK_ARR[1]} $conf) +[[ "$new_leaf1" -ne "$orig_leaf1" ]] && log_fail "Leaf ZAP in new pool "\ + "didn't persist (expected $orig_leaf1, got $new_leaf1)" +[[ "$new_top_s1" -ne "$orig_top" ]] && log_fail "Top ZAP in new pool "\ + "didn't persist (expected $orig_top, got $new_top_s1)" + +log_pass diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index decc58fdb3..32139f0aa4 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -1610,6 +1610,19 @@ spa_check_removed(vdev_t *vd) } } +static void +spa_config_valid_zaps(vdev_t *vd, vdev_t *mvd) +{ + ASSERT3U(vd->vdev_children, ==, mvd->vdev_children); + + vd->vdev_top_zap = mvd->vdev_top_zap; + vd->vdev_leaf_zap = mvd->vdev_leaf_zap; + + for (uint64_t i = 0; i < vd->vdev_children; i++) { + spa_config_valid_zaps(vd->vdev_child[i], mvd->vdev_child[i]); + } +} + /* * Validate the current config against the MOS config */ @@ -1713,16 +1726,25 @@ spa_config_valid(spa_t *spa, nvlist_t *config) spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_reopen(rvd); - } else if (mtvd->vdev_islog) { + } else { + if (mtvd->vdev_islog) { + /* + * Load the slog device's state from the MOS + * config since it's possible that the label + * does not contain the most up-to-date + * information. + */ + vdev_load_log_state(tvd, mtvd); + vdev_reopen(tvd); + } + /* - * Load the slog device's state from the MOS config - * since it's possible that the label does not - * contain the most up-to-date information. + * Per-vdev ZAP info is stored exclusively in the MOS. 
*/ - vdev_load_log_state(tvd, mtvd); - vdev_reopen(tvd); + spa_config_valid_zaps(tvd, mtvd); } } + vdev_free(mrvd); spa_config_exit(spa, SCL_ALL, FTAG); @@ -2140,6 +2162,34 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, } /* + * Count the number of per-vdev ZAPs associated with all of the vdevs in the + * vdev tree rooted in the given vd, and ensure that each ZAP is present in the + * spa's per-vdev ZAP list. + */ +static uint64_t +vdev_count_verify_zaps(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + uint64_t total = 0; + if (vd->vdev_top_zap != 0) { + total++; + ASSERT0(zap_lookup_int(spa->spa_meta_objset, + spa->spa_all_vdev_zaps, vd->vdev_top_zap)); + } + if (vd->vdev_leaf_zap != 0) { + total++; + ASSERT0(zap_lookup_int(spa->spa_meta_objset, + spa->spa_all_vdev_zaps, vd->vdev_leaf_zap)); + } + + for (uint64_t i = 0; i < vd->vdev_children; i++) { + total += vdev_count_verify_zaps(vd->vdev_child[i]); + } + + return (total); +} + +/* * Load an existing storage pool, using the pool's builtin spa_config as a * source of configuration information. */ @@ -2568,6 +2618,39 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); /* + * Load the per-vdev ZAP map. If we have an older pool, this will not + * be present; in this case, defer its creation to a later time to + * avoid dirtying the MOS this early / out of sync context. See + * spa_sync_config_object. + */ + + /* The sentinel is only available in the MOS config. 
*/ + nvlist_t *mos_config; + if (load_nvlist(spa, spa->spa_config_object, &mos_config) != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + + error = spa_dir_prop(spa, DMU_POOL_VDEV_ZAP_MAP, + &spa->spa_all_vdev_zaps); + if (error != ENOENT && error != 0) { + nvlist_free(mos_config); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + } else if (error == 0 && !nvlist_exists(mos_config, + ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)) { + /* + * An older version of ZFS overwrote the sentinel value, so + * we have orphaned per-vdev ZAPs in the MOS. Defer their + * destruction to later; see spa_sync_config_object. + */ + spa->spa_avz_action = AVZ_ACTION_DESTROY; + /* + * We're assuming that no vdevs have had their ZAPs created + * before this. Better be sure of it. + */ + ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev)); + } + nvlist_free(mos_config); + + /* * If we're assembling the pool from the split-off vdevs of * an existing pool, we don't want to attach the spares & cache * devices. @@ -5039,6 +5122,16 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, vml[c]->vdev_top->vdev_asize) == 0); VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT, vml[c]->vdev_top->vdev_ashift) == 0); + + /* transfer per-vdev ZAPs */ + ASSERT3U(vml[c]->vdev_leaf_zap, !=, 0); + VERIFY0(nvlist_add_uint64(child[c], + ZPOOL_CONFIG_VDEV_LEAF_ZAP, vml[c]->vdev_leaf_zap)); + + ASSERT3U(vml[c]->vdev_top->vdev_top_zap, !=, 0); + VERIFY0(nvlist_add_uint64(child[c], + ZPOOL_CONFIG_VDEV_TOP_ZAP, + vml[c]->vdev_top->vdev_top_zap)); } if (error != 0) { @@ -5080,11 +5173,13 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, spa->spa_config_txg) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_generate_guid(NULL)) == 0); + VERIFY0(nvlist_add_boolean(config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)); (void) nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); /* add the new pool to the namespace */ newspa = spa_add(newname, config, 
altroot); + newspa->spa_avz_action = AVZ_ACTION_REBUILD; newspa->spa_config_txg = spa->spa_config_txg; spa_set_log_state(newspa, SPA_LOG_CLEAR); @@ -5142,9 +5237,11 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, if (error == 0) spa_history_log_internal(spa, "detach", tx, "vdev=%s", vml[c]->vdev_path); + vdev_free(vml[c]); } } + spa->spa_avz_action = AVZ_ACTION_REBUILD; vdev_config_dirty(spa->spa_root_vdev); spa->spa_config_splitting = NULL; nvlist_free(nvl); @@ -5983,16 +6080,118 @@ spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, sav->sav_sync = B_FALSE; } +/* + * Rebuild spa's all-vdev ZAP from the vdev ZAPs indicated in each vdev_t. + * The all-vdev ZAP must be empty. + */ +static void +spa_avz_build(vdev_t *vd, uint64_t avz, dmu_tx_t *tx) +{ + spa_t *spa = vd->vdev_spa; + if (vd->vdev_top_zap != 0) { + VERIFY0(zap_add_int(spa->spa_meta_objset, avz, + vd->vdev_top_zap, tx)); + } + if (vd->vdev_leaf_zap != 0) { + VERIFY0(zap_add_int(spa->spa_meta_objset, avz, + vd->vdev_leaf_zap, tx)); + } + for (uint64_t i = 0; i < vd->vdev_children; i++) { + spa_avz_build(vd->vdev_child[i], avz, tx); + } +} + static void spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) { nvlist_t *config; - if (list_is_empty(&spa->spa_config_dirty_list)) + /* + * If the pool is being imported from a pre-per-vdev-ZAP version of ZFS, + * its config may not be dirty but we still need to build per-vdev ZAPs. + * Similarly, if the pool is being assembled (e.g. after a split), we + * need to rebuild the AVZ although the config may not be dirty. 
+ */ + if (list_is_empty(&spa->spa_config_dirty_list) && + spa->spa_avz_action == AVZ_ACTION_NONE) return; spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); + ASSERT(spa->spa_avz_action == AVZ_ACTION_NONE || + spa->spa_all_vdev_zaps != 0); + + if (spa->spa_avz_action == AVZ_ACTION_REBUILD) { + /* Make and build the new AVZ */ + uint64_t new_avz = zap_create(spa->spa_meta_objset, + DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); + spa_avz_build(spa->spa_root_vdev, new_avz, tx); + + /* Diff old AVZ with new one */ + zap_cursor_t zc; + zap_attribute_t za; + + for (zap_cursor_init(&zc, spa->spa_meta_objset, + spa->spa_all_vdev_zaps); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + uint64_t vdzap = za.za_first_integer; + if (zap_lookup_int(spa->spa_meta_objset, new_avz, + vdzap) == ENOENT) { + /* + * ZAP is listed in old AVZ but not in new one; + * destroy it + */ + VERIFY0(zap_destroy(spa->spa_meta_objset, vdzap, + tx)); + } + } + + zap_cursor_fini(&zc); + + /* Destroy the old AVZ */ + VERIFY0(zap_destroy(spa->spa_meta_objset, + spa->spa_all_vdev_zaps, tx)); + + /* Replace the old AVZ in the dir obj with the new one */ + VERIFY0(zap_update(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP, + sizeof (new_avz), 1, &new_avz, tx)); + + spa->spa_all_vdev_zaps = new_avz; + } else if (spa->spa_avz_action == AVZ_ACTION_DESTROY) { + zap_cursor_t zc; + zap_attribute_t za; + + /* Walk through the AVZ and destroy all listed ZAPs */ + for (zap_cursor_init(&zc, spa->spa_meta_objset, + spa->spa_all_vdev_zaps); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + uint64_t zap = za.za_first_integer; + VERIFY0(zap_destroy(spa->spa_meta_objset, zap, tx)); + } + + zap_cursor_fini(&zc); + + /* Destroy and unlink the AVZ itself */ + VERIFY0(zap_destroy(spa->spa_meta_objset, + spa->spa_all_vdev_zaps, tx)); + VERIFY0(zap_remove(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP, tx)); + spa->spa_all_vdev_zaps = 0; + 
} + + if (spa->spa_all_vdev_zaps == 0) { + spa->spa_all_vdev_zaps = zap_create_link(spa->spa_meta_objset, + DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_VDEV_ZAP_MAP, tx); + } + spa->spa_avz_action = AVZ_ACTION_NONE; + + /* Create ZAPs for vdevs that don't have them. */ + vdev_construct_zaps(spa->spa_root_vdev, tx); + config = spa_config_generate(spa, spa->spa_root_vdev, dmu_tx_get_txg(tx), B_FALSE); @@ -6393,6 +6592,21 @@ spa_sync(spa_t *spa, uint64_t txg) } while (dmu_objset_is_dirty(mos, txg)); + if (!list_is_empty(&spa->spa_config_dirty_list)) { + /* + * Make sure that the number of ZAPs for all the vdevs matches + * the number of ZAPs in the per-vdev ZAP list. This only gets + * called if the config is dirty; otherwise there may be + * outstanding AVZ operations that weren't completed in + * spa_sync_config_object. + */ + uint64_t all_vdev_zap_entry_count; + ASSERT0(zap_count(spa->spa_meta_objset, + spa->spa_all_vdev_zaps, &all_vdev_zap_entry_count)); + ASSERT3U(vdev_count_verify_zaps(spa->spa_root_vdev), ==, + all_vdev_zap_entry_count); + } + /* * Rewrite the vdev configuration (which includes the uberblock) * to commit the transaction group. diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c index 69fe1f8e77..6f44dfa270 100644 --- a/usr/src/uts/common/fs/zfs/spa_config.c +++ b/usr/src/uts/common/fs/zfs/spa_config.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. 
*/ #include <sys/spa.h> @@ -124,7 +124,7 @@ spa_config_load(void) if (nvpair_type(nvpair) != DATA_TYPE_NVLIST) continue; - VERIFY(nvpair_value_nvlist(nvpair, &child) == 0); + child = fnvpair_value_nvlist(nvpair); if (spa_lookup(nvpair_name(nvpair)) != NULL) continue; @@ -162,14 +162,9 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) /* * Pack the configuration into a buffer. */ - VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0); - - buf = kmem_alloc(buflen, KM_SLEEP); + buf = fnvlist_pack(nvl, &buflen); temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP); - VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR, - KM_SLEEP) == 0); - /* * Write the configuration to disk. We need to do the traditional * 'write to temporary file, sync, move over original' to make sure we @@ -191,7 +186,7 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) (void) vn_remove(temp, UIO_SYSSPACE, RMFILE); - kmem_free(buf, buflen); + fnvlist_pack_free(buf, buflen); kmem_free(temp, MAXPATHLEN); return (err); } @@ -256,11 +251,10 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) } if (nvl == NULL) - VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, - KM_SLEEP) == 0); + nvl = fnvlist_alloc(); - VERIFY(nvlist_add_nvlist(nvl, spa->spa_name, - spa->spa_config) == 0); + fnvlist_add_nvlist(nvl, spa->spa_name, + spa->spa_config); mutex_exit(&spa->spa_props_lock); } @@ -322,15 +316,15 @@ spa_all_configs(uint64_t *generation) if (*generation == spa_config_generation) return (NULL); - VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_SLEEP) == 0); + pools = fnvlist_alloc(); mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) { if (INGLOBALZONE(curproc) || zone_dataset_visible(spa_name(spa), NULL)) { mutex_enter(&spa->spa_props_lock); - VERIFY(nvlist_add_nvlist(pools, spa_name(spa), - spa->spa_config) == 0); + fnvlist_add_nvlist(pools, spa_name(spa), + spa->spa_config); mutex_exit(&spa->spa_props_lock); } } @@ -379,21 +373,17 @@ spa_config_generate(spa_t 
*spa, vdev_t *vd, uint64_t txg, int getstats) if (txg == -1ULL) txg = spa->spa_config_txg; - VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0); - - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, - spa_version(spa)) == 0); - VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, - spa_name(spa)) == 0); - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, - spa_state(spa)) == 0); - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, - txg) == 0); - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, - spa_guid(spa)) == 0); - VERIFY(spa->spa_comment == NULL || nvlist_add_string(config, - ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0); + config = fnvlist_alloc(); + fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa)); + fnvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, spa_name(spa)); + fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, spa_state(spa)); + fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, txg); + fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa)); + if (spa->spa_comment != NULL) { + fnvlist_add_string(config, ZPOOL_CONFIG_COMMENT, + spa->spa_comment); + } #ifdef _KERNEL hostid = zone_get_hostid(NULL); @@ -405,23 +395,24 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) (void) ddi_strtoul(hw_serial, NULL, 10, &hostid); #endif /* _KERNEL */ if (hostid != 0) { - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, - hostid) == 0); + fnvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid); } - VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, - utsname.nodename) == 0); + fnvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, utsname.nodename); + int config_gen_flags = 0; if (vd != rvd) { - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID, - vd->vdev_top->vdev_guid) == 0); - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID, - vd->vdev_guid) == 0); - if (vd->vdev_isspare) - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE, - 1ULL) == 0); - if 
(vd->vdev_islog) - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG, - 1ULL) == 0); + fnvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID, + vd->vdev_top->vdev_guid); + fnvlist_add_uint64(config, ZPOOL_CONFIG_GUID, + vd->vdev_guid); + if (vd->vdev_isspare) { + fnvlist_add_uint64(config, + ZPOOL_CONFIG_IS_SPARE, 1ULL); + } + if (vd->vdev_islog) { + fnvlist_add_uint64(config, + ZPOOL_CONFIG_IS_LOG, 1ULL); + } vd = vd->vdev_top; /* label contains top config */ } else { /* @@ -429,8 +420,12 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) * in the mos config, and not in the vdev labels */ if (spa->spa_config_splitting != NULL) - VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT, - spa->spa_config_splitting) == 0); + fnvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT, + spa->spa_config_splitting); + fnvlist_add_boolean(config, + ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS); + + config_gen_flags |= VDEV_CONFIG_MOS; } /* @@ -445,19 +440,19 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) if (spa->spa_config_splitting != NULL && nvlist_lookup_uint64(spa->spa_config_splitting, ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) { - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID, - split_guid) == 0); + fnvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID, + split_guid); } - nvroot = vdev_config_generate(spa, vd, getstats, 0); - VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); + nvroot = vdev_config_generate(spa, vd, getstats, config_gen_flags); + fnvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot); nvlist_free(nvroot); /* * Store what's necessary for reading the MOS in the label. 
*/ - VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ, - spa->spa_label_features) == 0); + fnvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ, + spa->spa_label_features); if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) { ddt_histogram_t *ddh; @@ -466,23 +461,23 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); ddt_get_dedup_histogram(spa, ddh); - VERIFY(nvlist_add_uint64_array(config, + fnvlist_add_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM, - (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0); + (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)); kmem_free(ddh, sizeof (ddt_histogram_t)); ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP); ddt_get_dedup_object_stats(spa, ddo); - VERIFY(nvlist_add_uint64_array(config, + fnvlist_add_uint64_array(config, ZPOOL_CONFIG_DDT_OBJ_STATS, - (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0); + (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)); kmem_free(ddo, sizeof (ddt_object_t)); dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP); ddt_get_dedup_stats(spa, dds); - VERIFY(nvlist_add_uint64_array(config, + fnvlist_add_uint64_array(config, ZPOOL_CONFIG_DDT_STATS, - (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0); + (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)); kmem_free(dds, sizeof (ddt_stat_t)); } diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h index adad5ab143..6b8649af40 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h @@ -321,6 +321,7 @@ typedef struct dmu_buf { #define DMU_POOL_BPTREE_OBJ "bptree_obj" #define DMU_POOL_EMPTY_BPOBJ "empty_bpobj" #define DMU_POOL_CHECKSUM_SALT "org.illumos:checksum_salt" +#define DMU_POOL_VDEV_ZAP_MAP "com.delphix:vdev_zap_map" /* * Allocate an object from this objset. 
The range of object numbers diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h index 4418001982..e92fd4aa79 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h @@ -116,6 +116,12 @@ typedef struct spa_taskqs { taskq_t **stqs_taskq; } spa_taskqs_t; +typedef enum spa_all_vdev_zap_action { + AVZ_ACTION_NONE = 0, + AVZ_ACTION_DESTROY, /* Destroy all per-vdev ZAPs and the AVZ. */ + AVZ_ACTION_REBUILD /* Populate the new AVZ, see spa_avz_build */ +} spa_avz_action_t; + struct spa { /* * Fields protected by spa_namespace_lock. @@ -253,6 +259,8 @@ struct spa { uint64_t spa_deadman_calls; /* number of deadman calls */ hrtime_t spa_sync_starttime; /* starting time fo spa_sync */ uint64_t spa_deadman_synctime; /* deadman expiration timer */ + uint64_t spa_all_vdev_zaps; /* ZAP of per-vd ZAP obj #s */ + spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? */ /* * spa_iokstat_lock protects spa_iokstat and diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h index 1f8b118715..cd221e07ab 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev.h @@ -71,6 +71,10 @@ extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, extern boolean_t vdev_dtl_required(vdev_t *vd); extern boolean_t vdev_resilver_needed(vdev_t *vd, uint64_t *minp, uint64_t *maxp); +extern void vdev_destroy_unlink_zap(vdev_t *vd, uint64_t zapobj, + dmu_tx_t *tx); +extern uint64_t vdev_create_link_zap(vdev_t *vd, dmu_tx_t *tx); +extern void vdev_construct_zaps(vdev_t *vd, dmu_tx_t *tx); extern void vdev_hold(vdev_t *); extern void vdev_rele(vdev_t *); @@ -130,7 +134,8 @@ extern void vdev_state_clean(vdev_t *vd); typedef enum vdev_config_flag { VDEV_CONFIG_SPARE = 1 << 0, VDEV_CONFIG_L2CACHE = 1 << 1, - VDEV_CONFIG_REMOVING = 1 << 2 + VDEV_CONFIG_REMOVING = 1 << 2, + VDEV_CONFIG_MOS = 1 << 3 } vdev_config_flag_t; extern void 
vdev_top_config_generate(spa_t *spa, nvlist_t *config); diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h index 17a18a3199..3f1b7d8a54 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h @@ -172,6 +172,7 @@ struct vdev { uint64_t vdev_islog; /* is an intent log device */ uint64_t vdev_removing; /* device is being removed? */ boolean_t vdev_ishole; /* is a hole in the namespace */ + uint64_t vdev_top_zap; /* * Leaf vdev state. @@ -210,6 +211,7 @@ struct vdev { spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */ zio_t *vdev_probe_zio; /* root of current probe */ vdev_aux_t vdev_label_aux; /* on-disk aux state */ + uint64_t vdev_leaf_zap; /* * For DTrace to work in userland (libzpool) context, these fields must diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index 08ff8871e9..6f30154d73 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -515,6 +515,10 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, &vd->vdev_asize); (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVING, &vd->vdev_removing); + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP, + &vd->vdev_top_zap); + } else { + ASSERT0(vd->vdev_top_zap); } if (parent && !parent->vdev_parent && alloctype != VDEV_ALLOC_ATTACH) { @@ -526,9 +530,18 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, spa_log_class(spa) : spa_normal_class(spa), vd); } + if (vd->vdev_ops->vdev_op_leaf && + (alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_SPLIT)) { + (void) nvlist_lookup_uint64(nv, + ZPOOL_CONFIG_VDEV_LEAF_ZAP, &vd->vdev_leaf_zap); + } else { + ASSERT0(vd->vdev_leaf_zap); + } + /* * If we're a leaf vdev, try to load the DTL object and other state. 
*/ + if (vd->vdev_ops->vdev_op_leaf && (alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_L2CACHE || alloctype == VDEV_ALLOC_ROOTPOOL)) { @@ -689,10 +702,12 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd) tvd->vdev_ms_array = svd->vdev_ms_array; tvd->vdev_ms_shift = svd->vdev_ms_shift; tvd->vdev_ms_count = svd->vdev_ms_count; + tvd->vdev_top_zap = svd->vdev_top_zap; svd->vdev_ms_array = 0; svd->vdev_ms_shift = 0; svd->vdev_ms_count = 0; + svd->vdev_top_zap = 0; if (tvd->vdev_mg) ASSERT3P(tvd->vdev_mg, ==, svd->vdev_mg); @@ -1934,6 +1949,49 @@ vdev_dtl_load(vdev_t *vd) } void +vdev_destroy_unlink_zap(vdev_t *vd, uint64_t zapobj, dmu_tx_t *tx) +{ + spa_t *spa = vd->vdev_spa; + + VERIFY0(zap_destroy(spa->spa_meta_objset, zapobj, tx)); + VERIFY0(zap_remove_int(spa->spa_meta_objset, spa->spa_all_vdev_zaps, + zapobj, tx)); +} + +uint64_t +vdev_create_link_zap(vdev_t *vd, dmu_tx_t *tx) +{ + spa_t *spa = vd->vdev_spa; + uint64_t zap = zap_create(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, + DMU_OT_NONE, 0, tx); + + ASSERT(zap != 0); + VERIFY0(zap_add_int(spa->spa_meta_objset, spa->spa_all_vdev_zaps, + zap, tx)); + + return (zap); +} + +void +vdev_construct_zaps(vdev_t *vd, dmu_tx_t *tx) +{ + if (vd->vdev_ops != &vdev_hole_ops && + vd->vdev_ops != &vdev_missing_ops && + vd->vdev_ops != &vdev_root_ops && + !vd->vdev_top->vdev_removing) { + if (vd->vdev_ops->vdev_op_leaf && vd->vdev_leaf_zap == 0) { + vd->vdev_leaf_zap = vdev_create_link_zap(vd, tx); + } + if (vd == vd->vdev_top && vd->vdev_top_zap == 0) { + vd->vdev_top_zap = vdev_create_link_zap(vd, tx); + } + } + for (uint64_t i = 0; i < vd->vdev_children; i++) { + vdev_construct_zaps(vd->vdev_child[i], tx); + } +} + +void vdev_dtl_sync(vdev_t *vd, uint64_t txg) { spa_t *spa = vd->vdev_spa; @@ -1955,6 +2013,18 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg) space_map_close(vd->vdev_dtl_sm); vd->vdev_dtl_sm = NULL; mutex_exit(&vd->vdev_dtl_lock); + + /* + * We only destroy the leaf ZAP for detached leaves or for + * removed 
log devices. Removed data devices handle leaf ZAP + * cleanup later, once cancellation is no longer possible. + */ + if (vd->vdev_leaf_zap != 0 && (vd->vdev_detached || + vd->vdev_top->vdev_islog)) { + vdev_destroy_unlink_zap(vd, vd->vdev_leaf_zap, tx); + vd->vdev_leaf_zap = 0; + } + dmu_tx_commit(tx); return; } @@ -2157,6 +2227,8 @@ vdev_remove(vdev_t *vd, uint64_t txg) dmu_tx_t *tx; tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg); + ASSERT(vd == vd->vdev_top); + ASSERT3U(txg, ==, spa_syncing_txg(spa)); if (vd->vdev_ms != NULL) { metaslab_group_t *mg = vd->vdev_mg; @@ -2198,6 +2270,11 @@ vdev_remove(vdev_t *vd, uint64_t txg) (void) dmu_object_free(mos, vd->vdev_ms_array, tx); vd->vdev_ms_array = 0; } + + if (vd->vdev_islog && vd->vdev_top_zap != 0) { + vdev_destroy_unlink_zap(vd, vd->vdev_top_zap, tx); + vd->vdev_top_zap = 0; + } dmu_tx_commit(tx); } diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c index b3daecb9f2..866046315c 100644 --- a/usr/src/uts/common/fs/zfs/vdev_label.c +++ b/usr/src/uts/common/fs/zfs/vdev_label.c @@ -291,6 +291,20 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, if (vd->vdev_crtxg) fnvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, vd->vdev_crtxg); + if (flags & VDEV_CONFIG_MOS) { + if (vd->vdev_leaf_zap != 0) { + ASSERT(vd->vdev_ops->vdev_op_leaf); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_LEAF_ZAP, + vd->vdev_leaf_zap); + } + + if (vd->vdev_top_zap != 0) { + ASSERT(vd == vd->vdev_top); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP, + vd->vdev_top_zap); + } + } + if (getstats) { vdev_stat_t vs; pool_scan_stat_t ps; diff --git a/usr/src/uts/common/fs/zfs/zap.c b/usr/src/uts/common/fs/zfs/zap.c index c3bb19e11f..26bb8d4b09 100644 --- a/usr/src/uts/common/fs/zfs/zap.c +++ b/usr/src/uts/common/fs/zfs/zap.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. */ @@ -962,8 +962,8 @@ zap_create_link(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj, uint64_t new_obj; VERIFY((new_obj = zap_create(os, ot, DMU_OT_NONE, 0, tx)) > 0); - VERIFY(zap_add(os, parent_obj, name, sizeof (uint64_t), 1, &new_obj, - tx) == 0); + VERIFY0(zap_add(os, parent_obj, name, sizeof (uint64_t), 1, &new_obj, + tx)); return (new_obj); } diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index 5d52f819cb..7ecd889c5e 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -552,6 +552,9 @@ typedef struct zpool_rewind_policy { #define ZPOOL_CONFIG_CAN_RDONLY "can_rdonly" /* not stored on disk */ #define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read" #define ZPOOL_CONFIG_FEATURE_STATS "feature_stats" /* not stored on disk */ +#define ZPOOL_CONFIG_VDEV_TOP_ZAP "com.delphix:vdev_zap_top" +#define ZPOOL_CONFIG_VDEV_LEAF_ZAP "com.delphix:vdev_zap_leaf" +#define ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS "com.delphix:has_per_vdev_zaps" /* * The persistent vdev state is stored as separate values rather than a single * 'vdev_state' entry. This is because a device can be in multiple states, such |