diff options
author | Chunwei Chen <david.chen@nutanix.com> | 2019-11-07 11:20:28 +0000 |
---|---|---|
committer | Andy Fiddaman <omnios@citrus-it.co.uk> | 2019-11-11 20:21:17 +0000 |
commit | d8849d7dee03b84a3fa281ec65eb9e3d86d3756b (patch) | |
tree | 9a9a9671c199b369a3c4f8efff69b3b926412604 /usr | |
parent | ad234cdc80febfeac2ea24977ab7cf9a7cc466ba (diff) | |
download | illumos-joyent-d8849d7dee03b84a3fa281ec65eb9e3d86d3756b.tar.gz |
11943 Fix out-of-order ZIL txtype lost on hardlinked files
11942 Panic on zil/slog replay when TX_REMOVE followed by TX_CREATE
Portions contributed by: Ryan Moeller <ryan@freqlabs.com>
Portions contributed by: Andy Fiddaman <andy@omniosce.org>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr')
32 files changed, 464 insertions, 49 deletions
diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf index 0f9f6932d9..4f76b99667 100644 --- a/usr/src/pkg/manifests/system-test-zfstest.mf +++ b/usr/src/pkg/manifests/system-test-zfstest.mf @@ -2988,6 +2988,8 @@ file path=opt/zfs-tests/tests/functional/slog/slog_012_neg mode=0555 file path=opt/zfs-tests/tests/functional/slog/slog_013_pos mode=0555 file path=opt/zfs-tests/tests/functional/slog/slog_014_pos mode=0555 file path=opt/zfs-tests/tests/functional/slog/slog_015_neg mode=0555 +file path=opt/zfs-tests/tests/functional/slog/slog_replay_fs_001 mode=0555 +file path=opt/zfs-tests/tests/functional/slog/slog_replay_fs_002 mode=0555 file path=opt/zfs-tests/tests/functional/snapshot/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/snapshot/clone_001_pos mode=0555 file path=opt/zfs-tests/tests/functional/snapshot/deadlist_lock mode=0555 diff --git a/usr/src/test/zfs-tests/include/libtest.shlib b/usr/src/test/zfs-tests/include/libtest.shlib index a28f015de8..363f674f03 100644 --- a/usr/src/test/zfs-tests/include/libtest.shlib +++ b/usr/src/test/zfs-tests/include/libtest.shlib @@ -2880,3 +2880,22 @@ function get_tunable_impl return 1 } + +# +# Compute SHA256 digest for given file or stdin if no file given. 
+# Note: file path must not contain spaces +# +function sha256digest +{ + typeset file=$1 + + if [ -x /usr/bin/digest ]; then + /usr/bin/digest -a sha256 $file + elif [ -x /usr/bin/sha256sum ]; then + /usr/bin/sha256sum -b $file | awk '{ print $1 }' + else + echo "Cannot calculate SHA256 digest" + return 1 + fi + return 0 +} diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run index 3e0b66aba8..d2e0f4df1d 100644 --- a/usr/src/test/zfs-tests/runfiles/delphix.run +++ b/usr/src/test/zfs-tests/runfiles/delphix.run @@ -641,7 +641,7 @@ tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos', tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos', 'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg', 'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg', - 'slog_013_pos', 'slog_014_pos'] + 'slog_013_pos', 'slog_014_pos', 'slog_replay_fs_001', 'slog_replay_fs_002'] [/opt/zfs-tests/tests/functional/snapshot] tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos', diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run index 998f8db0ca..b198fb9674 100644 --- a/usr/src/test/zfs-tests/runfiles/omnios.run +++ b/usr/src/test/zfs-tests/runfiles/omnios.run @@ -640,7 +640,7 @@ tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos', tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos', 'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg', 'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg', - 'slog_013_pos', 'slog_014_pos'] + 'slog_013_pos', 'slog_014_pos', 'slog_replay_fs_001', 'slog_replay_fs_002'] [/opt/zfs-tests/tests/functional/snapshot] tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos', diff --git a/usr/src/test/zfs-tests/runfiles/openindiana.run b/usr/src/test/zfs-tests/runfiles/openindiana.run index 6392469657..8d64315e5a 100644 --- a/usr/src/test/zfs-tests/runfiles/openindiana.run 
+++ b/usr/src/test/zfs-tests/runfiles/openindiana.run @@ -640,7 +640,7 @@ tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos', tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos', 'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg', 'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg', - 'slog_013_pos', 'slog_014_pos'] + 'slog_013_pos', 'slog_014_pos', 'slog_replay_fs_001', 'slog_replay_fs_002'] [/opt/zfs-tests/tests/functional/snapshot] tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos', diff --git a/usr/src/test/zfs-tests/runfiles/smartos.run b/usr/src/test/zfs-tests/runfiles/smartos.run index 0f04168d75..ef6133bcfc 100644 --- a/usr/src/test/zfs-tests/runfiles/smartos.run +++ b/usr/src/test/zfs-tests/runfiles/smartos.run @@ -550,7 +550,7 @@ tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos', tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos', 'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg', 'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg', - 'slog_013_pos', 'slog_014_pos'] + 'slog_013_pos', 'slog_014_pos', 'slog_replay_fs_001', 'slog_replay_fs_002'] [/opt/zfs-tests/tests/functional/snapshot] tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos', diff --git a/usr/src/test/zfs-tests/tests/functional/slog/setup.ksh b/usr/src/test/zfs-tests/tests/functional/slog/setup.ksh index c5c1a82710..3521001108 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/setup.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/setup.ksh @@ -38,13 +38,4 @@ if ! 
verify_slog_support ; then log_unsupported "This system doesn't support separate intent logs" fi -if [[ -d $VDEV ]]; then - log_must rm -rf $VDIR -fi -if [[ -d $VDEV2 ]]; then - log_must rm -rf $VDIR2 -fi -log_must mkdir -p $VDIR $VDIR2 -log_must mkfile $MINVDEVSIZE $VDEV $SDEV $LDEV $VDEV2 $SDEV2 $LDEV2 - log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog.kshlib b/usr/src/test/zfs-tests/tests/functional/slog/slog.kshlib index 493ceda60d..f96baf4967 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog.kshlib +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog.kshlib @@ -31,10 +31,20 @@ . $STF_SUITE/include/libtest.shlib . $STF_SUITE/tests/functional/slog/slog.cfg +function setup +{ + log_must rm -rf $VDIR $VDIR2 + log_must mkdir -p $VDIR $VDIR2 + log_must truncate -s $MINVDEVSIZE $VDEV $SDEV $LDEV $VDEV2 $SDEV2 $LDEV2 + + return 0 +} + function cleanup { poolexists $TESTPOOL && destroy_pool $TESTPOOL poolexists $TESTPOOL2 && destroy_pool $TESTPOOL2 + rm -rf $TESTDIR $VDIR $VDIR2 } # diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_001_pos.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_001_pos.ksh index 3d3daf5f9c..a4c35ed9e9 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_001_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_001_pos.ksh @@ -45,6 +45,7 @@ verify_runnable "global" log_assert "Creating a pool with a log device succeeds." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_002_pos.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_002_pos.ksh index b056f19cdb..91904aa612 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_002_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_002_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Adding a log device to normal pool works." 
log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_003_pos.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_003_pos.ksh index c647b8f54b..0b4d6ede3e 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_003_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_003_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Adding an extra log device works." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_004_pos.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_004_pos.ksh index 4b0b3439a2..10f28dcc00 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_004_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_004_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Attaching a log device passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_005_pos.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_005_pos.ksh index cbbb948691..4836f6f279 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_005_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_005_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Detaching a log device passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_006_pos.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_006_pos.ksh index 53e8c67ca0..24143196fd 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_006_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_006_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Replacing a log device passes." 
log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_007_pos.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_007_pos.ksh index 4926fb7b31..27ac38606c 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_007_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_007_pos.ksh @@ -48,6 +48,7 @@ verify_runnable "global" log_assert "Exporting and importing pool with log devices passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_008_neg.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_008_neg.ksh index 587e0e3212..54587a0c61 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_008_neg.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_008_neg.ksh @@ -44,6 +44,7 @@ verify_runnable "global" log_assert "A raidz/raidz2 log is not supported." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_009_neg.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_009_neg.ksh index e7091f17b7..222f71a999 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_009_neg.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_009_neg.ksh @@ -45,6 +45,7 @@ verify_runnable "global" log_assert "A raidz/raidz2 log can not be added to existed pool." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_010_neg.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_010_neg.ksh index 8fe248ffbc..edd9abea09 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_010_neg.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_010_neg.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Slog device can not be replaced with spare device." 
log_onexit cleanup +log_must setup log_must zpool create $TESTPOOL $VDEV spare $SDEV log $LDEV sdev=$(random_get $SDEV) diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_011_neg.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_011_neg.ksh index 2dad200b31..3bebc82017 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_011_neg.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_011_neg.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Offline and online a log device passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_012_neg.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_012_neg.ksh index 758563f23b..07e4e271fa 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_012_neg.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_012_neg.ksh @@ -45,6 +45,7 @@ verify_runnable "global" log_assert "Pool can survive when one of mirror log device get corrupted." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_013_pos.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_013_pos.ksh index 3607da7928..65a1ac5d68 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_013_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_013_pos.ksh @@ -54,6 +54,7 @@ log_assert "Verify slog device can be disk, file, lofi device or any device " \ "that presents a block interface." 
verify_disk_count "$DISKS" 2 log_onexit cleanup_testenv +log_must setup dsk1=${DISKS%% *} log_must zpool create $TESTPOOL ${DISKS#$dsk1} diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_014_pos.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_014_pos.ksh index b476d497b8..efd876d554 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_014_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_014_pos.ksh @@ -45,6 +45,7 @@ verify_runnable "global" log_assert "log device can survive when one of the pool device get corrupted." +log_must setup for type in "mirror" "raidz" "raidz2"; do for spare in "" "spare"; do diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_015_neg.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_015_neg.ksh index 952c135a9a..0ca9e0b620 100644 --- a/usr/src/test/zfs-tests/tests/functional/slog/slog_015_neg.ksh +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_015_neg.ksh @@ -47,6 +47,7 @@ function cleanup ORIG_TIMEOUT=$(mdb -ke "zfs_commit_timeout_pct/J" | tail -1 | awk '{print $NF}') log_onexit cleanup +log_must setup for PCT in 0 1 2 4 8 16 32 64 128 256 512 1024; do log_must mdb -kwe "zfs_commit_timeout_pct/Z $PCT" diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh new file mode 100755 index 0000000000..364113925d --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh @@ -0,0 +1,215 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/tests/functional/slog/slog.kshlib + +# +# DESCRIPTION: +# Verify slogs are replayed correctly. This test is a direct +# adaptation of the ziltest.sh script for the ZFS Test Suite. +# +# The general idea is to build up an intent log from a bunch of +# diverse user commands without actually committing them to the +# file system. Then copy the file system, replay the intent +# log and compare the file system and the copy. +# +# To enable this automated testing of the intent log some minimal +# support is required of the file system. In particular, a +# "freeze" command is required to flush the in-flight transactions; +# to stop the actual committing of transactions; and to ensure no +# deltas are discarded. All deltas past a freeze point are kept +# for replay and comparison later. Here is the flow: +# +# STRATEGY: +# 1. Create an empty file system (TESTFS) +# 2. Freeze TESTFS +# 3. Run various user commands that create files, directories and ACLs +# 4. Copy TESTFS to temporary location (TESTDIR/copy) +# 5. Unmount filesystem +# <at this stage TESTFS is empty again and still frozen, and the +# intent log contains a complete set of deltas to replay it> +# 6. Remount TESTFS <which replays the intent log> +# 7. Compare TESTFS against the TESTDIR/copy +# + +verify_runnable "global" + +# As long as we are not running slog_015_neg, the test pool could be hanging +# around. +poolexists $TESTPOOL && zpool destroy -f $TESTPOOL + +log_assert "Replay of intent log succeeds." 
+log_onexit cleanup +log_must setup + +# +# 1. Create an empty file system (TESTFS) +# +log_must zpool create $TESTPOOL $VDEV log mirror $LDEV +log_must zfs set compression=on $TESTPOOL +log_must zfs create $TESTPOOL/$TESTFS + +# +# This dd command works around an issue where ZIL records aren't created +# after freezing the pool unless a ZIL header already exists. Create a file +# synchronously to force ZFS to write one out. +# +log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \ + oflag=dsync,sync bs=1 count=1 + +# +# 2. Freeze TESTFS +# +log_must zpool freeze $TESTPOOL + +# +# 3. Run various user commands that create files, directories and ACLs +# + +# TX_CREATE +log_must touch /$TESTPOOL/$TESTFS/a + +# TX_RENAME +log_must mv /$TESTPOOL/$TESTFS/a /$TESTPOOL/$TESTFS/b + +# TX_SYMLINK +log_must touch /$TESTPOOL/$TESTFS/c +log_must ln -s /$TESTPOOL/$TESTFS/c /$TESTPOOL/$TESTFS/d + +# TX_LINK +log_must touch /$TESTPOOL/$TESTFS/e +log_must ln /$TESTPOOL/$TESTFS/e /$TESTPOOL/$TESTFS/f + +# TX_MKDIR +log_must mkdir /$TESTPOOL/$TESTFS/dir_to_delete + +# TX_RMDIR +log_must rmdir /$TESTPOOL/$TESTFS/dir_to_delete + +# Create a simple validation payload +log_must mkdir -p $TESTDIR +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/payload bs=1k count=8 +typeset checksum=$(sha256digest /$TESTPOOL/$TESTFS/payload) + +# TX_WRITE (small file with ordering) +log_must mkfile 1k /$TESTPOOL/$TESTFS/small_file +log_must mkfile 512b /$TESTPOOL/$TESTFS/small_file + +# TX_CREATE, TX_MKDIR, TX_REMOVE, TX_RMDIR +log_must cp -R /usr/dict /$TESTPOOL/$TESTFS +log_must rm -rf /$TESTPOOL/$TESTFS/dict + +# TX_SETATTR +log_must touch /$TESTPOOL/$TESTFS/setattr +log_must chmod 567 /$TESTPOOL/$TESTFS/setattr +log_must chgrp root /$TESTPOOL/$TESTFS/setattr +log_must touch -cm -t 201311271200 /$TESTPOOL/$TESTFS/setattr + +# TX_TRUNCATE (to zero) +log_must mkfile 4k /$TESTPOOL/$TESTFS/truncated_file +log_must truncate -s 0 /$TESTPOOL/$TESTFS/truncated_file + +# TX_WRITE (large file) +log_must dd 
if=/dev/urandom of=/$TESTPOOL/$TESTFS/large \ + bs=128k count=64 oflag=sync + +# Write zeros, which compress to holes, in the middle of a file +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.1 bs=128k count=8 +log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/holes.1 bs=128k count=2 + +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.2 bs=128k count=8 +log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/holes.2 bs=128k count=2 seek=2 + +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.3 bs=128k count=8 +log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/holes.3 bs=128k count=2 \ + seek=2 conv=notrunc + +# TX_MKXATTR +# log_must mkdir /$TESTPOOL/$TESTFS/xattr.dir +# log_must attr -qs fileattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.dir +# log_must attr -qs tmpattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.dir +# log_must attr -qr tmpattr /$TESTPOOL/$TESTFS/xattr.dir + +# log_must touch /$TESTPOOL/$TESTFS/xattr.file +# log_must attr -qs fileattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file +# log_must attr -qs tmpattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file +# log_must attr -qr tmpattr /$TESTPOOL/$TESTFS/xattr.file + +# TX_WRITE, TX_LINK, TX_REMOVE +# Make sure TX_REMOVE won't affect TX_WRITE if file is not destroyed +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/link_and_unlink bs=128k \ + count=8 +log_must ln /$TESTPOOL/$TESTFS/link_and_unlink \ + /$TESTPOOL/$TESTFS/link_and_unlink.link +log_must rm /$TESTPOOL/$TESTFS/link_and_unlink.link + +# +# 4. Copy TESTFS to temporary location (TESTDIR/copy) +# +log_must mkdir -p $TESTDIR/copy +log_must cp -a /$TESTPOOL/$TESTFS/* $TESTDIR/copy/ + +# +# 5. Unmount filesystem and export the pool +# +# At this stage TESTFS is empty again and frozen, the intent log contains +# a complete set of deltas to replay. +# +log_must zfs unmount /$TESTPOOL/$TESTFS + +log_note "Verify transactions to replay:" +log_must zdb -iv $TESTPOOL/$TESTFS + +log_must zpool export $TESTPOOL + +# +# 6. 
Remount TESTFS <which replays the intent log> +# +# Import the pool to unfreeze it and claim log blocks. It has to be +# `zpool import -f` because we can't write a frozen pool's labels! +# +log_must zpool import -f -d $VDIR $TESTPOOL + +# +# 7. Compare TESTFS against the TESTDIR/copy +# +log_note "Verify current block usage:" +log_must zdb -bcv $TESTPOOL + +# log_note "Verify copy of xattrs:" +# log_must attr -l /$TESTPOOL/$TESTFS/xattr.dir +# log_must attr -l /$TESTPOOL/$TESTFS/xattr.file + +log_note "Verify working set diff:" +log_must diff -r /$TESTPOOL/$TESTFS $TESTDIR/copy + +log_note "Verify file checksum:" +typeset checksum1=$(sha256digest /$TESTPOOL/$TESTFS/payload) +[[ "$checksum1" == "$checksum" ]] || \ + log_fail "checksum mismatch ($checksum1 != $checksum)" + +log_pass "Replay of intent log succeeds." diff --git a/usr/src/test/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh b/usr/src/test/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh new file mode 100755 index 0000000000..b334cac45b --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh @@ -0,0 +1,137 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/tests/functional/slog/slog.kshlib + +# +# DESCRIPTION: +# Verify slogs are replayed correctly when TX_REMOVEs are followed by +# TX_CREATEs. +# +# STRATEGY: +# 1. Create a file system (TESTFS) with a lot of files +# 2. Freeze TESTFS +# 3. Remove all files then create a lot of files +# 4. Copy TESTFS to temporary location (TESTDIR/copy) +# 5. Unmount filesystem +# <at this stage TESTFS is empty again and still frozen, and the +# intent log contains a complete set of deltas to replay it> +# 6. Remount TESTFS <which replays the intent log> +# 7. Compare TESTFS against the TESTDIR/copy +# + +verify_runnable "global" + +function cleanup_fs +{ + cleanup +} + +log_assert "Replay of intent log succeeds." +log_onexit cleanup_fs +log_must setup + +# +# 1. Create a file system (TESTFS) with a lot of files +# +log_must zpool create $TESTPOOL $VDEV log mirror $LDEV +log_must zfs set compression=on $TESTPOOL +log_must zfs create $TESTPOOL/$TESTFS + +# Prep for the test of TX_REMOVE followed by TX_CREATE +dnsize=(legacy auto 1k 2k 4k 8k 16k) +NFILES=200 +log_must mkdir /$TESTPOOL/$TESTFS/dir0 +log_must eval 'for i in $(seq $NFILES); do zfs set dnodesize=${dnsize[$RANDOM % ${#dnsize[@]}]} $TESTPOOL/$TESTFS; touch /$TESTPOOL/$TESTFS/dir0/file.$i; done' + +# +# Reimport to reset dnode allocation pointer. +# This is to make sure we will have TX_REMOVE and TX_CREATE on same id +# +log_must zpool export $TESTPOOL +log_must zpool import -f -d $VDIR $TESTPOOL + +# +# This dd command works around an issue where ZIL records aren't created +# after freezing the pool unless a ZIL header already exists. 
Create a file +# synchronously to force ZFS to write one out. +# +log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \ + oflag=dsync,sync bs=1 count=1 + +# +# 2. Freeze TESTFS +# +log_must zpool freeze $TESTPOOL + +# +# 3. Remove all files then create a lot of files +# +# TX_REMOVE followed by TX_CREATE +log_must eval 'rm -f /$TESTPOOL/$TESTFS/dir0/*' +log_must eval 'for i in $(seq $NFILES); do zfs set dnodesize=${dnsize[$RANDOM % ${#dnsize[@]}]} $TESTPOOL/$TESTFS; touch /$TESTPOOL/$TESTFS/dir0/file.$i; done' + +# +# 4. Copy TESTFS to temporary location (TESTDIR/copy) +# +log_must mkdir -p $TESTDIR/copy +log_must cp -a /$TESTPOOL/$TESTFS/* $TESTDIR/copy/ + +# +# 5. Unmount filesystem and export the pool +# +# At this stage TESTFS is empty again and frozen, the intent log contains +# a complete set of deltas to replay. +# +log_must zfs unmount /$TESTPOOL/$TESTFS + +log_note "Verify transactions to replay:" +log_must zdb -iv $TESTPOOL/$TESTFS + +log_must zpool export $TESTPOOL + +# +# 6. Remount TESTFS <which replays the intent log> +# +# Import the pool to unfreeze it and claim log blocks. It has to be +# `zpool import -f` because we can't write a frozen pool's labels! +# +log_must zpool import -f -d $VDIR $TESTPOOL + +# +# 7. Compare TESTFS against the TESTDIR/copy +# +log_note "Verify current block usage:" +log_must zdb -bcv $TESTPOOL + +log_note "Verify number of files" +log_must test "$(ls /$TESTPOOL/$TESTFS/dir0 | wc -l)" -eq $NFILES + +log_note "Verify working set diff:" +log_must diff -r /$TESTPOOL/$TESTFS $TESTDIR/copy + +log_pass "Replay of intent log succeeds." 
diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c index 90f425a800..2747d2f394 100644 --- a/usr/src/uts/common/fs/zfs/dnode.c +++ b/usr/src/uts/common/fs/zfs/dnode.c @@ -56,7 +56,6 @@ dnode_stats_t dnode_stats = { { "dnode_hold_free_lock_retry", KSTAT_DATA_UINT64 }, { "dnode_hold_free_overflow", KSTAT_DATA_UINT64 }, { "dnode_hold_free_refcount", KSTAT_DATA_UINT64 }, - { "dnode_hold_free_txg", KSTAT_DATA_UINT64 }, { "dnode_free_interior_lock_retry", KSTAT_DATA_UINT64 }, { "dnode_allocate", KSTAT_DATA_UINT64 }, { "dnode_reallocate", KSTAT_DATA_UINT64 }, @@ -1260,6 +1259,10 @@ dnode_buf_evict_async(void *dbu) * as an extra dnode slot by an large dnode, in which case it returns * ENOENT. * + * If the DNODE_DRY_RUN flag is set, we don't actually hold the dnode, just + * return whether the hold would succeed or not. tag and dnp should be set to + * NULL in this case. + * * errors: * EINVAL - invalid object number or flags. * ENOSPC - hole too small to fulfill "slots" request (DNODE_MUST_BE_FREE) @@ -1287,6 +1290,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, ASSERT(!(flag & DNODE_MUST_BE_ALLOCATED) || (slots == 0)); ASSERT(!(flag & DNODE_MUST_BE_FREE) || (slots > 0)); + IMPLY(flag & DNODE_DRY_RUN, (tag == NULL) && (dnp == NULL)); /* * If you are holding the spa config lock as writer, you shouldn't @@ -1316,8 +1320,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE) return (SET_ERROR(EEXIST)); DNODE_VERIFY(dn); - (void) zfs_refcount_add(&dn->dn_holds, tag); - *dnp = dn; + /* Don't actually hold if dry run, just return 0 */ + if (!(flag & DNODE_DRY_RUN)) { + (void) zfs_refcount_add(&dn->dn_holds, tag); + *dnp = dn; + } return (0); } @@ -1462,6 +1469,14 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, return (SET_ERROR(ENOENT)); } + /* Don't actually hold if dry run, just return 0 */ + if (flag & DNODE_DRY_RUN) { + 
mutex_exit(&dn->dn_mtx); + dnode_slots_rele(dnc, idx, slots); + dbuf_rele(db, FTAG); + return (0); + } + DNODE_STAT_BUMP(dnode_hold_alloc_hits); } else if (flag & DNODE_MUST_BE_FREE) { @@ -1521,6 +1536,14 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, return (SET_ERROR(EEXIST)); } + /* Don't actually hold if dry run, just return 0 */ + if (flag & DNODE_DRY_RUN) { + mutex_exit(&dn->dn_mtx); + dnode_slots_rele(dnc, idx, slots); + dbuf_rele(db, FTAG); + return (0); + } + dnode_set_slots(dnc, idx + 1, slots - 1, DN_SLOT_INTERIOR); DNODE_STAT_BUMP(dnode_hold_free_hits); } else { @@ -1528,15 +1551,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, return (SET_ERROR(EINVAL)); } - if (dn->dn_free_txg) { - DNODE_STAT_BUMP(dnode_hold_free_txg); - type = dn->dn_type; - mutex_exit(&dn->dn_mtx); - dnode_slots_rele(dnc, idx, slots); - dbuf_rele(db, FTAG); - return (SET_ERROR((flag & DNODE_MUST_BE_ALLOCATED) ? - ENOENT : EEXIST)); - } + ASSERT0(dn->dn_free_txg); if (zfs_refcount_add(&dn->dn_holds, tag) == 1) dbuf_add_ref(db, dnh); @@ -1627,6 +1642,16 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting) } } +/* + * Test whether we can create a dnode at the specified location. + */ +int +dnode_try_claim(objset_t *os, uint64_t object, int slots) +{ + return (dnode_hold_impl(os, object, DNODE_MUST_BE_FREE | DNODE_DRY_RUN, + slots, NULL, NULL)); +} + void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) { diff --git a/usr/src/uts/common/fs/zfs/sys/dnode.h b/usr/src/uts/common/fs/zfs/sys/dnode.h index 6c8ec5e229..054e467bb7 100644 --- a/usr/src/uts/common/fs/zfs/sys/dnode.h +++ b/usr/src/uts/common/fs/zfs/sys/dnode.h @@ -46,6 +46,7 @@ extern "C" { */ #define DNODE_MUST_BE_ALLOCATED 1 #define DNODE_MUST_BE_FREE 2 +#define DNODE_DRY_RUN 4 /* * dnode_next_offset() flags. 
@@ -393,6 +394,7 @@ int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, int dn_slots, boolean_t dnode_add_ref(dnode_t *dn, void *ref); void dnode_rele(dnode_t *dn, void *ref); void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting); +int dnode_try_claim(objset_t *os, uint64_t object, int slots); void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); void dnode_sync(dnode_t *dn, dmu_tx_t *tx); void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, @@ -511,11 +513,6 @@ typedef struct dnode_stats { */ kstat_named_t dnode_hold_free_overflow; /* - * Number of times a dnode_hold(...) was attempted on a dnode - * which had already been unlinked in an earlier txg. - */ - kstat_named_t dnode_hold_free_txg; - /* * Number of times dnode_free_interior_slots() needed to retry * acquiring a slot zrl lock due to contention. */ diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h index c70eeec4ba..8b61ddb351 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h @@ -332,7 +332,7 @@ extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp, vattr_t *vap); extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, - znode_t *dzp, char *name, uint64_t foid); + znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked); #define ZFS_NO_OBJECT 0 /* no object id */ extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *dzp, znode_t *zp, char *name); diff --git a/usr/src/uts/common/fs/zfs/zfs_log.c b/usr/src/uts/common/fs/zfs/zfs_log.c index 7d3e1cc42a..ed39f5327d 100644 --- a/usr/src/uts/common/fs/zfs/zfs_log.c +++ b/usr/src/uts/common/fs/zfs/zfs_log.c @@ -354,12 +354,14 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, zil_itx_assign(zilog, itx, tx); } +void zil_remove_async(zilog_t *zilog, uint64_t oid); + /* * 
Handles both TX_REMOVE and TX_RMDIR transactions. */ void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, - znode_t *dzp, char *name, uint64_t foid) + znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked) { itx_t *itx; lr_remove_t *lr; @@ -375,6 +377,17 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, itx->itx_oid = foid; + /* + * Object ids can be re-instantiated in the next txg so + * remove any async transactions to avoid future leaks. + * This can happen if a fsync occurs on the re-instantiated + * object for a WR_INDIRECT or WR_NEED_COPY write, which gets + * the new file data and flushes a write record for the old object. + */ + if (unlinked) { + ASSERT((txtype & ~TX_CI) == TX_REMOVE); + zil_remove_async(zilog, foid); + } zil_itx_assign(zilog, itx, tx); } diff --git a/usr/src/uts/common/fs/zfs/zfs_replay.c b/usr/src/uts/common/fs/zfs/zfs_replay.c index 969a56dc9b..2c643d6210 100644 --- a/usr/src/uts/common/fs/zfs/zfs_replay.c +++ b/usr/src/uts/common/fs/zfs/zfs_replay.c @@ -335,8 +335,8 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) xva.xva_vattr.va_nblocks = lr->lr_gen; xva.xva_vattr.va_fsid = dnodesize; - error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL); - if (error != ENOENT) + error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); + if (error) goto bail; if (lr->lr_common.lrc_txtype & TX_CI) @@ -469,8 +469,8 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) xva.xva_vattr.va_nblocks = lr->lr_gen; xva.xva_vattr.va_fsid = dnodesize; - error = dmu_object_info(zfsvfs->z_os, objid, NULL); - if (error != ENOENT) + error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); + if (error) goto out; if (lr->lr_common.lrc_txtype & TX_CI) diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c index a5a8f79317..8c84e93240 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c @@ 
-1918,7 +1918,7 @@ top: txtype = TX_REMOVE; if (flags & FIGNORECASE) txtype |= TX_CI; - zfs_log_remove(zilog, tx, txtype, dzp, name, obj); + zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked); dmu_tx_commit(tx); out: @@ -2234,7 +2234,8 @@ top: uint64_t txtype = TX_RMDIR; if (flags & FIGNORECASE) txtype |= TX_CI; - zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); + zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT, + B_FALSE); } dmu_tx_commit(tx); diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c index e56104f979..bd058fbd4f 100644 --- a/usr/src/uts/common/fs/zfs/zil.c +++ b/usr/src/uts/common/fs/zfs/zil.c @@ -1749,7 +1749,7 @@ zil_aitx_compare(const void *x1, const void *x2) /* * Remove all async itx with the given oid. */ -static void +void zil_remove_async(zilog_t *zilog, uint64_t oid) { uint64_t otxg, txg; @@ -1802,16 +1802,6 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) itxs_t *itxs, *clean = NULL; /* - * Object ids can be re-instantiated in the next txg so - * remove any async transactions to avoid future leaks. - * This can happen if a fsync occurs on the re-instantiated - * object for a WR_INDIRECT or WR_NEED_COPY write, which gets - * the new file data and flushes a write record for the old object. - */ - if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_REMOVE) - zil_remove_async(zilog, itx->itx_oid); - - /* * Ensure the data of a renamed file is committed before the rename. */ if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_RENAME) |