summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Paul Dagnelie <pcd@delphix.com> 2016-03-09 09:12:17 -0800
committer Matthew Ahrens <mahrens@delphix.com> 2016-03-09 13:10:29 -0800
commit 286ef71398fb54b1d5007d6f45aa4320a9e0ede2 (patch)
tree 90aced7e28a61100529163865eeb7d161b2617cb
parent 4549a1bd3fb75b70dd34ba46715014effe21dc07 (diff)
download illumos-joyent-286ef71398fb54b1d5007d6f45aa4320a9e0ede2.tar.gz
6370 ZFS send fails to transmit some holes
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Chris Williamson <chris.williamson@delphix.com>
Reviewed by: Stefan Ring <stefanrin@gmail.com>
Reviewed by: Steven Burgess <sburgess@datto.com>
Reviewed by: Arne Jansen <sensille@gmx.net>
Approved by: Robert Mustacchi <rm@joyent.com>
-rw-r--r--usr/src/pkg/manifests/system-test-zfstest.mf3
-rw-r--r--usr/src/test/zfs-tests/cmd/Makefile1
-rw-r--r--usr/src/test/zfs-tests/cmd/mkfiles/Makefile23
-rw-r--r--usr/src/test/zfs-tests/cmd/mkfiles/mkfiles.c63
-rw-r--r--usr/src/test/zfs-tests/include/default.cfg3
-rw-r--r--usr/src/test/zfs-tests/runfiles/delphix.run3
-rw-r--r--usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_010_pos.ksh5
-rw-r--r--usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/Makefile5
-rw-r--r--usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh99
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_object.c8
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_traverse.c49
11 files changed, 244 insertions, 18 deletions
diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf
index cb1016e921..457fe05e29 100644
--- a/usr/src/pkg/manifests/system-test-zfstest.mf
+++ b/usr/src/pkg/manifests/system-test-zfstest.mf
@@ -152,6 +152,7 @@ file path=opt/zfs-tests/bin/file_write mode=0555
file path=opt/zfs-tests/bin/getholes mode=0555
file path=opt/zfs-tests/bin/largest_file mode=0555
file path=opt/zfs-tests/bin/mkbusy mode=0555
+file path=opt/zfs-tests/bin/mkfiles mode=0555
file path=opt/zfs-tests/bin/mkholes mode=0555
file path=opt/zfs-tests/bin/mktree mode=0555
file path=opt/zfs-tests/bin/mmapwrite mode=0555
@@ -794,6 +795,8 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_005_pos \
mode=0555
file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos \
mode=0555
+file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos \
+ mode=0555
file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/cache_001_pos \
mode=0555
file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/cache_002_neg \
diff --git a/usr/src/test/zfs-tests/cmd/Makefile b/usr/src/test/zfs-tests/cmd/Makefile
index 031b8ff1f6..5bc6db13c3 100644
--- a/usr/src/test/zfs-tests/cmd/Makefile
+++ b/usr/src/test/zfs-tests/cmd/Makefile
@@ -24,6 +24,7 @@ SUBDIRS = chg_usr_exec \
getholes \
largest_file \
mkbusy \
+ mkfiles \
mkholes \
mktree \
mmapwrite \
diff --git a/usr/src/test/zfs-tests/cmd/mkfiles/Makefile b/usr/src/test/zfs-tests/cmd/mkfiles/Makefile
new file mode 100644
index 0000000000..7e833fefb1
--- /dev/null
+++ b/usr/src/test/zfs-tests/cmd/mkfiles/Makefile
@@ -0,0 +1,23 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+
+PROG = mkfiles
+
+include $(SRC)/cmd/Makefile.cmd
+
+LDLIBS += -lc
+C99MODE = -xc99=%all
+
+include ../Makefile.subdirs
diff --git a/usr/src/test/zfs-tests/cmd/mkfiles/mkfiles.c b/usr/src/test/zfs-tests/cmd/mkfiles/mkfiles.c
new file mode 100644
index 0000000000..58c7d5f509
--- /dev/null
+++ b/usr/src/test/zfs-tests/cmd/mkfiles/mkfiles.c
@@ -0,0 +1,63 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/param.h>
+
+#define MAX_INT_LENGTH 10
+
+/*
+ * Print the command synopsis followed by the caller-supplied diagnostic to
+ * stderr, then terminate the process with exit_value.  Does not return.
+ *
+ * msg is only read, so it is const-qualified; every caller passes a string
+ * literal.
+ */
+static void
+usage(const char *msg, int exit_value)
+{
+	(void) fprintf(stderr, "mkfiles basename max_file [min_file]\n%s\n",
+	    msg);
+	exit(exit_value);
+}
+
+/*
+ * Create one empty file named <basename><N> for every N from min_file
+ * (argv[3], default 0) through max_file (argv[2]), inclusive.  Each file is
+ * opened with O_EXCL, so an already-existing name is an error.
+ *
+ * Returns 0 on success and -4 if a file cannot be created.  Argument errors
+ * are reported through usage(), which exits with -1 (bad argument count),
+ * -2 (bad max_file) or -3 (bad min_file, or min_file > max_file).
+ */
+int
+main(int argc, char **argv)
+{
+	unsigned int numfiles = 0;
+	unsigned int first_file = 0;
+
+	if (argc < 3 || argc > 4)
+		usage("Invalid number of arguments", -1);
+
+	if (sscanf(argv[2], "%u", &numfiles) != 1)
+		usage("Invalid maximum file", -2);
+
+	if (argc == 4 && sscanf(argv[3], "%u", &first_file) != 1)
+		usage("Invalid first file", -3);
+
+	if (numfiles < first_file)
+		usage("First file larger than last file", -3);
+
+	char buf[MAXPATHLEN];
+	/*
+	 * The termination test lives at the bottom of the body rather than
+	 * in an "i <= numfiles" loop condition, because that condition can
+	 * never become false when numfiles is UINT_MAX.
+	 */
+	for (unsigned int i = first_file; ; i++) {
+		int fd;
+		int len = snprintf(buf, sizeof (buf), "%s%u", argv[1], i);
+
+		/* Refuse to create a file under a silently truncated name. */
+		if (len < 0 || (size_t)len >= sizeof (buf)) {
+			(void) fprintf(stderr, "Failed to create %s%u %s\n",
+			    argv[1], i, strerror(ENAMETOOLONG));
+			return (-4);
+		}
+
+		/*
+		 * open(2) takes the access mode as part of the flags and the
+		 * permission bits as the third argument.
+		 */
+		fd = open(buf, O_CREAT | O_EXCL | O_RDWR, 0644);
+		if (fd == -1) {
+			(void) fprintf(stderr, "Failed to create %s %s\n", buf,
+			    strerror(errno));
+			return (-4);
+		}
+		(void) close(fd);
+
+		if (i == numfiles)
+			break;
+	}
+	return (0);
+}
diff --git a/usr/src/test/zfs-tests/include/default.cfg b/usr/src/test/zfs-tests/include/default.cfg
index fa57c978a7..61fb25e628 100644
--- a/usr/src/test/zfs-tests/include/default.cfg
+++ b/usr/src/test/zfs-tests/include/default.cfg
@@ -25,7 +25,7 @@
#
#
-# Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
#
. $STF_SUITE/include/commands.cfg
@@ -50,6 +50,7 @@ export FILE_WRITE="/opt/zfs-tests/bin/file_write"
export GETHOLES="/opt/zfs-tests/bin/getholes"
export LARGEST_FILE="/opt/zfs-tests/bin/largest_file"
export MKBUSY="/opt/zfs-tests/bin/mkbusy"
+export MKFILES="/opt/zfs-tests/bin/mkfiles"
export MKHOLES="/opt/zfs-tests/bin/mkholes"
export MKTREE="/opt/zfs-tests/bin/mktree"
export MMAPWRITE="/opt/zfs-tests/bin/mmapwrite"
diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run
index bc6c80d491..bcd19ae686 100644
--- a/usr/src/test/zfs-tests/runfiles/delphix.run
+++ b/usr/src/test/zfs-tests/runfiles/delphix.run
@@ -168,7 +168,8 @@ tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos',
[/opt/zfs-tests/tests/functional/cli_root/zfs_send]
tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos',
- 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos']
+ 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos',
+ 'zfs_send_007_pos']
[/opt/zfs-tests/tests/functional/cli_root/zfs_set]
tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos',
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_010_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_010_pos.ksh
index f9c1ec4a5f..b1fbff2976 100644
--- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_010_pos.ksh
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_010_pos.ksh
@@ -137,6 +137,11 @@ $TOUCH $mntpnt2/f18
$RM $mntpnt/h17
$RM $mntpnt2/h*
+# Add empty objects to $fs to exercise dmu_traverse code
+for i in `seq 1 100`; do
+ log_must touch $mntpnt/uf$i
+done
+
log_must $ZFS snapshot $fs@s1
log_must $ZFS snapshot $fs2@s1
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/Makefile b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/Makefile
index e8d5a70bf6..2647f8bbe4 100644
--- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/Makefile
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/Makefile
@@ -10,7 +10,7 @@
#
#
-# Copyright (c) 2012 by Delphix. All rights reserved.
+# Copyright (c) 2012, 2015 by Delphix. All rights reserved.
#
include $(SRC)/Makefile.master
@@ -25,7 +25,8 @@ PROGS = cleanup \
zfs_send_003_pos \
zfs_send_004_neg \
zfs_send_005_pos \
- zfs_send_006_pos
+ zfs_send_006_pos \
+ zfs_send_007_pos
FILES = zfs_send.cfg
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh
new file mode 100644
index 0000000000..13ae4f0248
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh
@@ -0,0 +1,99 @@
+#!/bin/ksh
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+#
+# DESCRIPTION:
+# Verify 'zfs send' drills holes appropriately when files are replaced
+#
+# STRATEGY:
+# 1. Create dataset
+# 2. Write block 0 in a bunch of files
+# 3. Snapshot the dataset
+# 4. Remove all the files and rewrite some files with just block 1
+# 5. Snapshot the dataset
+# 6. Send both snapshots and receive them locally
+# 7. diff the received dataset and the old datasets.
+# 8. Repeat steps 1-7 above with pool that never had hole birth enabled.
+#
+
+verify_runnable "both"
+
+#
+# Exit handler: destroy the source and received datasets in $TESTPOOL, remove
+# the stream file and the file-backed vdev, and destroy the scratch pool.
+# None of the commands is wrapped in log_must, so a failure of any step is
+# not treated as a test failure.
+#
+function cleanup
+{
+	typeset ds
+
+	for ds in fs recvfs; do
+		$ZFS destroy -rf $TESTPOOL/$ds
+	done
+	$RM $streamfile
+	$RM $vdev
+	$ZPOOL destroy testpool
+}
+
+
+log_assert "Verify that 'zfs send' drills appropriate holes"
+log_onexit cleanup
+streamfile=$(mktemp /var/tmp/file.XXXXXX)
+vdev=$(mktemp /var/tmp/file.XXXXXX)
+
+
+#
+# Run the send/receive hole scenario against the pool named by $1.
+#
+# A single file with only block 0 written is snapshotted as @a.  The
+# filesystem is then emptied and repopulated until some new file has the same
+# object number as that first file; block 1 of that file is written and the
+# result is snapshotted as @b.  Both snapshots are sent (full, then
+# incremental) and received into $1/recvfs, and the two trees are compared.
+# Both datasets are destroyed before returning.
+#
+test_pool ()
+{
+	POOL=$1
+	log_must $ZFS create -o recordsize=512 $POOL/fs
+	mntpnt=$(get_prop mountpoint "$POOL/fs")
+	log_must $DD if=/dev/urandom of=${mntpnt}/file bs=512 count=1 2>/dev/null
+	# Remember the object (inode) number of the only file in the dataset.
+	first_object=$(ls -i $mntpnt | awk '{print $1}')
+	log_must $ZFS snapshot $POOL/fs@a
+	# Delete and recreate files until one of them reuses $first_object.
+	while true; do
+		log_must $FIND $mntpnt -delete
+		sync
+		log_must $MKFILES "$mntpnt/" 4000
+		FILE=$(ls -i $mntpnt | awk \
+		    '{if ($1 == '$first_object') {print $2}}')
+		if [[ -n "$FILE" ]]; then
+			break
+		fi
+	done
+	# Write only block 1 of the reused object, leaving block 0 a hole.
+	# This write is what the test depends on, so its failure must be fatal.
+	log_must $DD if=/dev/urandom of=${mntpnt}/$FILE bs=512 count=1 seek=1 2>/dev/null
+
+	log_must $ZFS snapshot $POOL/fs@b
+
+	log_must eval "$ZFS send $POOL/fs@a > $streamfile"
+	$CAT $streamfile | log_must $ZFS receive $POOL/recvfs
+
+	log_must eval "$ZFS send -i @a $POOL/fs@b > $streamfile"
+	$CAT $streamfile | log_must $ZFS receive $POOL/recvfs
+
+	recv_mntpnt=$(get_prop mountpoint "$POOL/recvfs")
+	log_must $DIFF -r $mntpnt $recv_mntpnt
+	log_must $ZFS destroy -rf $POOL/fs
+	log_must $ZFS destroy -rf $POOL/recvfs
+}
+
+test_pool $TESTPOOL
+log_must $TRUNCATE --size=1G $vdev
+log_must $ZPOOL create -o version=1 testpool $vdev
+test_pool testpool
+log_must $ZPOOL destroy testpool
+log_must $ZPOOL create -d testpool $vdev
+test_pool testpool
+log_must $ZPOOL destroy testpool
+
+
+log_pass "'zfs send' drills appropriate holes"
diff --git a/usr/src/uts/common/fs/zfs/dmu_object.c b/usr/src/uts/common/fs/zfs/dmu_object.c
index 6ca021eecb..2c9802f51e 100644
--- a/usr/src/uts/common/fs/zfs/dmu_object.c
+++ b/usr/src/uts/common/fs/zfs/dmu_object.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
* Copyright 2014 HybridCluster. All rights reserved.
*/
@@ -50,6 +50,12 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
* reasonably sparse (at most 1/4 full). Look from the
* beginning once, but after that keep looking from here.
* If we can't find one, just keep going from here.
+ *
+ * Note that dmu_traverse depends on the behavior that we use
+ * multiple blocks of the dnode object before going back to
+ * reuse objects. Any change to this algorithm should preserve
+ * that property or find another solution to the issues
+ * described in traverse_visitbp.
*/
if (P2PHASE(object, L2_dnode_count) == 0) {
uint64_t offset = restarted ? object << DNODE_SHIFT : 0;
diff --git a/usr/src/uts/common/fs/zfs/dmu_traverse.c b/usr/src/uts/common/fs/zfs/dmu_traverse.c
index e8739eddaf..2822ca4525 100644
--- a/usr/src/uts/common/fs/zfs/dmu_traverse.c
+++ b/usr/src/uts/common/fs/zfs/dmu_traverse.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -62,6 +62,7 @@ typedef struct traverse_data {
uint64_t td_hole_birth_enabled_txg;
blkptr_cb_t *td_func;
void *td_arg;
+ boolean_t td_realloc_possible;
} traverse_data_t;
static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
@@ -231,18 +232,30 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
if (bp->blk_birth == 0) {
/*
- * Since this block has a birth time of 0 it must be a
- * hole created before the SPA_FEATURE_HOLE_BIRTH
- * feature was enabled. If SPA_FEATURE_HOLE_BIRTH
- * was enabled before the min_txg for this traveral we
- * know the hole must have been created before the
- * min_txg for this traveral, so we can skip it. If
- * SPA_FEATURE_HOLE_BIRTH was enabled after the min_txg
- * for this traveral we cannot tell if the hole was
- * created before or after the min_txg for this
- * traversal, so we cannot skip it.
+ * Since this block has a birth time of 0 it must be one of
+ * two things: a hole created before the
+ * SPA_FEATURE_HOLE_BIRTH feature was enabled, or a hole
+ * which has always been a hole in an object.
+ *
+ * If a file is written sparsely, then the unwritten parts of
+ * the file were "always holes" -- that is, they have been
+ * holes since this object was allocated. However, we (and
+ * our callers) can not necessarily tell when an object was
+ * allocated. Therefore, if it's possible that this object
+ * was freed and then its object number reused, we need to
+ * visit all the holes with birth==0.
+ *
+ * If it isn't possible that the object number was reused,
+ * then if SPA_FEATURE_HOLE_BIRTH was enabled before we wrote
+ * all the blocks we will visit as part of this traversal,
+ * then this hole must have always existed, so we can skip
+ * it. We visit blocks born after (exclusive) td_min_txg.
+ *
+ * Note that the meta-dnode cannot be reallocated.
*/
- if (td->td_hole_birth_enabled_txg < td->td_min_txg)
+ if ((!td->td_realloc_possible ||
+ zb->zb_object == DMU_META_DNODE_OBJECT) &&
+ td->td_hole_birth_enabled_txg <= td->td_min_txg)
return (0);
} else if (bp->blk_birth <= td->td_min_txg) {
return (0);
@@ -337,6 +350,15 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
objset_phys_t *osp = buf->b_data;
prefetch_dnode_metadata(td, &osp->os_meta_dnode, zb->zb_objset,
DMU_META_DNODE_OBJECT);
+ /*
+ * See the block comment above for the goal of this variable.
+ * If the maxblkid of the meta-dnode is 0, then we know that
+ * we've never had more than DNODES_PER_BLOCK objects in the
+ * dataset, which means we can't have reused any object ids.
+ */
+ if (osp->os_meta_dnode.dn_maxblkid == 0)
+ td->td_realloc_possible = B_FALSE;
+
if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
prefetch_dnode_metadata(td, &osp->os_groupused_dnode,
zb->zb_objset, DMU_GROUPUSED_OBJECT);
@@ -543,12 +565,13 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
td.td_pfd = &pd;
td.td_flags = flags;
td.td_paused = B_FALSE;
+ td.td_realloc_possible = (txg_start == 0 ? B_FALSE : B_TRUE);
if (spa_feature_is_active(spa, SPA_FEATURE_HOLE_BIRTH)) {
VERIFY(spa_feature_enabled_txg(spa,
SPA_FEATURE_HOLE_BIRTH, &td.td_hole_birth_enabled_txg));
} else {
- td.td_hole_birth_enabled_txg = 0;
+ td.td_hole_birth_enabled_txg = UINT64_MAX;
}
pd.pd_flags = flags;