diff options
-rw-r--r-- | usr/src/cmd/ztest/ztest.c | 27 | ||||
-rw-r--r-- | usr/src/lib/libc/port/mapfile-vers | 5 | ||||
-rw-r--r-- | usr/src/man/man1m/zfs.1m | 9 | ||||
-rw-r--r-- | usr/src/pkg/manifests/system-header.mf | 1 | ||||
-rw-r--r-- | usr/src/pkg/manifests/system-kernel.mf | 7 | ||||
-rw-r--r-- | usr/src/pkg/manifests/system-library.man3c.inc | 1 | ||||
-rw-r--r-- | usr/src/pkg/manifests/system-test-zfstest.mf | 5 | ||||
-rw-r--r-- | usr/src/test/zfs-tests/runfiles/delphix.run | 3 | ||||
-rw-r--r-- | usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile | 5 | ||||
-rw-r--r-- | usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_010_pos.ksh | 172 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/dmu_send.c | 158 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/dmu_impl.h | 3 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h | 12 | ||||
-rw-r--r-- | usr/src/uts/common/io/signalfd.c | 32 | ||||
-rw-r--r-- | usr/src/uts/common/os/sig.c | 1 |
15 files changed, 359 insertions, 82 deletions
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index c5eae695c9..764008f5b8 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -4780,7 +4780,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) char path0[MAXPATHLEN]; char pathrand[MAXPATHLEN]; size_t fsize; - int bshift = SPA_OLD_MAXBLOCKSHIFT + 2; /* don't scrog all labels */ + int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */ int iters = 1000; int maxfaults; int mirror_save; @@ -4941,6 +4941,31 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) fsize = lseek(fd, 0, SEEK_END); while (--iters != 0) { + /* + * The offset must be chosen carefully to ensure that + * we do not inject a given logical block with errors + * on two different leaf devices, because ZFS can not + * tolerate that (if maxfaults==1). + * + * We divide each leaf into chunks of size + * (# leaves * SPA_MAXBLOCKSIZE * 4). Within each chunk + * there is a series of ranges to which we can inject errors. + * Each range can accept errors on only a single leaf vdev. + * The error injection ranges are separated by ranges + * which we will not inject errors on any device (DMZs). + * Each DMZ must be large enough such that a single block + * can not straddle it, so that a single block can not be + * a target in two different injection ranges (on different + * leaf vdevs). 
+ * + * For example, with 3 leaves, each chunk looks like: + * 0 to 32M: injection range for leaf 0 + * 32M to 64M: DMZ - no injection allowed + * 64M to 96M: injection range for leaf 1 + * 96M to 128M: DMZ - no injection allowed + * 128M to 160M: injection range for leaf 2 + * 160M to 192M: DMZ - no injection allowed + */ offset = ztest_random(fsize / (leaves << bshift)) * (leaves << bshift) + (leaf << bshift) + (ztest_random(1ULL << (bshift - 1)) & -8ULL); diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers index cbb5a0497b..6c1072d92f 100644 --- a/usr/src/lib/libc/port/mapfile-vers +++ b/usr/src/lib/libc/port/mapfile-vers @@ -93,6 +93,11 @@ $if _x86 && _ELF64 $add amd64 $endif +SYMBOL_VERSION ILLUMOS_0.18 { # signalfd + protected: + signalfd; +} ILLUMOS_0.17; + SYMBOL_VERSION ILLUMOS_0.17 { # glob(3C) LFS $if lf64 protected: diff --git a/usr/src/man/man1m/zfs.1m b/usr/src/man/man1m/zfs.1m index fae4025c40..7300fa6396 100644 --- a/usr/src/man/man1m/zfs.1m +++ b/usr/src/man/man1m/zfs.1m @@ -21,7 +21,7 @@ .\" .\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. .\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org> -.\" Copyright (c) 2011, 2014 by Delphix. All rights reserved. +.\" Copyright (c) 2011, 2015 by Delphix. All rights reserved. .\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. .\" Copyright (c) 2015, Joyent, Inc. All rights reserved. .\" Copyright (c) 2014 by Adam Stevko. All rights reserved. @@ -2775,8 +2775,11 @@ Do not actually receive the stream. This can be useful in conjunction with the option to verify the name the receive operation would use. .It Fl o Sy origin Ns = Ns Ar snapshot Forces the stream to be received as a clone of the given snapshot. -This is only valid if the stream is an incremental stream whose source -is the same as the provided origin. +If the stream is a full send stream, this will create the filesystem +described by the stream as a clone of the specified snapshot. 
Which +snapshot was specified will not affect the success or failure of the +receive, as long as the snapshot does exist. If the stream is an +incremental send stream, all the normal verification will be performed. .It Fl u File system that is associated with the received stream is not mounted. .It Fl v diff --git a/usr/src/pkg/manifests/system-header.mf b/usr/src/pkg/manifests/system-header.mf index 4551ca095c..697b75b6c2 100644 --- a/usr/src/pkg/manifests/system-header.mf +++ b/usr/src/pkg/manifests/system-header.mf @@ -1428,6 +1428,7 @@ file path=usr/include/sys/shm_impl.h file path=usr/include/sys/sid.h file path=usr/include/sys/siginfo.h file path=usr/include/sys/signal.h +file path=usr/include/sys/signalfd.h file path=usr/include/sys/skein.h file path=usr/include/sys/sleepq.h file path=usr/include/sys/smbios.h diff --git a/usr/src/pkg/manifests/system-kernel.mf b/usr/src/pkg/manifests/system-kernel.mf index d3cf047cb9..a00bb109cc 100644 --- a/usr/src/pkg/manifests/system-kernel.mf +++ b/usr/src/pkg/manifests/system-kernel.mf @@ -94,6 +94,9 @@ dir path=lib/svc dir path=lib/svc/manifest group=sys dir path=lib/svc/manifest/system group=sys dir path=lib/svc/method +dir path=usr/kernel group=sys +dir path=usr/kernel/drv group=sys +dir path=usr/kernel/drv/$(ARCH64) group=sys dir path=usr/share/man dir path=usr/share/man/man1m dir path=usr/share/man/man2 @@ -254,6 +257,7 @@ $(i386_ONLY)driver name=sd perms="* 0640 root sys" \ driver name=sgen perms="* 0600 root sys" \ alias=scsa,08.bfcp \ alias=scsa,08.bvhci +driver name=signalfd perms="* 0666 root sys" driver name=simnet clone_perms="simnet 0666 root sys" perms="* 0666 root sys" $(i386_ONLY)driver name=smbios perms="smbios 0444 root sys" driver name=softmac @@ -820,6 +824,9 @@ file path=lib/svc/manifest/system/scheduler.xml group=sys mode=0444 file path=lib/svc/method/svc-dumpadm mode=0555 file path=lib/svc/method/svc-intrd mode=0555 file path=lib/svc/method/svc-scheduler mode=0555 +file 
path=usr/kernel/drv/$(ARCH64)/signalfd group=sys +$(i386_ONLY)file path=usr/kernel/drv/signalfd group=sys +file path=usr/kernel/drv/signalfd.conf group=sys $(sparc_ONLY)file path=usr/share/man/man1m/monitor.1m $(sparc_ONLY)file path=usr/share/man/man1m/obpsym.1m # On SPARC driver/bscv is Serverblade1 specific, and in system/kernel/platform diff --git a/usr/src/pkg/manifests/system-library.man3c.inc b/usr/src/pkg/manifests/system-library.man3c.inc index ae061edac9..30999ee484 100644 --- a/usr/src/pkg/manifests/system-library.man3c.inc +++ b/usr/src/pkg/manifests/system-library.man3c.inc @@ -406,6 +406,7 @@ file path=usr/share/man/man3c/shm_unlink.3c file path=usr/share/man/man3c/sigfpe.3c file path=usr/share/man/man3c/siginterrupt.3c file path=usr/share/man/man3c/signal.3c +file path=usr/share/man/man3c/signalfd.3c file path=usr/share/man/man3c/sigqueue.3c file path=usr/share/man/man3c/sigsetops.3c file path=usr/share/man/man3c/sigstack.3c diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf index ffea12e25b..70e5ff602e 100644 --- a/usr/src/pkg/manifests/system-test-zfstest.mf +++ b/usr/src/pkg/manifests/system-test-zfstest.mf @@ -10,7 +10,7 @@ # # -# Copyright (c) 2012, 2014 by Delphix. All rights reserved. +# Copyright (c) 2012, 2015 by Delphix. All rights reserved. # Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. # Copyright 2015, Nexenta Systems Inc. All rights reserved. 
# @@ -662,6 +662,9 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_009_neg \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_010_pos \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename.cfg \ diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run index 0bac84c12e..266247bd4a 100644 --- a/usr/src/test/zfs-tests/runfiles/delphix.run +++ b/usr/src/test/zfs-tests/runfiles/delphix.run @@ -139,7 +139,8 @@ tests = ['zfs_written_property_001_pos'] [/opt/zfs-tests/tests/functional/cli_root/zfs_receive] tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos', 'zfs_receive_005_neg', 'zfs_receive_006_pos', - 'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg'] + 'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg', + 'zfs_receive_010_pos'] [/opt/zfs-tests/tests/functional/cli_root/zfs_rename] tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile index 3e9f28238b..f203bfc344 100644 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile @@ -10,7 +10,7 @@ # # -# Copyright (c) 2012 by Delphix. All rights reserved. +# Copyright (c) 2012, 2015 by Delphix. All rights reserved. 
# include $(SRC)/Makefile.master @@ -28,7 +28,8 @@ PROGS = cleanup \ zfs_receive_006_pos \ zfs_receive_007_neg \ zfs_receive_008_pos \ - zfs_receive_009_neg + zfs_receive_009_neg \ + zfs_receive_010_pos CMDS = $(PROGS:%=$(TESTDIR)/%) $(CMDS) := FILEMODE = 0555 diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_010_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_010_pos.ksh new file mode 100644 index 0000000000..f9c1ec4a5f --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_010_pos.ksh @@ -0,0 +1,172 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2015 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Test that receiving a full send as a clone works correctly. +# +# STRATEGY: +# 1. Create pool and filesystems. +# 2. Send filesystem, receive as clone of itself. +# 3. Verify that nop-write saves space. +# 4. Send filesystem, receive as clone of other filesystem. +# 5. Verify that contents are correct. +# 6. Repeat steps 4 and 5 with filesystems swapped. 
+# + +verify_runnable "both" + +fs=$TESTPOOL/$TESTFS/base/fs +fs2=$TESTPOOL/$TESTFS/base/fs2 +rfs=$TESTPOOL/$TESTFS/base/rfs + +function make_object +{ + local objnum=$1 + local mntpnt=$2 + local type=$3 + if [[ $type == "file" ]]; then + $DD if=/dev/urandom of=${mntpnt}/f$objnum bs=512 count=16 + elif [[ $type == "hole1" ]]; then + $DD if=/dev/urandom of=${mntpnt}/fh$objnum bs=512 count=5 stride=4 + elif [[ $type == "hole2" ]]; then + $DD if=/dev/urandom of=${mntpnt}/fh$objnum bs=512 count=4 stride=5 + elif [[ $type == "directory" ]]; then + $MKDIR ${mntpnt}/d$objnum + elif [[ $type == "missing" ]]; then + $TOUCH ${mntpnt}/h$objnum + fi +} + +function create_pair +{ + local objnum=$1 + local mntpnt1=$2 + local mntpnt2=$3 + local type1=$4 + local type2=$5 + make_object $objnum $mntpnt1 $type1 + make_object $objnum $mntpnt2 $type2 +} + +function cleanup +{ + $ZFS destroy -Rf $TESTPOOL/$TESTFS/base + rm /tmp/zr010p* +} + +log_assert "zfs receive of full send as clone should work" +log_onexit cleanup +log_must $ZFS create -o checksum=sha256 -o compression=gzip -o recordsize=512 \ + $TESTPOOL/$TESTFS/base + +log_must $ZFS create $fs +log_must $ZFS create $fs2 +mntpnt=$(get_prop mountpoint $fs) +mntpnt2=$(get_prop mountpoint $fs2) + +# +# Now, we create the two filesystems. By creating objects with +# different types and the same object number in each filesystem, we +# create a situation where, when you receive the full send of each as +# a clone of the other, we will test to ensure that the code correctly +# handles receiving all object types onto all other object types. +# + +# Receive a file onto a file (and vice versa). +create_pair 8 $mntpnt $mntpnt2 "file" "file" + +# Receive a file onto a file with holes (and vice versa). +create_pair 9 $mntpnt $mntpnt2 "file" "hole1" + +# Receive a file onto a directory (and vice versa). +create_pair 10 $mntpnt $mntpnt2 "file" "directory" + +# Receive a file onto a missing object (and vice versa). 
+create_pair 11 $mntpnt $mntpnt2 "file" "missing" + +# Receive a file with holes onto a file with holes (and vice versa). +create_pair 12 $mntpnt $mntpnt2 "hole1" "hole2" + +# Receive a file with holes onto a directory (and vice versa). +create_pair 13 $mntpnt $mntpnt2 "hole1" "directory" + +# Receive a file with holes onto a missing object (and vice versa). +create_pair 14 $mntpnt $mntpnt2 "hole1" "missing" + +# Receive a directory onto a directory (and vice versa). +create_pair 15 $mntpnt $mntpnt2 "directory" "directory" + +# Receive a directory onto a missing object (and vice versa). +create_pair 16 $mntpnt $mntpnt2 "directory" "missing" + +# Receive a missing object onto a missing object (and vice versa). +create_pair 17 $mntpnt $mntpnt2 "missing" "missing" + +# Receive a file with a different record size onto a file (and vice versa). +log_must $ZFS set recordsize=128k $fs +$DD if=/dev/urandom of=$mntpnt/f18 bs=128k count=64 +$TOUCH $mntpnt2/f18 + +# Remove objects that are intended to be missing. +$RM $mntpnt/h17 +$RM $mntpnt2/h* + +log_must $ZFS snapshot $fs@s1 +log_must $ZFS snapshot $fs2@s1 + +log_must $ZFS send $fs@s1 > /tmp/zr010p +log_must $ZFS send $fs2@s1 > /tmp/zr010p2 + + +# +# Test that, when we receive a full send as a clone of itself, +# nop-write saves us all the space used by data blocks. +# +cat /tmp/zr010p | log_must $ZFS receive -o origin=$fs@s1 $rfs +size=$(get_prop used $rfs) +size2=$(get_prop used $fs) +if [[ $size -ge $(($size2 / 10)) ]] then + log_fail "nop-write failure; expected usage less than "\ + "$(($size2 / 10)), but is using $size" +fi +log_must $ZFS destroy -fr $rfs + +# Correctness testing: receive each full send as a clone of the other filesystem. 
+cat /tmp/zr010p | log_must $ZFS receive -o origin=$fs2@s1 $rfs +mntpnt_old=$(get_prop mountpoint $fs) +mntpnt_new=$(get_prop mountpoint $rfs) +log_must $DIFF -r $mntpnt_old $mntpnt_new +log_must $ZFS destroy -r $rfs + +cat /tmp/zr010p2 | log_must $ZFS receive -o origin=$fs@s1 $rfs +mntpnt_old=$(get_prop mountpoint $fs2) +mntpnt_new=$(get_prop mountpoint $rfs) +log_must $DIFF -r $mntpnt_old $mntpnt_new + +log_pass "zfs receive of full send as clone works" diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c index e1614f4e29..579592ed07 100644 --- a/usr/src/uts/common/fs/zfs/dmu_send.c +++ b/usr/src/uts/common/fs/zfs/dmu_send.c @@ -137,6 +137,14 @@ dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len) return (0); } +/* + * Fill in the drr_free struct, or perform aggregation if the previous record is + * also a free record, and the two are adjacent. + * + * Note that we send free records even for a full send, because we want to be + * able to receive a full send as a clone, which requires a list of all the free + * and freeobject records that were generated on the source. + */ static int dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, uint64_t length) @@ -160,15 +168,6 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, (object == dsp->dsa_last_data_object && offset > dsp->dsa_last_data_offset)); - /* - * If we are doing a non-incremental send, then there can't - * be any data in the dataset we're receiving into. Therefore - * a free record would simply be a no-op. Save space by not - * sending it to begin with. - */ - if (!dsp->dsa_incremental) - return (0); - if (length != -1ULL && offset + length < offset) length = -1ULL; @@ -347,10 +346,6 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) { struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects); - /* See comment in dump_free(). 
*/ - if (!dsp->dsa_incremental) - return (0); - /* * If there is a pending op, but it's not PENDING_FREEOBJECTS, * push it out, since free block aggregation can only be done for @@ -750,6 +745,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(to_ds)->ds_guid; if (dsl_dataset_phys(to_ds)->ds_flags & DS_FLAG_CI_DATASET) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; + drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS; if (ancestor_zb != NULL) { drr->drr_u.drr_begin.drr_fromguid = @@ -772,7 +768,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, dsp->dsa_off = off; dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid; dsp->dsa_pending_op = PENDING_NONE; - dsp->dsa_incremental = (ancestor_zb != NULL); dsp->dsa_featureflags = featureflags; dsp->dsa_resume_object = resumeobj; dsp->dsa_resume_offset = resumeoff; @@ -1286,7 +1281,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) /* target fs already exists; recv into temp clone */ /* Can't recv a clone into an existing fs */ - if (flags & DRR_FLAG_CLONE) { + if (flags & DRR_FLAG_CLONE || drba->drba_origin) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } @@ -1305,6 +1300,15 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) drba->drba_origin)) return (SET_ERROR(ENOENT)); + /* + * If we're receiving a full send as a clone, and it doesn't + * contain all the necessary free records and freeobject + * records, reject it. 
+ */ + if (fromguid == 0 && drba->drba_origin && + !(flags & DRR_FLAG_FREERECORDS)) + return (SET_ERROR(EINVAL)); + /* Open the parent of tofs */ ASSERT3U(strlen(tofs), <, MAXNAMELEN); (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); @@ -1344,7 +1348,8 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } - if (dsl_dataset_phys(origin)->ds_guid != fromguid) { + if (dsl_dataset_phys(origin)->ds_guid != fromguid && + fromguid != 0) { dsl_dataset_rele(origin, FTAG); dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENODEV)); @@ -1674,6 +1679,20 @@ struct receive_writer_arg { uint64_t bytes_read; /* bytes read when current record created */ }; +struct objlist { + list_t list; /* List of struct receive_objnode. */ + /* + * Last object looked up. Used to assert that objects are being looked + * up in ascending order. + */ + uint64_t last_lookup; +}; + +struct receive_objnode { + list_node_t node; + uint64_t object; +}; + struct receive_arg { objset_t *os; vnode_t *vp; /* The vnode to read the stream from */ @@ -1691,12 +1710,7 @@ struct receive_arg { int err; boolean_t byteswap; /* Sorted list of objects not to issue prefetches for. 
*/ - list_t ignore_obj_list; -}; - -struct receive_ign_obj_node { - list_node_t node; - uint64_t object; + struct objlist ignore_objlist; }; typedef struct guid_map_entry { @@ -2008,13 +2022,14 @@ receive_freeobjects(struct receive_writer_arg *rwa, struct drr_freeobjects *drrfo) { uint64_t obj; + int next_err = 0; if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) return (SET_ERROR(EINVAL)); for (obj = drrfo->drr_firstobj; - obj < drrfo->drr_firstobj + drrfo->drr_numobjs; - (void) dmu_object_next(rwa->os, &obj, FALSE, 0)) { + obj < drrfo->drr_firstobj + drrfo->drr_numobjs && next_err == 0; + next_err = dmu_object_next(rwa->os, &obj, FALSE, 0)) { int err; if (dmu_object_info(rwa->os, obj, NULL) != 0) @@ -2024,7 +2039,8 @@ receive_freeobjects(struct receive_writer_arg *rwa, if (err != 0) return (err); } - + if (next_err != ESRCH) + return (next_err); return (0); } @@ -2354,6 +2370,66 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf) return (0); } +static void +objlist_create(struct objlist *list) +{ + list_create(&list->list, sizeof (struct receive_objnode), + offsetof(struct receive_objnode, node)); + list->last_lookup = 0; +} + +static void +objlist_destroy(struct objlist *list) +{ + for (struct receive_objnode *n = list_remove_head(&list->list); + n != NULL; n = list_remove_head(&list->list)) { + kmem_free(n, sizeof (*n)); + } + list_destroy(&list->list); +} + +/* + * This function looks through the objlist to see if the specified object number + * is contained in the objlist. In the process, it will remove all object + * numbers in the list that are smaller than the specified object number. Thus, + * any lookup of an object number smaller than a previously looked up object + * number will always return false; therefore, all lookups should be done in + * ascending order. 
+ */ +static boolean_t +objlist_exists(struct objlist *list, uint64_t object) +{ + struct receive_objnode *node = list_head(&list->list); + ASSERT3U(object, >=, list->last_lookup); + list->last_lookup = object; + while (node != NULL && node->object < object) { + VERIFY3P(node, ==, list_remove_head(&list->list)); + kmem_free(node, sizeof (*node)); + node = list_head(&list->list); + } + return (node != NULL && node->object == object); +} + +/* + * The objlist is a list of object numbers stored in ascending order. However, + * the insertion of new object numbers does not seek out the correct location to + * store a new object number; instead, it appends it to the list for simplicity. + * Thus, any users must take care to only insert new object numbers in ascending + * order. + */ +static void +objlist_insert(struct objlist *list, uint64_t object) +{ + struct receive_objnode *node = kmem_zalloc(sizeof (*node), KM_SLEEP); + node->object = object; +#ifdef ZFS_DEBUG + struct receive_objnode *last_object = list_tail(&list->list); + uint64_t last_objnum = (last_object != NULL ? last_object->object : 0); + ASSERT3U(node->object, >, last_objnum); +#endif + list_insert_tail(&list->list, node); +} + /* * Issue the prefetch reads for any necessary indirect blocks. 
* @@ -2376,13 +2452,7 @@ static void receive_read_prefetch(struct receive_arg *ra, uint64_t object, uint64_t offset, uint64_t length) { - struct receive_ign_obj_node *node = list_head(&ra->ignore_obj_list); - while (node != NULL && node->object < object) { - VERIFY3P(node, ==, list_remove_head(&ra->ignore_obj_list)); - kmem_free(node, sizeof (*node)); - node = list_head(&ra->ignore_obj_list); - } - if (node == NULL || node->object > object) { + if (!objlist_exists(&ra->ignore_objlist, object)) { dmu_prefetch(ra->os, object, 1, offset, length, ZIO_PRIORITY_SYNC_READ); } @@ -2419,18 +2489,7 @@ receive_read_record(struct receive_arg *ra) */ if (err == ENOENT || (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) { - struct receive_ign_obj_node *node = - kmem_zalloc(sizeof (*node), - KM_SLEEP); - node->object = drro->drr_object; -#ifdef ZFS_DEBUG - struct receive_ign_obj_node *last_object = - list_tail(&ra->ignore_obj_list); - uint64_t last_objnum = (last_object != NULL ? - last_object->object : 0); - ASSERT3U(node->object, >, last_objnum); -#endif - list_insert_tail(&ra->ignore_obj_list, node); + objlist_insert(&ra->ignore_objlist, drro->drr_object); err = 0; } return (err); @@ -2647,7 +2706,6 @@ resume_check(struct receive_arg *ra, nvlist_t *begin_nvl) return (0); } - /* * Read in the stream's records, one by one, and apply them to the pool. 
There * are two threads involved; the thread that calls this function will spin up a @@ -2681,8 +2739,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, sizeof (ra.bytes_read), 1, &ra.bytes_read); } - list_create(&ra.ignore_obj_list, sizeof (struct receive_ign_obj_node), - offsetof(struct receive_ign_obj_node, node)); + objlist_create(&ra.ignore_objlist); /* these were verified in dmu_recv_begin */ ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==, @@ -2836,12 +2893,7 @@ out: } *voffp = ra.voff; - for (struct receive_ign_obj_node *n = - list_remove_head(&ra.ignore_obj_list); n != NULL; - n = list_remove_head(&ra.ignore_obj_list)) { - kmem_free(n, sizeof (*n)); - } - list_destroy(&ra.ignore_obj_list); + objlist_destroy(&ra.ignore_objlist); return (err); } diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h index 00be9dc725..8f3b27ff3f 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h @@ -24,7 +24,7 @@ */ /* * Copyright (c) 2012, Joyent, Inc. All rights reserved. - * Copyright (c) 2013, 2014 by Delphix. All rights reserved. + * Copyright (c) 2013, 2015 by Delphix. All rights reserved. */ #ifndef _SYS_DMU_IMPL_H @@ -293,7 +293,6 @@ typedef struct dmu_sendarg { uint64_t dsa_toguid; int dsa_err; dmu_pendop_t dsa_pending_op; - boolean_t dsa_incremental; uint64_t dsa_featureflags; uint64_t dsa_last_data_object; uint64_t dsa_last_data_offset; diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h index 47799ff657..8fc49c7fd4 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 
*/ #ifndef _SYS_ZFS_IOCTL_H @@ -126,6 +126,16 @@ typedef enum dmu_send_resume_token_version { #define DRR_FLAG_CLONE (1<<0) #define DRR_FLAG_CI_DATA (1<<1) +/* + * This send stream, if it is a full send, includes the FREE and FREEOBJECT + * records that are created by the sending process. This means that the send + * stream can be received as a clone, even though it is not an incremental. + * This is not implemented as a feature flag, because the receiving side does + * not need to have implemented it to receive this stream; it is fully backwards + * compatible. We need a flag, though, because full send streams without it + * cannot necessarily be received as a clone correctly. + */ +#define DRR_FLAG_FREERECORDS (1<<2) /* * flags in the drr_checksumflags field in the DRR_WRITE and diff --git a/usr/src/uts/common/io/signalfd.c b/usr/src/uts/common/io/signalfd.c index c5e2f398e0..850f321125 100644 --- a/usr/src/uts/common/io/signalfd.c +++ b/usr/src/uts/common/io/signalfd.c @@ -139,7 +139,6 @@ struct signalfd_state { */ static kmutex_t signalfd_lock; /* lock protecting state */ static dev_info_t *signalfd_devi; /* device info */ -static major_t signalfd_major; static id_space_t *signalfd_minor; /* minor number arena */ static void *signalfd_softstate; /* softstate pointer */ static signalfd_state_t *signalfd_state; /* global list of state */ @@ -222,7 +221,7 @@ signalfd_wake_list_cleanup(proc_t *p) } static void -signalfd_exit_helper() +signalfd_exit_helper(void) { proc_t *p = curproc; list_t *lst; @@ -288,7 +287,7 @@ signalfd_pollwake_cb(void *arg0, int sig) } } -/*ARGSUSED*/ +_NOTE(ARGSUSED(1)) static int signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) { @@ -440,7 +439,7 @@ consume_signal(k_sigset_t set, uio_t *uio, boolean_t block) * signal within our specified set is posted. We consume as many available * signals within our set as we can. 
*/ -/*ARGSUSED*/ +_NOTE(ARGSUSED(2)) static int signalfd_read(dev_t dev, uio_t *uio, cred_t *cr) { @@ -499,7 +498,7 @@ signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set) set.__sigbits[2]) & FILLSET2)); } -/*ARGSUSED*/ +_NOTE(ARGSUSED(4)) static int signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp, struct pollhead **phpp) @@ -559,7 +558,7 @@ signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp, return (0); } -/*ARGSUSED*/ +_NOTE(ARGSUSED(4)) static int signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) { @@ -571,7 +570,8 @@ signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) switch (cmd) { case SIGNALFDIOC_MASK: - if (copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t))) + if (ddi_copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t), + md) != 0) return (set_errno(EFAULT)); mutex_enter(&state->sfd_lock); @@ -587,7 +587,7 @@ signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) return (ENOTTY); } -/*ARGSUSED*/ +_NOTE(ARGSUSED(1)) static int signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p) { @@ -623,7 +623,6 @@ signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p) return (0); } -/*ARGSUSED*/ static int signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) { @@ -633,12 +632,15 @@ signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) mutex_enter(&signalfd_lock); signalfd_minor = id_space_create("signalfd_minor", 1, L_MAXMIN32 + 1); - if (!signalfd_minor) + if (signalfd_minor == NULL) { + cmn_err(CE_WARN, "signalfd couldn't create id space"); + mutex_exit(&signalfd_lock); return (DDI_FAILURE); + } if (ddi_soft_state_init(&signalfd_softstate, sizeof (signalfd_state_t), 0) != 0) { - cmn_err(CE_NOTE, "/dev/signalfd failed to create soft state"); + cmn_err(CE_WARN, "signalfd failed to create soft state"); id_space_destroy(signalfd_minor); mutex_exit(&signalfd_lock); return (DDI_FAILURE); @@ -655,7 +657,6 @@ 
signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) ddi_report_dev(devi); signalfd_devi = devi; - signalfd_major = ddi_driver_major(signalfd_devi); sigfd_exit_helper = signalfd_exit_helper; @@ -664,7 +665,7 @@ signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) return (DDI_SUCCESS); } -/*ARGSUSED*/ +_NOTE(ARGSUSED(0)) static int signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { @@ -672,9 +673,6 @@ signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) case DDI_DETACH: break; - case DDI_SUSPEND: - return (DDI_SUCCESS); - default: return (DDI_FAILURE); } @@ -695,7 +693,7 @@ signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) return (DDI_SUCCESS); } -/*ARGSUSED*/ +_NOTE(ARGSUSED(0)) static int signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) { diff --git a/usr/src/uts/common/os/sig.c b/usr/src/uts/common/os/sig.c index 5ef12f3ae4..b3887c16c2 100644 --- a/usr/src/uts/common/os/sig.c +++ b/usr/src/uts/common/os/sig.c @@ -60,7 +60,6 @@ #include <sys/cyclic.h> #include <sys/dtrace.h> #include <sys/sdt.h> -#include <sys/brand.h> #include <sys/signalfd.h> const k_sigset_t nullsmask = {0, 0, 0}; |