summary | refs | log | tree | commit | diff
path: root/usr/src/uts/common
diff options
context:
space:
mode:
author Jerry Jelinek <jerry.jelinek@joyent.com> 2015-11-17 12:00:11 +0000
committer Jerry Jelinek <jerry.jelinek@joyent.com> 2015-11-17 12:00:11 +0000
commit 275d73b3f371fe2c5b2e67a1c86ef4b5781ac33c (patch)
tree 9d7d1d59320de0686fa2fbf770be55d114ee628d /usr/src/uts/common
parent d3ca33877d23e01eff12715a6c2466f5340f622d (diff)
parent 68ecb2ec930c4b0f00acaf8e0abb2b19c4b8b76f (diff)
download illumos-joyent-275d73b3f371fe2c5b2e67a1c86ef4b5781ac33c.tar.gz
[illumos-gate merge]
commit 68ecb2ec930c4b0f00acaf8e0abb2b19c4b8b76f
    6393 zfs receive a full send as a clone
commit 3d729aecc03ea6ebb9bd5d56b8dccd24f57daa41
    6342 want signalfd support
commit f9eb9fdf196b6ed476e4ffc69cecd8b0da3cb7e7
    6451 ztest fails due to checksum errors

Conflicts:
    usr/src/uts/sparc/Makefile.sparc
    usr/src/uts/common/os/sig.c
    usr/src/uts/common/io/signalfd.c
Diffstat (limited to 'usr/src/uts/common')
-rw-r--r-- usr/src/uts/common/fs/zfs/dmu_send.c 158
-rw-r--r-- usr/src/uts/common/fs/zfs/sys/dmu_impl.h 3
-rw-r--r-- usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h 12
-rw-r--r-- usr/src/uts/common/io/signalfd.c 32
-rw-r--r-- usr/src/uts/common/os/sig.c 1
5 files changed, 132 insertions, 74 deletions
diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c
index e1614f4e29..579592ed07 100644
--- a/usr/src/uts/common/fs/zfs/dmu_send.c
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c
@@ -137,6 +137,14 @@ dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len)
return (0);
}
+/*
+ * Fill in the drr_free struct, or perform aggregation if the previous record is
+ * also a free record, and the two are adjacent.
+ *
+ * Note that we send free records even for a full send, because we want to be
+ * able to receive a full send as a clone, which requires a list of all the free
+ * and freeobject records that were generated on the source.
+ */
static int
dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
uint64_t length)
@@ -160,15 +168,6 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
(object == dsp->dsa_last_data_object &&
offset > dsp->dsa_last_data_offset));
- /*
- * If we are doing a non-incremental send, then there can't
- * be any data in the dataset we're receiving into. Therefore
- * a free record would simply be a no-op. Save space by not
- * sending it to begin with.
- */
- if (!dsp->dsa_incremental)
- return (0);
-
if (length != -1ULL && offset + length < offset)
length = -1ULL;
@@ -347,10 +346,6 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
{
struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
- /* See comment in dump_free(). */
- if (!dsp->dsa_incremental)
- return (0);
-
/*
* If there is a pending op, but it's not PENDING_FREEOBJECTS,
* push it out, since free block aggregation can only be done for
@@ -750,6 +745,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(to_ds)->ds_guid;
if (dsl_dataset_phys(to_ds)->ds_flags & DS_FLAG_CI_DATASET)
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
+ drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS;
if (ancestor_zb != NULL) {
drr->drr_u.drr_begin.drr_fromguid =
@@ -772,7 +768,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
dsp->dsa_off = off;
dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
dsp->dsa_pending_op = PENDING_NONE;
- dsp->dsa_incremental = (ancestor_zb != NULL);
dsp->dsa_featureflags = featureflags;
dsp->dsa_resume_object = resumeobj;
dsp->dsa_resume_offset = resumeoff;
@@ -1286,7 +1281,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
/* target fs already exists; recv into temp clone */
/* Can't recv a clone into an existing fs */
- if (flags & DRR_FLAG_CLONE) {
+ if (flags & DRR_FLAG_CLONE || drba->drba_origin) {
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL));
}
@@ -1305,6 +1300,15 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
drba->drba_origin))
return (SET_ERROR(ENOENT));
+ /*
+ * If we're receiving a full send as a clone, and it doesn't
+ * contain all the necessary free records and freeobject
+ * records, reject it.
+ */
+ if (fromguid == 0 && drba->drba_origin &&
+ !(flags & DRR_FLAG_FREERECORDS))
+ return (SET_ERROR(EINVAL));
+
/* Open the parent of tofs */
ASSERT3U(strlen(tofs), <, MAXNAMELEN);
(void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1);
@@ -1344,7 +1348,8 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL));
}
- if (dsl_dataset_phys(origin)->ds_guid != fromguid) {
+ if (dsl_dataset_phys(origin)->ds_guid != fromguid &&
+ fromguid != 0) {
dsl_dataset_rele(origin, FTAG);
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(ENODEV));
@@ -1674,6 +1679,20 @@ struct receive_writer_arg {
uint64_t bytes_read; /* bytes read when current record created */
};
+struct objlist {
+ list_t list; /* List of struct receive_objnode. */
+ /*
+ * Last object looked up. Used to assert that objects are being looked
+ * up in ascending order.
+ */
+ uint64_t last_lookup;
+};
+
+struct receive_objnode {
+ list_node_t node;
+ uint64_t object;
+};
+
struct receive_arg {
objset_t *os;
vnode_t *vp; /* The vnode to read the stream from */
@@ -1691,12 +1710,7 @@ struct receive_arg {
int err;
boolean_t byteswap;
/* Sorted list of objects not to issue prefetches for. */
- list_t ignore_obj_list;
-};
-
-struct receive_ign_obj_node {
- list_node_t node;
- uint64_t object;
+ struct objlist ignore_objlist;
};
typedef struct guid_map_entry {
@@ -2008,13 +2022,14 @@ receive_freeobjects(struct receive_writer_arg *rwa,
struct drr_freeobjects *drrfo)
{
uint64_t obj;
+ int next_err = 0;
if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj)
return (SET_ERROR(EINVAL));
for (obj = drrfo->drr_firstobj;
- obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
- (void) dmu_object_next(rwa->os, &obj, FALSE, 0)) {
+ obj < drrfo->drr_firstobj + drrfo->drr_numobjs && next_err == 0;
+ next_err = dmu_object_next(rwa->os, &obj, FALSE, 0)) {
int err;
if (dmu_object_info(rwa->os, obj, NULL) != 0)
@@ -2024,7 +2039,8 @@ receive_freeobjects(struct receive_writer_arg *rwa,
if (err != 0)
return (err);
}
-
+ if (next_err != ESRCH)
+ return (next_err);
return (0);
}
@@ -2354,6 +2370,66 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf)
return (0);
}
+static void
+objlist_create(struct objlist *list)
+{
+ list_create(&list->list, sizeof (struct receive_objnode),
+ offsetof(struct receive_objnode, node));
+ list->last_lookup = 0;
+}
+
+static void
+objlist_destroy(struct objlist *list)
+{
+ for (struct receive_objnode *n = list_remove_head(&list->list);
+ n != NULL; n = list_remove_head(&list->list)) {
+ kmem_free(n, sizeof (*n));
+ }
+ list_destroy(&list->list);
+}
+
+/*
+ * This function looks through the objlist to see if the specified object number
+ * is contained in the objlist. In the process, it will remove all object
+ * numbers in the list that are smaller than the specified object number. Thus,
+ * any lookup of an object number smaller than a previously looked up object
+ * number will always return false; therefore, all lookups should be done in
+ * ascending order.
+ */
+static boolean_t
+objlist_exists(struct objlist *list, uint64_t object)
+{
+ struct receive_objnode *node = list_head(&list->list);
+ ASSERT3U(object, >=, list->last_lookup);
+ list->last_lookup = object;
+ while (node != NULL && node->object < object) {
+ VERIFY3P(node, ==, list_remove_head(&list->list));
+ kmem_free(node, sizeof (*node));
+ node = list_head(&list->list);
+ }
+ return (node != NULL && node->object == object);
+}
+
+/*
+ * The objlist is a list of object numbers stored in ascending order. However,
+ * the insertion of new object numbers does not seek out the correct location to
+ * store a new object number; instead, it appends it to the list for simplicity.
+ * Thus, any users must take care to only insert new object numbers in ascending
+ * order.
+ */
+static void
+objlist_insert(struct objlist *list, uint64_t object)
+{
+ struct receive_objnode *node = kmem_zalloc(sizeof (*node), KM_SLEEP);
+ node->object = object;
+#ifdef ZFS_DEBUG
+ struct receive_objnode *last_object = list_tail(&list->list);
+ uint64_t last_objnum = (last_object != NULL ? last_object->object : 0);
+ ASSERT3U(node->object, >, last_objnum);
+#endif
+ list_insert_tail(&list->list, node);
+}
+
/*
* Issue the prefetch reads for any necessary indirect blocks.
*
@@ -2376,13 +2452,7 @@ static void
receive_read_prefetch(struct receive_arg *ra,
uint64_t object, uint64_t offset, uint64_t length)
{
- struct receive_ign_obj_node *node = list_head(&ra->ignore_obj_list);
- while (node != NULL && node->object < object) {
- VERIFY3P(node, ==, list_remove_head(&ra->ignore_obj_list));
- kmem_free(node, sizeof (*node));
- node = list_head(&ra->ignore_obj_list);
- }
- if (node == NULL || node->object > object) {
+ if (!objlist_exists(&ra->ignore_objlist, object)) {
dmu_prefetch(ra->os, object, 1, offset, length,
ZIO_PRIORITY_SYNC_READ);
}
@@ -2419,18 +2489,7 @@ receive_read_record(struct receive_arg *ra)
*/
if (err == ENOENT ||
(err == 0 && doi.doi_data_block_size != drro->drr_blksz)) {
- struct receive_ign_obj_node *node =
- kmem_zalloc(sizeof (*node),
- KM_SLEEP);
- node->object = drro->drr_object;
-#ifdef ZFS_DEBUG
- struct receive_ign_obj_node *last_object =
- list_tail(&ra->ignore_obj_list);
- uint64_t last_objnum = (last_object != NULL ?
- last_object->object : 0);
- ASSERT3U(node->object, >, last_objnum);
-#endif
- list_insert_tail(&ra->ignore_obj_list, node);
+ objlist_insert(&ra->ignore_objlist, drro->drr_object);
err = 0;
}
return (err);
@@ -2647,7 +2706,6 @@ resume_check(struct receive_arg *ra, nvlist_t *begin_nvl)
return (0);
}
-
/*
* Read in the stream's records, one by one, and apply them to the pool. There
* are two threads involved; the thread that calls this function will spin up a
@@ -2681,8 +2739,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
sizeof (ra.bytes_read), 1, &ra.bytes_read);
}
- list_create(&ra.ignore_obj_list, sizeof (struct receive_ign_obj_node),
- offsetof(struct receive_ign_obj_node, node));
+ objlist_create(&ra.ignore_objlist);
/* these were verified in dmu_recv_begin */
ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==,
@@ -2836,12 +2893,7 @@ out:
}
*voffp = ra.voff;
- for (struct receive_ign_obj_node *n =
- list_remove_head(&ra.ignore_obj_list); n != NULL;
- n = list_remove_head(&ra.ignore_obj_list)) {
- kmem_free(n, sizeof (*n));
- }
- list_destroy(&ra.ignore_obj_list);
+ objlist_destroy(&ra.ignore_objlist);
return (err);
}
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h
index 00be9dc725..8f3b27ff3f 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h
@@ -24,7 +24,7 @@
*/
/*
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
*/
#ifndef _SYS_DMU_IMPL_H
@@ -293,7 +293,6 @@ typedef struct dmu_sendarg {
uint64_t dsa_toguid;
int dsa_err;
dmu_pendop_t dsa_pending_op;
- boolean_t dsa_incremental;
uint64_t dsa_featureflags;
uint64_t dsa_last_data_object;
uint64_t dsa_last_data_offset;
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
index 47799ff657..8fc49c7fd4 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZFS_IOCTL_H
@@ -126,6 +126,16 @@ typedef enum dmu_send_resume_token_version {
#define DRR_FLAG_CLONE (1<<0)
#define DRR_FLAG_CI_DATA (1<<1)
+/*
+ * This send stream, if it is a full send, includes the FREE and FREEOBJECT
+ * records that are created by the sending process. This means that the send
+ * stream can be received as a clone, even though it is not an incremental.
+ * This is not implemented as a feature flag, because the receiving side does
+ * not need to have implemented it to receive this stream; it is fully backwards
+ * compatible. We need a flag, though, because full send streams without it
+ * cannot necessarily be received as a clone correctly.
+ */
+#define DRR_FLAG_FREERECORDS (1<<2)
/*
* flags in the drr_checksumflags field in the DRR_WRITE and
diff --git a/usr/src/uts/common/io/signalfd.c b/usr/src/uts/common/io/signalfd.c
index c5e2f398e0..850f321125 100644
--- a/usr/src/uts/common/io/signalfd.c
+++ b/usr/src/uts/common/io/signalfd.c
@@ -139,7 +139,6 @@ struct signalfd_state {
*/
static kmutex_t signalfd_lock; /* lock protecting state */
static dev_info_t *signalfd_devi; /* device info */
-static major_t signalfd_major;
static id_space_t *signalfd_minor; /* minor number arena */
static void *signalfd_softstate; /* softstate pointer */
static signalfd_state_t *signalfd_state; /* global list of state */
@@ -222,7 +221,7 @@ signalfd_wake_list_cleanup(proc_t *p)
}
static void
-signalfd_exit_helper()
+signalfd_exit_helper(void)
{
proc_t *p = curproc;
list_t *lst;
@@ -288,7 +287,7 @@ signalfd_pollwake_cb(void *arg0, int sig)
}
}
-/*ARGSUSED*/
+_NOTE(ARGSUSED(1))
static int
signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
@@ -440,7 +439,7 @@ consume_signal(k_sigset_t set, uio_t *uio, boolean_t block)
* signal within our specified set is posted. We consume as many available
* signals within our set as we can.
*/
-/*ARGSUSED*/
+_NOTE(ARGSUSED(2))
static int
signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
{
@@ -499,7 +498,7 @@ signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set)
set.__sigbits[2]) & FILLSET2));
}
-/*ARGSUSED*/
+_NOTE(ARGSUSED(4))
static int
signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
struct pollhead **phpp)
@@ -559,7 +558,7 @@ signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
return (0);
}
-/*ARGSUSED*/
+_NOTE(ARGSUSED(4))
static int
signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
@@ -571,7 +570,8 @@ signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
switch (cmd) {
case SIGNALFDIOC_MASK:
- if (copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t)))
+ if (ddi_copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t),
+ md) != 0)
return (set_errno(EFAULT));
mutex_enter(&state->sfd_lock);
@@ -587,7 +587,7 @@ signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
return (ENOTTY);
}
-/*ARGSUSED*/
+_NOTE(ARGSUSED(1))
static int
signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
@@ -623,7 +623,6 @@ signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
return (0);
}
-/*ARGSUSED*/
static int
signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
@@ -633,12 +632,15 @@ signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
mutex_enter(&signalfd_lock);
signalfd_minor = id_space_create("signalfd_minor", 1, L_MAXMIN32 + 1);
- if (!signalfd_minor)
+ if (signalfd_minor == NULL) {
+ cmn_err(CE_WARN, "signalfd couldn't create id space");
+ mutex_exit(&signalfd_lock);
return (DDI_FAILURE);
+ }
if (ddi_soft_state_init(&signalfd_softstate,
sizeof (signalfd_state_t), 0) != 0) {
- cmn_err(CE_NOTE, "/dev/signalfd failed to create soft state");
+ cmn_err(CE_WARN, "signalfd failed to create soft state");
id_space_destroy(signalfd_minor);
mutex_exit(&signalfd_lock);
return (DDI_FAILURE);
@@ -655,7 +657,6 @@ signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
ddi_report_dev(devi);
signalfd_devi = devi;
- signalfd_major = ddi_driver_major(signalfd_devi);
sigfd_exit_helper = signalfd_exit_helper;
@@ -664,7 +665,7 @@ signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
return (DDI_SUCCESS);
}
-/*ARGSUSED*/
+_NOTE(ARGSUSED(0))
static int
signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
@@ -672,9 +673,6 @@ signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
case DDI_DETACH:
break;
- case DDI_SUSPEND:
- return (DDI_SUCCESS);
-
default:
return (DDI_FAILURE);
}
@@ -695,7 +693,7 @@ signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
return (DDI_SUCCESS);
}
-/*ARGSUSED*/
+_NOTE(ARGSUSED(0))
static int
signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
diff --git a/usr/src/uts/common/os/sig.c b/usr/src/uts/common/os/sig.c
index 5ef12f3ae4..b3887c16c2 100644
--- a/usr/src/uts/common/os/sig.c
+++ b/usr/src/uts/common/os/sig.c
@@ -60,7 +60,6 @@
#include <sys/cyclic.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>
-#include <sys/brand.h>
#include <sys/signalfd.h>
const k_sigset_t nullsmask = {0, 0, 0};