diff options
author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2015-11-17 12:00:11 +0000 |
---|---|---|
committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2015-11-17 12:00:11 +0000 |
commit | 275d73b3f371fe2c5b2e67a1c86ef4b5781ac33c (patch) | |
tree | 9d7d1d59320de0686fa2fbf770be55d114ee628d /usr/src/uts/common | |
parent | d3ca33877d23e01eff12715a6c2466f5340f622d (diff) | |
parent | 68ecb2ec930c4b0f00acaf8e0abb2b19c4b8b76f (diff) | |
download | illumos-joyent-275d73b3f371fe2c5b2e67a1c86ef4b5781ac33c.tar.gz |
[illumos-gate merge]
commit 68ecb2ec930c4b0f00acaf8e0abb2b19c4b8b76f
6393 zfs receive a full send as a clone
commit 3d729aecc03ea6ebb9bd5d56b8dccd24f57daa41
6342 want signalfd support
commit f9eb9fdf196b6ed476e4ffc69cecd8b0da3cb7e7
6451 ztest fails due to checksum errors
Conflicts:
usr/src/uts/sparc/Makefile.sparc
usr/src/uts/common/os/sig.c
usr/src/uts/common/io/signalfd.c
Diffstat (limited to 'usr/src/uts/common')
-rw-r--r-- | usr/src/uts/common/fs/zfs/dmu_send.c | 158 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/dmu_impl.h | 3 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h | 12 | ||||
-rw-r--r-- | usr/src/uts/common/io/signalfd.c | 32 | ||||
-rw-r--r-- | usr/src/uts/common/os/sig.c | 1 |
5 files changed, 132 insertions, 74 deletions
diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c index e1614f4e29..579592ed07 100644 --- a/usr/src/uts/common/fs/zfs/dmu_send.c +++ b/usr/src/uts/common/fs/zfs/dmu_send.c @@ -137,6 +137,14 @@ dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len) return (0); } +/* + * Fill in the drr_free struct, or perform aggregation if the previous record is + * also a free record, and the two are adjacent. + * + * Note that we send free records even for a full send, because we want to be + * able to receive a full send as a clone, which requires a list of all the free + * and freeobject records that were generated on the source. + */ static int dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, uint64_t length) @@ -160,15 +168,6 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, (object == dsp->dsa_last_data_object && offset > dsp->dsa_last_data_offset)); - /* - * If we are doing a non-incremental send, then there can't - * be any data in the dataset we're receiving into. Therefore - * a free record would simply be a no-op. Save space by not - * sending it to begin with. - */ - if (!dsp->dsa_incremental) - return (0); - if (length != -1ULL && offset + length < offset) length = -1ULL; @@ -347,10 +346,6 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) { struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects); - /* See comment in dump_free(). */ - if (!dsp->dsa_incremental) - return (0); - /* * If there is a pending op, but it's not PENDING_FREEOBJECTS, * push it out, since free block aggregation can only be done for @@ -750,6 +745,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(to_ds)->ds_guid; if (dsl_dataset_phys(to_ds)->ds_flags & DS_FLAG_CI_DATASET) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; + drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS; if (ancestor_zb != NULL) { drr->drr_u.drr_begin.drr_fromguid = @@ -772,7 +768,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, dsp->dsa_off = off; dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid; dsp->dsa_pending_op = PENDING_NONE; - dsp->dsa_incremental = (ancestor_zb != NULL); dsp->dsa_featureflags = featureflags; dsp->dsa_resume_object = resumeobj; dsp->dsa_resume_offset = resumeoff; @@ -1286,7 +1281,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) /* target fs already exists; recv into temp clone */ /* Can't recv a clone into an existing fs */ - if (flags & DRR_FLAG_CLONE) { + if (flags & DRR_FLAG_CLONE || drba->drba_origin) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } @@ -1305,6 +1300,15 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) drba->drba_origin)) return (SET_ERROR(ENOENT)); + /* + * If we're receiving a full send as a clone, and it doesn't + * contain all the necessary free records and freeobject + * records, reject it. + */ + if (fromguid == 0 && drba->drba_origin && + !(flags & DRR_FLAG_FREERECORDS)) + return (SET_ERROR(EINVAL)); + /* Open the parent of tofs */ ASSERT3U(strlen(tofs), <, MAXNAMELEN); (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); @@ -1344,7 +1348,8 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } - if (dsl_dataset_phys(origin)->ds_guid != fromguid) { + if (dsl_dataset_phys(origin)->ds_guid != fromguid && + fromguid != 0) { dsl_dataset_rele(origin, FTAG); dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENODEV)); @@ -1674,6 +1679,20 @@ struct receive_writer_arg { uint64_t bytes_read; /* bytes read when current record created */ }; +struct objlist { + list_t list; /* List of struct receive_objnode. */ + /* + * Last object looked up. Used to assert that objects are being looked + * up in ascending order. + */ + uint64_t last_lookup; +}; + +struct receive_objnode { + list_node_t node; + uint64_t object; +}; + struct receive_arg { objset_t *os; vnode_t *vp; /* The vnode to read the stream from */ @@ -1691,12 +1710,7 @@ struct receive_arg { int err; boolean_t byteswap; /* Sorted list of objects not to issue prefetches for. */ - list_t ignore_obj_list; -}; - -struct receive_ign_obj_node { - list_node_t node; - uint64_t object; + struct objlist ignore_objlist; }; typedef struct guid_map_entry { @@ -2008,13 +2022,14 @@ receive_freeobjects(struct receive_writer_arg *rwa, struct drr_freeobjects *drrfo) { uint64_t obj; + int next_err = 0; if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) return (SET_ERROR(EINVAL)); for (obj = drrfo->drr_firstobj; - obj < drrfo->drr_firstobj + drrfo->drr_numobjs; - (void) dmu_object_next(rwa->os, &obj, FALSE, 0)) { + obj < drrfo->drr_firstobj + drrfo->drr_numobjs && next_err == 0; + next_err = dmu_object_next(rwa->os, &obj, FALSE, 0)) { int err; if (dmu_object_info(rwa->os, obj, NULL) != 0) @@ -2024,7 +2039,8 @@ receive_freeobjects(struct receive_writer_arg *rwa, if (err != 0) return (err); } - + if (next_err != ESRCH) + return (next_err); return (0); } @@ -2354,6 +2370,66 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf) return (0); } +static void +objlist_create(struct objlist *list) +{ + list_create(&list->list, sizeof (struct receive_objnode), + offsetof(struct receive_objnode, node)); + list->last_lookup = 0; +} + +static void +objlist_destroy(struct objlist *list) +{ + for (struct receive_objnode *n = list_remove_head(&list->list); + n != NULL; n = list_remove_head(&list->list)) { + kmem_free(n, sizeof (*n)); + } + list_destroy(&list->list); +} + +/* + * This function looks through the objlist to see if the specified object number + * is contained in the objlist. In the process, it will remove all object + * numbers in the list that are smaller than the specified object number. Thus, + * any lookup of an object number smaller than a previously looked up object + * number will always return false; therefore, all lookups should be done in + * ascending order. + */ +static boolean_t +objlist_exists(struct objlist *list, uint64_t object) +{ + struct receive_objnode *node = list_head(&list->list); + ASSERT3U(object, >=, list->last_lookup); + list->last_lookup = object; + while (node != NULL && node->object < object) { + VERIFY3P(node, ==, list_remove_head(&list->list)); + kmem_free(node, sizeof (*node)); + node = list_head(&list->list); + } + return (node != NULL && node->object == object); +} + +/* + * The objlist is a list of object numbers stored in ascending order. However, + * the insertion of new object numbers does not seek out the correct location to + * store a new object number; instead, it appends it to the list for simplicity. + * Thus, any users must take care to only insert new object numbers in ascending + * order. + */ +static void +objlist_insert(struct objlist *list, uint64_t object) +{ + struct receive_objnode *node = kmem_zalloc(sizeof (*node), KM_SLEEP); + node->object = object; +#ifdef ZFS_DEBUG + struct receive_objnode *last_object = list_tail(&list->list); + uint64_t last_objnum = (last_object != NULL ? last_object->object : 0); + ASSERT3U(node->object, >, last_objnum); +#endif + list_insert_tail(&list->list, node); +} + /* * Issue the prefetch reads for any necessary indirect blocks. * @@ -2376,13 +2452,7 @@ static void receive_read_prefetch(struct receive_arg *ra, uint64_t object, uint64_t offset, uint64_t length) { - struct receive_ign_obj_node *node = list_head(&ra->ignore_obj_list); - while (node != NULL && node->object < object) { - VERIFY3P(node, ==, list_remove_head(&ra->ignore_obj_list)); - kmem_free(node, sizeof (*node)); - node = list_head(&ra->ignore_obj_list); - } - if (node == NULL || node->object > object) { + if (!objlist_exists(&ra->ignore_objlist, object)) { dmu_prefetch(ra->os, object, 1, offset, length, ZIO_PRIORITY_SYNC_READ); } @@ -2419,18 +2489,7 @@ receive_read_record(struct receive_arg *ra) */ if (err == ENOENT || (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) { - struct receive_ign_obj_node *node = - kmem_zalloc(sizeof (*node), - KM_SLEEP); - node->object = drro->drr_object; -#ifdef ZFS_DEBUG - struct receive_ign_obj_node *last_object = - list_tail(&ra->ignore_obj_list); - uint64_t last_objnum = (last_object != NULL ? - last_object->object : 0); - ASSERT3U(node->object, >, last_objnum); -#endif - list_insert_tail(&ra->ignore_obj_list, node); + objlist_insert(&ra->ignore_objlist, drro->drr_object); err = 0; } return (err); @@ -2647,7 +2706,6 @@ resume_check(struct receive_arg *ra, nvlist_t *begin_nvl) return (0); } - /* * Read in the stream's records, one by one, and apply them to the pool. There * are two threads involved; the thread that calls this function will spin up a @@ -2681,8 +2739,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, sizeof (ra.bytes_read), 1, &ra.bytes_read); } - list_create(&ra.ignore_obj_list, sizeof (struct receive_ign_obj_node), - offsetof(struct receive_ign_obj_node, node)); + objlist_create(&ra.ignore_objlist); /* these were verified in dmu_recv_begin */ ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==, @@ -2836,12 +2893,7 @@ out: } *voffp = ra.voff; - for (struct receive_ign_obj_node *n = - list_remove_head(&ra.ignore_obj_list); n != NULL; - n = list_remove_head(&ra.ignore_obj_list)) { - kmem_free(n, sizeof (*n)); - } - list_destroy(&ra.ignore_obj_list); + objlist_destroy(&ra.ignore_objlist); return (err); } diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h index 00be9dc725..8f3b27ff3f 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h @@ -24,7 +24,7 @@ */ /* * Copyright (c) 2012, Joyent, Inc. All rights reserved. - * Copyright (c) 2013, 2014 by Delphix. All rights reserved. + * Copyright (c) 2013, 2015 by Delphix. All rights reserved. */ #ifndef _SYS_DMU_IMPL_H @@ -293,7 +293,6 @@ typedef struct dmu_sendarg { uint64_t dsa_toguid; int dsa_err; dmu_pendop_t dsa_pending_op; - boolean_t dsa_incremental; uint64_t dsa_featureflags; uint64_t dsa_last_data_object; uint64_t dsa_last_data_offset; diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h index 47799ff657..8fc49c7fd4 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. */ #ifndef _SYS_ZFS_IOCTL_H @@ -126,6 +126,16 @@ typedef enum dmu_send_resume_token_version { #define DRR_FLAG_CLONE (1<<0) #define DRR_FLAG_CI_DATA (1<<1) +/* + * This send stream, if it is a full send, includes the FREE and FREEOBJECT + * records that are created by the sending process. This means that the send + * stream can be received as a clone, even though it is not an incremental. + * This is not implemented as a feature flag, because the receiving side does + * not need to have implemented it to receive this stream; it is fully backwards + * compatible. We need a flag, though, because full send streams without it + * cannot necessarily be received as a clone correctly. + */ +#define DRR_FLAG_FREERECORDS (1<<2) /* * flags in the drr_checksumflags field in the DRR_WRITE and diff --git a/usr/src/uts/common/io/signalfd.c b/usr/src/uts/common/io/signalfd.c index c5e2f398e0..850f321125 100644 --- a/usr/src/uts/common/io/signalfd.c +++ b/usr/src/uts/common/io/signalfd.c @@ -139,7 +139,6 @@ struct signalfd_state { */ static kmutex_t signalfd_lock; /* lock protecting state */ static dev_info_t *signalfd_devi; /* device info */ -static major_t signalfd_major; static id_space_t *signalfd_minor; /* minor number arena */ static void *signalfd_softstate; /* softstate pointer */ static signalfd_state_t *signalfd_state; /* global list of state */ @@ -222,7 +221,7 @@ signalfd_wake_list_cleanup(proc_t *p) } static void -signalfd_exit_helper() +signalfd_exit_helper(void) { proc_t *p = curproc; list_t *lst; @@ -288,7 +287,7 @@ signalfd_pollwake_cb(void *arg0, int sig) } } -/*ARGSUSED*/ +_NOTE(ARGSUSED(1)) static int signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) { @@ -440,7 +439,7 @@ consume_signal(k_sigset_t set, uio_t *uio, boolean_t block) * signal within our specified set is posted. We consume as many available * signals within our set as we can. */ -/*ARGSUSED*/ +_NOTE(ARGSUSED(2)) static int signalfd_read(dev_t dev, uio_t *uio, cred_t *cr) { @@ -499,7 +498,7 @@ signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set) set.__sigbits[2]) & FILLSET2)); } -/*ARGSUSED*/ +_NOTE(ARGSUSED(4)) static int signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp, struct pollhead **phpp) @@ -559,7 +558,7 @@ signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp, return (0); } -/*ARGSUSED*/ +_NOTE(ARGSUSED(4)) static int signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) { @@ -571,7 +570,8 @@ signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) switch (cmd) { case SIGNALFDIOC_MASK: - if (copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t))) + if (ddi_copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t), + md) != 0) return (set_errno(EFAULT)); mutex_enter(&state->sfd_lock); @@ -587,7 +587,7 @@ signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) return (ENOTTY); } -/*ARGSUSED*/ +_NOTE(ARGSUSED(1)) static int signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p) { @@ -623,7 +623,6 @@ signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p) return (0); } -/*ARGSUSED*/ static int signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) { @@ -633,12 +632,15 @@ signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) mutex_enter(&signalfd_lock); signalfd_minor = id_space_create("signalfd_minor", 1, L_MAXMIN32 + 1); - if (!signalfd_minor) + if (signalfd_minor == NULL) { + cmn_err(CE_WARN, "signalfd couldn't create id space"); + mutex_exit(&signalfd_lock); return (DDI_FAILURE); + } if (ddi_soft_state_init(&signalfd_softstate, sizeof (signalfd_state_t), 0) != 0) { - cmn_err(CE_NOTE, "/dev/signalfd failed to create soft state"); + cmn_err(CE_WARN, "signalfd failed to create soft state"); id_space_destroy(signalfd_minor); mutex_exit(&signalfd_lock); return (DDI_FAILURE); @@ -655,7 +657,6 @@ signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) ddi_report_dev(devi); signalfd_devi = devi; - signalfd_major = ddi_driver_major(signalfd_devi); sigfd_exit_helper = signalfd_exit_helper; @@ -664,7 +665,7 @@ signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) return (DDI_SUCCESS); } -/*ARGSUSED*/ +_NOTE(ARGSUSED(0)) static int signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { @@ -672,9 +673,6 @@ signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) case DDI_DETACH: break; - case DDI_SUSPEND: - return (DDI_SUCCESS); - default: return (DDI_FAILURE); } @@ -695,7 +693,7 @@ signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) return (DDI_SUCCESS); } -/*ARGSUSED*/ +_NOTE(ARGSUSED(0)) static int signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) { diff --git a/usr/src/uts/common/os/sig.c b/usr/src/uts/common/os/sig.c index 5ef12f3ae4..b3887c16c2 100644 --- a/usr/src/uts/common/os/sig.c +++ b/usr/src/uts/common/os/sig.c @@ -60,7 +60,6 @@ #include <sys/cyclic.h> #include <sys/dtrace.h> #include <sys/sdt.h> -#include <sys/brand.h> #include <sys/signalfd.h> const k_sigset_t nullsmask = {0, 0, 0}; |