diff options
Diffstat (limited to 'usr/src/uts/common/fs/zfs')
29 files changed, 454 insertions, 372 deletions
diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c index 11fea14a91..864cffa615 100644 --- a/usr/src/uts/common/fs/zfs/dmu_objset.c +++ b/usr/src/uts/common/fs/zfs/dmu_objset.c @@ -149,9 +149,11 @@ int dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, objset_impl_t **osip) { - objset_impl_t *winner, *osi; + objset_impl_t *osi; int i, err, checksum; + ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); + osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); osi->os.os = osi; osi->os_dsl_dataset = ds; @@ -245,12 +247,13 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, osi->os_meta_dnode = dnode_special_open(osi, &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); - if (ds != NULL) { - winner = dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict); - if (winner) { - dmu_objset_evict(ds, osi); - osi = winner; - } + /* + * We should be the only thread trying to do this because we + * have ds_opening_lock + */ + if (ds) { + VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi, + dmu_objset_evict)); } *osip = osi; @@ -274,6 +277,7 @@ dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, return (err); } + mutex_enter(&ds->ds_opening_lock); osi = dsl_dataset_get_user_ptr(ds); if (osi == NULL) { err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), @@ -284,6 +288,7 @@ dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, return (err); } } + mutex_exit(&ds->ds_opening_lock); os->os = osi; os->os_mode = mode; @@ -304,7 +309,7 @@ dmu_objset_close(objset_t *os) } int -dmu_objset_evict_dbufs(objset_t *os, int try) +dmu_objset_evict_dbufs(objset_t *os, boolean_t try) { objset_impl_t *osi = os->os; dnode_t *dn; @@ -402,7 +407,11 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, dnode_t *mdn; ASSERT(dmu_tx_is_syncing(tx)); + if (ds) + mutex_enter(&ds->ds_opening_lock); VERIFY(0 == dmu_objset_open_impl(spa, ds, bp, &osi)); + if (ds) + 
mutex_exit(&ds->ds_opening_lock); mdn = osi->os_meta_dnode; dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, @@ -802,9 +811,10 @@ dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx) zb.zb_object = 0; zb.zb_level = -1; zb.zb_blkid = 0; - if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) + if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) { dsl_dataset_block_kill(os->os_dsl_dataset, os->os_rootbp, pio, tx); + } zio = arc_write(pio, os->os_spa, os->os_md_checksum, os->os_md_compress, dmu_get_replication_level(os, &zb, DMU_OT_OBJSET), diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c index 04758568be..135adcfde6 100644 --- a/usr/src/uts/common/fs/zfs/dnode_sync.c +++ b/usr/src/uts/common/fs/zfs/dnode_sync.c @@ -350,7 +350,7 @@ dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) * Try to kick all the dnodes dbufs out of the cache... */ int -dnode_evict_dbufs(dnode_t *dn, int try) +dnode_evict_dbufs(dnode_t *dn, boolean_t try) { int progress; int pass = 0; diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c index 59482ce0df..d18a721084 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dataset.c +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c @@ -218,7 +218,6 @@ static void dsl_dataset_evict(dmu_buf_t *db, void *dsv) { dsl_dataset_t *ds = dsv; - dsl_pool_t *dp = ds->ds_dir->dd_pool; /* open_refcount == DS_REF_MAX when deleting */ ASSERT(ds->ds_open_refcount == 0 || @@ -226,7 +225,7 @@ dsl_dataset_evict(dmu_buf_t *db, void *dsv) dprintf_ds(ds, "evicting %s\n", ""); - unique_remove(ds->ds_phys->ds_fsid_guid); + unique_remove(ds->ds_fsid_guid); if (ds->ds_user_ptr != NULL) ds->ds_user_evict_func(ds, ds->ds_user_ptr); @@ -239,10 +238,10 @@ dsl_dataset_evict(dmu_buf_t *db, void *dsv) bplist_close(&ds->ds_deadlist); dsl_dir_close(ds->ds_dir, ds); - if (list_link_active(&ds->ds_synced_link)) - list_remove(&dp->dp_synced_objsets, ds); + 
ASSERT(!list_link_active(&ds->ds_synced_link)); mutex_destroy(&ds->ds_lock); + mutex_destroy(&ds->ds_opening_lock); mutex_destroy(&ds->ds_deadlist.bpl_lock); kmem_free(ds, sizeof (dsl_dataset_t)); @@ -299,6 +298,7 @@ dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, ds->ds_phys = dbuf->db_data; mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT, NULL); @@ -314,6 +314,7 @@ dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, * just opened it. */ mutex_destroy(&ds->ds_lock); + mutex_destroy(&ds->ds_opening_lock); mutex_destroy(&ds->ds_deadlist.bpl_lock); kmem_free(ds, sizeof (dsl_dataset_t)); dmu_buf_rele(dbuf, tag); @@ -364,6 +365,7 @@ dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, } dsl_dir_close(ds->ds_dir, ds); mutex_destroy(&ds->ds_lock); + mutex_destroy(&ds->ds_opening_lock); mutex_destroy(&ds->ds_deadlist.bpl_lock); kmem_free(ds, sizeof (dsl_dataset_t)); if (err) { @@ -372,12 +374,8 @@ dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, } ds = winner; } else { - uint64_t new = + ds->ds_fsid_guid = unique_insert(ds->ds_phys->ds_fsid_guid); - if (new != ds->ds_phys->ds_fsid_guid) { - /* XXX it won't necessarily be synced... 
*/ - ds->ds_phys->ds_fsid_guid = new; - } } } ASSERT3P(ds->ds_dbuf, ==, dbuf); @@ -554,7 +552,6 @@ dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) dsphys = dbuf->db_data; dsphys->ds_dir_obj = dd->dd_object; dsphys->ds_fsid_guid = unique_create(); - unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, sizeof (dsphys->ds_guid)); dsphys->ds_snapnames_zapobj = @@ -603,7 +600,6 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, dsphys = dbuf->db_data; dsphys->ds_dir_obj = dd->dd_object; dsphys->ds_fsid_guid = unique_create(); - unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, sizeof (dsphys->ds_guid)); dsphys->ds_snapnames_zapobj = @@ -1390,7 +1386,6 @@ dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) dsphys = dbuf->db_data; dsphys->ds_dir_obj = ds->ds_dir->dd_object; dsphys->ds_fsid_guid = unique_create(); - unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, sizeof (dsphys->ds_guid)); dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; @@ -1453,9 +1448,15 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) ASSERT(ds->ds_user_ptr != NULL); ASSERT(ds->ds_phys->ds_next_snap_obj == 0); + /* + * in case we had to change ds_fsid_guid when we opened it, + * sync it out now. + */ + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; + dsl_dir_dirty(ds->ds_dir, tx); dmu_objset_sync(ds->ds_user_ptr, zio, tx); - /* Unneeded? 
bplist_close(&ds->ds_deadlist); */ } void @@ -1511,7 +1512,7 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds) { - return (ds->ds_phys->ds_fsid_guid); + return (ds->ds_fsid_guid); } void diff --git a/usr/src/uts/common/fs/zfs/dsl_deleg.c b/usr/src/uts/common/fs/zfs/dsl_deleg.c index e8fc0df28c..dc1b52925a 100644 --- a/usr/src/uts/common/fs/zfs/dsl_deleg.c +++ b/usr/src/uts/common/fs/zfs/dsl_deleg.c @@ -89,17 +89,16 @@ /* * Validate that user is allowed to delegate specified permissions. * - * In order to delegate "create" you must have create" + * In order to delegate "create" you must have "create" * and "allow". */ int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr) { nvpair_t *whopair = NULL; - int error = 0; + int error; - if ((error = dsl_deleg_access(ddname, - ZFS_DELEG_PERM_ALLOW, cr)) != 0) + if ((error = dsl_deleg_access(ddname, ZFS_DELEG_PERM_ALLOW, cr)) != 0) return (error); while (whopair = nvlist_next_nvpair(nvp, whopair)) { @@ -114,12 +113,11 @@ dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr) if (strcmp(perm, ZFS_DELEG_PERM_ALLOW) == 0) return (EPERM); - if ((error = dsl_deleg_access(ddname, - perm, cr)) != 0) + if ((error = dsl_deleg_access(ddname, perm, cr)) != 0) return (error); } } - return (error); + return (0); } /* @@ -132,25 +130,23 @@ dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr) { nvpair_t *whopair = NULL; int error; + char idstr[32]; - if ((error = dsl_deleg_access(ddname, - ZFS_DELEG_PERM_ALLOW, cr)) != 0) + if ((error = dsl_deleg_access(ddname, ZFS_DELEG_PERM_ALLOW, cr)) != 0) return (error); + (void) snprintf(idstr, sizeof (idstr), "%lld", + (longlong_t)crgetuid(cr)); + while (whopair = nvlist_next_nvpair(nvp, whopair)) { zfs_deleg_who_type_t type = nvpair_name(whopair)[0]; - char idstr[32]; if (type != ZFS_DELEG_USER && type != ZFS_DELEG_USER_SETS) return (EPERM); - (void) snprintf(idstr, sizeof (idstr), "%lld", - 
(longlong_t)crgetuid(cr)); if (strcmp(idstr, &nvpair_name(whopair)[3]) != 0) return (EPERM); - - continue; } return (0); } @@ -184,6 +180,7 @@ dsl_deleg_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) uint64_t jumpobj; if (nvpair_value_nvlist(whopair, &perms) != 0) { + ASSERT(pa->p_unset); if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) == 0) { (void) zap_remove(mos, zapobj, whokey, tx); @@ -201,7 +198,7 @@ dsl_deleg_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) * If object doesn't exist and we are removing * it, then just continue to next item in nvlist */ - if (pa->p_unset == 1) + if (pa->p_unset) continue; jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); @@ -359,8 +356,8 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp) */ typedef struct perm_set { avl_node_t p_node; - char p_setname[ZFS_MAX_DELEG_NAME]; boolean_t p_matched; + char p_setname[ZFS_MAX_DELEG_NAME]; } perm_set_t; static int @@ -408,7 +405,7 @@ dsl_check_access(objset_t *mos, uint64_t zapobj, * check a specified user/group for a requested permission */ static int -dsl_check_user_access(objset_t *os, uint64_t zapobj, const char *perm, +dsl_check_user_access(objset_t *mos, uint64_t zapobj, const char *perm, int checkflag, cred_t *cr) { const gid_t *gids; @@ -418,19 +415,19 @@ dsl_check_user_access(objset_t *os, uint64_t zapobj, const char *perm, /* check for user */ id = crgetuid(cr); - if (dsl_check_access(os, zapobj, + if (dsl_check_access(mos, zapobj, ZFS_DELEG_USER, checkflag, &id, perm) == 0) return (0); /* check for users primary group */ id = crgetgid(cr); - if (dsl_check_access(os, zapobj, + if (dsl_check_access(mos, zapobj, ZFS_DELEG_GROUP, checkflag, &id, perm) == 0) return (0); /* check for everyone entry */ id = -1; - if (dsl_check_access(os, zapobj, + if (dsl_check_access(mos, zapobj, ZFS_DELEG_EVERYONE, checkflag, &id, perm) == 0) return (0); @@ -439,7 +436,7 @@ dsl_check_user_access(objset_t *os, uint64_t zapobj, const char *perm, gids = 
crgetgroups(cr); for (i = 0; i != ngids; i++) { id = gids[i]; - if (dsl_check_access(os, zapobj, + if (dsl_check_access(mos, zapobj, ZFS_DELEG_GROUP, checkflag, &id, perm) == 0) return (0); } @@ -581,11 +578,6 @@ dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr) zfs_prop_to_name(ZFS_PROP_ZONED), 8, 1, &zoned, NULL) != 0) break; - - /* - * if zoned property isn't set then break - * out and return EPERM. - */ if (!zoned) break; } @@ -595,12 +587,10 @@ dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr) continue; dsl_load_user_sets(mos, zapobj, &permsets, checkflag, cr); - setnode = avl_first(&permsets); again: expanded = B_FALSE; for (setnode = avl_first(&permsets); setnode; setnode = AVL_NEXT(&permsets, setnode)) { - if (setnode->p_matched == B_TRUE) continue; @@ -636,10 +626,8 @@ success: dsl_dir_close(startdd, FTAG); cookie = NULL; - while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL) { - /* These sets were used but never defined! */ + while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL) kmem_free(setnode, sizeof (perm_set_t)); - } return (error); } @@ -649,12 +637,11 @@ success: */ static void -copy_create_perms(objset_t *mos, uint64_t pzapobj, dsl_dir_t *dd, +copy_create_perms(dsl_dir_t *dd, uint64_t pzapobj, boolean_t dosets, uint64_t uid, dmu_tx_t *tx) { - int error; + objset_t *mos = dd->dd_pool->dp_meta_objset; uint64_t jumpobj, pjumpobj; - uint64_t zero = 0; uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; zap_cursor_t zc; zap_attribute_t za; @@ -663,20 +650,18 @@ copy_create_perms(objset_t *mos, uint64_t pzapobj, dsl_dir_t *dd, zfs_deleg_whokey(whokey, dosets ? ZFS_DELEG_CREATE_SETS : ZFS_DELEG_CREATE, ZFS_DELEG_LOCAL, NULL); - error = zap_lookup(mos, pzapobj, whokey, 8, 1, &pjumpobj); - if (error != 0) + if (zap_lookup(mos, pzapobj, whokey, 8, 1, &pjumpobj) != 0) return; - zfs_deleg_whokey(whokey, - dosets ? 
ZFS_DELEG_USER_SETS : ZFS_DELEG_USER, - ZFS_DELEG_LOCAL, &uid); - if (zapobj == 0) { dmu_buf_will_dirty(dd->dd_dbuf, tx); zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); } + zfs_deleg_whokey(whokey, + dosets ? ZFS_DELEG_USER_SETS : ZFS_DELEG_USER, + ZFS_DELEG_LOCAL, &uid); if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) == ENOENT) { jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); VERIFY(zap_add(mos, zapobj, whokey, 8, 1, &jumpobj, tx) == 0); @@ -685,6 +670,7 @@ copy_create_perms(objset_t *mos, uint64_t pzapobj, dsl_dir_t *dd, for (zap_cursor_init(&zc, mos, pjumpobj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { + uint64_t zero = 0; ASSERT(za.za_integer_length == 8 && za.za_num_integers == 1); VERIFY(zap_update(mos, jumpobj, za.za_name, @@ -700,20 +686,20 @@ void dsl_deleg_set_create_perms(dsl_dir_t *sdd, dmu_tx_t *tx, cred_t *cr) { dsl_dir_t *dd; - objset_t *mos = sdd->dd_pool->dp_meta_objset; + uint64_t uid = crgetuid(cr); if (spa_version(dmu_objset_spa(sdd->dd_pool->dp_meta_objset)) < ZFS_VERSION_DELEGATED_PERMS) return; for (dd = sdd->dd_parent; dd != NULL; dd = dd->dd_parent) { - uint64_t pobj = dd->dd_phys->dd_deleg_zapobj; + uint64_t pzapobj = dd->dd_phys->dd_deleg_zapobj; - if (pobj == 0) + if (pzapobj == 0) continue; - copy_create_perms(mos, pobj, sdd, B_FALSE, crgetuid(cr), tx); - copy_create_perms(mos, pobj, sdd, B_TRUE, crgetuid(cr), tx); + copy_create_perms(sdd, pzapobj, B_FALSE, uid, tx); + copy_create_perms(sdd, pzapobj, B_TRUE, uid, tx); } } diff --git a/usr/src/uts/common/fs/zfs/dsl_prop.c b/usr/src/uts/common/fs/zfs/dsl_prop.c index 103e80b8f6..e814fe27f3 100644 --- a/usr/src/uts/common/fs/zfs/dsl_prop.c +++ b/usr/src/uts/common/fs/zfs/dsl_prop.c @@ -48,7 +48,7 @@ dodefault(const char *propname, int intsz, int numint, void *buf) zfs_prop_readonly(prop)) return (ENOENT); - if (zfs_prop_get_type(prop) == prop_type_string) { + if (zfs_prop_get_type(prop) == 
PROP_TYPE_STRING) { if (intsz != 1) return (EOVERFLOW); (void) strncpy(buf, zfs_prop_default_string(prop), numint); diff --git a/usr/src/uts/common/fs/zfs/refcount.c b/usr/src/uts/common/fs/zfs/refcount.c index 411ed46e13..f1b3b23fe2 100644 --- a/usr/src/uts/common/fs/zfs/refcount.c +++ b/usr/src/uts/common/fs/zfs/refcount.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -61,11 +60,13 @@ refcount_fini(void) void refcount_create(refcount_t *rc) { + mutex_init(&rc->rc_mtx, NULL, MUTEX_DEFAULT, NULL); list_create(&rc->rc_list, sizeof (reference_t), offsetof(reference_t, ref_link)); list_create(&rc->rc_removed, sizeof (reference_t), offsetof(reference_t, ref_link)); - mutex_init(&rc->rc_mtx, NULL, MUTEX_DEFAULT, NULL); + rc->rc_count = 0; + rc->rc_removed_count = 0; } void diff --git a/usr/src/uts/common/fs/zfs/rprwlock.c b/usr/src/uts/common/fs/zfs/rprwlock.c new file mode 100644 index 0000000000..49ae505209 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/rprwlock.c @@ -0,0 +1,118 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/zfs_context.h> +#include <sys/refcount.h> +#include <sys/rprwlock.h> + +void +rprw_init(rprwlock_t *rwl) +{ + mutex_init(&rwl->rw_lock, NULL, MUTEX_DEFAULT, NULL); + rwl->rw_writer = NULL; + cv_init(&rwl->rw_cv, NULL, CV_DEFAULT, NULL); + refcount_create(&rwl->rw_count); +} + +void +rprw_destroy(rprwlock_t *rwl) +{ + mutex_destroy(&rwl->rw_lock); + ASSERT(rwl->rw_writer == NULL); + cv_destroy(&rwl->rw_cv); + refcount_destroy(&rwl->rw_count); +} + +void +rprw_enter_read(rprwlock_t *rwl, void *tag) +{ + mutex_enter(&rwl->rw_lock); + + if (rwl->rw_writer != curthread) { + while (rwl->rw_writer != NULL) + cv_wait(&rwl->rw_cv, &rwl->rw_lock); + } + + (void) refcount_add(&rwl->rw_count, tag); + + mutex_exit(&rwl->rw_lock); +} + +void +rprw_enter_write(rprwlock_t *rwl, void *tag) +{ + mutex_enter(&rwl->rw_lock); + + if (rwl->rw_writer != curthread) { + while (!refcount_is_zero(&rwl->rw_count)) + cv_wait(&rwl->rw_cv, &rwl->rw_lock); + rwl->rw_writer = curthread; + } + + (void) refcount_add(&rwl->rw_count, tag); + + mutex_exit(&rwl->rw_lock); +} + +void +rprw_enter(rprwlock_t *rwl, krw_t rw, void *tag) +{ + if (rw == RW_READER) + rprw_enter_read(rwl, tag); + else + rprw_enter_write(rwl, tag); +} + +void 
+rprw_exit(rprwlock_t *rwl, void *tag) +{ + mutex_enter(&rwl->rw_lock); + + ASSERT(!refcount_is_zero(&rwl->rw_count)); + ASSERT(rwl->rw_writer == NULL || curthread == rwl->rw_writer); + if (refcount_remove(&rwl->rw_count, tag) == 0) { + cv_broadcast(&rwl->rw_cv); + rwl->rw_writer = NULL; /* OK in either case */ + } + + mutex_exit(&rwl->rw_lock); +} + +boolean_t +rprw_held(rprwlock_t *rwl, krw_t rw) +{ + boolean_t held; + + mutex_enter(&rwl->rw_lock); + if (rw == RW_WRITER) + held = (rwl->rw_writer == curthread); + else + held = !rwl->rw_writer && !refcount_is_zero(&rwl->rw_count); + mutex_exit(&rwl->rw_lock); + + return (held); +} diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 6159e13aa0..e2435807a9 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -132,12 +132,13 @@ spa_activate(spa_t *spa) rw_init(&spa->spa_traverse_lock, NULL, RW_DEFAULT, NULL); + rprw_init(&spa->spa_config_lock); + mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_config_cache_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&spa->spa_config_lock.scl_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_sync_bplist.bpl_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL); diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c index 17a9b4cb9d..0ed306bc29 100644 --- a/usr/src/uts/common/fs/zfs/spa_config.c +++ b/usr/src/uts/common/fs/zfs/spa_config.c @@ -270,7 +270,8 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) vdev_t *rvd = spa->spa_root_vdev; unsigned long hostid = 0; - ASSERT(spa_config_held(spa, RW_READER)); + ASSERT(spa_config_held(spa, 
RW_READER) || + spa_config_held(spa, RW_WRITER)); if (vd == NULL) vd = rvd; diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c index 5035eb3d68..59bf92a552 100644 --- a/usr/src/uts/common/fs/zfs/spa_misc.c +++ b/usr/src/uts/common/fs/zfs/spa_misc.c @@ -45,6 +45,7 @@ #include <sys/dsl_prop.h> #include <sys/fs/zfs.h> #include <sys/metaslab_impl.h> +#include "zfs_prop.h" /* * SPA locking @@ -76,11 +77,10 @@ * some references in the DMU. Internally we check against SPA_MINREF, but * present the image of a zero/non-zero value to consumers. * - * spa_config_lock (per-spa crazy rwlock) + * spa_config_lock (per-spa read-priority rwlock) * - * This SPA special is a recursive rwlock, capable of being acquired from - * asynchronous threads. It has protects the spa_t from config changes, - * and must be held in the following circumstances: + * This protects the spa_t from config changes, and must be held in + * the following circumstances: * * - RW_READER to perform I/O to the spa * - RW_WRITER to change the vdev config @@ -257,7 +257,7 @@ spa_add(const char *name, const char *altroot) spa->spa_final_txg = UINT64_MAX; refcount_create(&spa->spa_refcount); - refcount_create(&spa->spa_config_lock.scl_count); + rprw_init(&spa->spa_config_lock); avl_add(&spa_namespace_avl, spa); @@ -298,10 +298,10 @@ spa_remove(spa_t *spa) spa_config_set(spa, NULL); refcount_destroy(&spa->spa_refcount); - refcount_destroy(&spa->spa_config_lock.scl_count); + + rprw_destroy(&spa->spa_config_lock); mutex_destroy(&spa->spa_sync_bplist.bpl_lock); - mutex_destroy(&spa->spa_config_lock.scl_lock); mutex_destroy(&spa->spa_errlist_lock); mutex_destroy(&spa->spa_errlog_lock); mutex_destroy(&spa->spa_scrub_lock); @@ -518,79 +518,22 @@ spa_spare_activate(vdev_t *vd) * SPA config locking * ========================================================================== */ - -/* - * Acquire the config lock. The config lock is a special rwlock that allows for - * recursive enters. 
Because these enters come from the same thread as well as - * asynchronous threads working on behalf of the owner, we must unilaterally - * allow all reads access as long at least one reader is held (even if a write - * is requested). This has the side effect of write starvation, but write locks - * are extremely rare, and a solution to this problem would be significantly - * more complex (if even possible). - * - * We would like to assert that the namespace lock isn't held, but this is a - * valid use during create. - */ void spa_config_enter(spa_t *spa, krw_t rw, void *tag) { - spa_config_lock_t *scl = &spa->spa_config_lock; - - mutex_enter(&scl->scl_lock); - - if (scl->scl_writer != curthread) { - if (rw == RW_READER) { - while (scl->scl_writer != NULL) - cv_wait(&scl->scl_cv, &scl->scl_lock); - } else { - while (scl->scl_writer != NULL || - !refcount_is_zero(&scl->scl_count)) - cv_wait(&scl->scl_cv, &scl->scl_lock); - scl->scl_writer = curthread; - } - } - - (void) refcount_add(&scl->scl_count, tag); - - mutex_exit(&scl->scl_lock); + rprw_enter(&spa->spa_config_lock, rw, tag); } -/* - * Release the spa config lock, notifying any waiters in the process. - */ void spa_config_exit(spa_t *spa, void *tag) { - spa_config_lock_t *scl = &spa->spa_config_lock; - - mutex_enter(&scl->scl_lock); - - ASSERT(!refcount_is_zero(&scl->scl_count)); - if (refcount_remove(&scl->scl_count, tag) == 0) { - cv_broadcast(&scl->scl_cv); - scl->scl_writer = NULL; /* OK in either case */ - } - - mutex_exit(&scl->scl_lock); + rprw_exit(&spa->spa_config_lock, tag); } -/* - * Returns true if the config lock is held in the given manner. 
- */ boolean_t spa_config_held(spa_t *spa, krw_t rw) { - spa_config_lock_t *scl = &spa->spa_config_lock; - boolean_t held; - - mutex_enter(&scl->scl_lock); - if (rw == RW_WRITER) - held = (scl->scl_writer == curthread); - else - held = !refcount_is_zero(&scl->scl_count); - mutex_exit(&scl->scl_lock); - - return (held); + return (rprw_held(&spa->spa_config_lock, rw)); } /* @@ -1105,6 +1048,7 @@ spa_init(int mode) zio_init(); dmu_init(); zil_init(); + zfs_prop_init(); spa_config_load(); } @@ -1116,6 +1060,7 @@ spa_fini(void) zil_fini(); dmu_fini(); zio_fini(); + unique_fini(); refcount_fini(); avl_destroy(&spa_namespace_avl); diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h index eba1e64dc4..121152632d 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h @@ -157,7 +157,7 @@ void zfs_znode_byteswap(void *buf, size_t size); int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, objset_t **osp); void dmu_objset_close(objset_t *os); -int dmu_objset_evict_dbufs(objset_t *os, int try); +int dmu_objset_evict_dbufs(objset_t *os, boolean_t try); int dmu_objset_create(const char *name, dmu_objset_type_t type, objset_t *clone_parent, void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h index a401930af8..fbc1450b47 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h @@ -108,7 +108,7 @@ uint64_t dmu_objset_fsid_guid(objset_t *os); int dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags); void dmu_objset_byteswap(void *buf, size_t size); -int dmu_objset_evict_dbufs(objset_t *os, int try); +int dmu_objset_evict_dbufs(objset_t *os, boolean_t try); /* called from dsl */ void dmu_objset_sync(objset_impl_t *os, zio_t *zio, dmu_tx_t *tx); diff --git a/usr/src/uts/common/fs/zfs/sys/dnode.h 
b/usr/src/uts/common/fs/zfs/sys/dnode.h index 327e538cf8..02f9de3f06 100644 --- a/usr/src/uts/common/fs/zfs/sys/dnode.h +++ b/usr/src/uts/common/fs/zfs/sys/dnode.h @@ -226,7 +226,7 @@ void dnode_init(void); void dnode_fini(void); int dnode_next_offset(dnode_t *dn, boolean_t hole, uint64_t *off, int minlvl, uint64_t blkfill, uint64_t txg); -int dnode_evict_dbufs(dnode_t *dn, int try); +int dnode_evict_dbufs(dnode_t *dn, boolean_t try); #ifdef ZFS_DEBUG diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h index 8cfc1dcc98..2a8d354be4 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h @@ -87,6 +87,7 @@ typedef struct dsl_dataset { dsl_dataset_phys_t *ds_phys; dmu_buf_t *ds_dbuf; uint64_t ds_object; + uint64_t ds_fsid_guid; /* only used in syncing context: */ struct dsl_dataset *ds_prev; /* only valid for non-snapshots */ @@ -110,6 +111,9 @@ typedef struct dsl_dataset { /* no locking; only for making guesses */ uint64_t ds_trysnap_txg; + /* for objset_open() */ + kmutex_t ds_opening_lock; + /* Protected by ds_lock; keep at end of struct for better locality */ char ds_snapname[MAXNAMELEN]; } dsl_dataset_t; diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h b/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h index 1cc22b3dc8..1d01123c77 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h @@ -23,8 +23,8 @@ * Use is subject to license terms. 
*/ -#ifndef _SYS_DSL_PERMS_H -#define _SYS_DSL_PERMS_H +#ifndef _SYS_DSL_DELEG_H +#define _SYS_DSL_DELEG_H #pragma ident "%Z%%M% %I% %E% SMI" @@ -48,30 +48,13 @@ extern "C" { #define ZFS_DELEG_PERM_SHARE "share" #define ZFS_DELEG_PERM_SEND "send" #define ZFS_DELEG_PERM_RECEIVE "receive" -#define ZFS_DELEG_PERM_QUOTA "quota" -#define ZFS_DELEG_PERM_RESERVATION "reservation" -#define ZFS_DELEG_PERM_VOLSIZE "volsize" -#define ZFS_DELEG_PERM_RECORDSIZE "recordsize" -#define ZFS_DELEG_PERM_MOUNTPOINT "mountpoint" -#define ZFS_DELEG_PERM_SHARENFS "sharenfs" -#define ZFS_DELEG_PERM_CHECKSUM "checksum" -#define ZFS_DELEG_PERM_COMPRESSION "compression" -#define ZFS_DELEG_PERM_ATIME "atime" -#define ZFS_DELEG_PERM_DEVICES "devices" -#define ZFS_DELEG_PERM_EXEC "exec" -#define ZFS_DELEG_PERM_SETUID "setuid" -#define ZFS_DELEG_PERM_READONLY "readonly" -#define ZFS_DELEG_PERM_ZONED "zoned" -#define ZFS_DELEG_PERM_SNAPDIR "snapdir" -#define ZFS_DELEG_PERM_ACLMODE "aclmode" -#define ZFS_DELEG_PERM_ACLINHERIT "aclinherit" #define ZFS_DELEG_PERM_ALLOW "allow" -#define ZFS_DELEG_PERM_CANMOUNT "canmount" #define ZFS_DELEG_PERM_USERPROP "userprop" -#define ZFS_DELEG_PERM_SHAREISCSI "shareiscsi" -#define ZFS_DELEG_PERM_XATTR "xattr" -#define ZFS_DELEG_PERM_COPIES "copies" -#define ZFS_DELEG_PERM_VERSION "version" + +/* + * Note: the names of properties that are marked delegatable are also + * valid delegated permissions + */ int dsl_deleg_get(const char *ddname, nvlist_t **nvp); int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset); @@ -80,10 +63,10 @@ void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr); int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr); int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr); int dsl_deleg_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx); -boolean_t dsl_delegation_on(objset_t *os); +boolean_t dsl_delegation_on(objset_t *os); #ifdef __cplusplus } #endif -#endif /* _SYS_DSL_PERMS_H */ 
+#endif /* _SYS_DSL_DELEG_H */ diff --git a/usr/src/uts/common/fs/zfs/sys/refcount.h b/usr/src/uts/common/fs/zfs/sys/refcount.h index 0b7e12f2cb..d3fe7b1f89 100644 --- a/usr/src/uts/common/fs/zfs/sys/refcount.h +++ b/usr/src/uts/common/fs/zfs/sys/refcount.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -59,7 +59,7 @@ typedef struct refcount { int64_t rc_removed_count; } refcount_t; -/* Note: refcount_t should be initialized to zero before use. */ +/* Note: refcount_t must be initialized with refcount_create() */ void refcount_create(refcount_t *rc); void refcount_destroy(refcount_t *rc); diff --git a/usr/src/uts/common/fs/zfs/sys/rprwlock.h b/usr/src/uts/common/fs/zfs/sys/rprwlock.h new file mode 100644 index 0000000000..ba23799c9d --- /dev/null +++ b/usr/src/uts/common/fs/zfs/sys/rprwlock.h @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_RPRWLOCK_H +#define _SYS_RPRWLOCK_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/inttypes.h> +#include <sys/list.h> +#include <sys/zfs_context.h> +#include <sys/refcount.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct rprwlock { + kmutex_t rw_lock; + kthread_t *rw_writer; + kcondvar_t rw_cv; + refcount_t rw_count; +} rprwlock_t; + +void rprw_init(rprwlock_t *rwl); +void rprw_destroy(rprwlock_t *rwl); +void rprw_enter_read(rprwlock_t *rwl, void *tag); +void rprw_enter_write(rprwlock_t *rwl, void *tag); +void rprw_enter(rprwlock_t *rwl, krw_t rw, void *tag); +void rprw_exit(rprwlock_t *rwl, void *tag); +boolean_t rprw_held(rprwlock_t *rwl, krw_t rw); +#define RPRW_READ_HELD(x) rprw_held(x, RW_READER) +#define RPRW_WRITE_HELD(x) rprw_held(x, RW_WRITER) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_RPRWLOCK_H */ diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h index 0d155d7c0c..c313c696a4 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h @@ -37,19 +37,13 @@ #include <sys/zfs_context.h> #include <sys/avl.h> #include <sys/refcount.h> +#include <sys/rprwlock.h> #include <sys/bplist.h> #ifdef __cplusplus extern "C" { #endif -typedef struct spa_config_lock { - kmutex_t scl_lock; - refcount_t scl_count; - kthread_t *scl_writer; - kcondvar_t scl_cv; -} spa_config_lock_t; - typedef struct spa_error_entry { zbookmark_t se_bookmark; char *se_name; @@ -152,11 +146,12 @@ struct spa { uint64_t spa_bootfs; /* default boot filesystem */ boolean_t spa_delegation; /* delegation on/off */ /* - * spa_refcnt must be the last element because it changes size based on - * compilation options. In order for the MDB module to function - * correctly, the other fields must remain in the same location. + * spa_refcount & spa_config_lock must be the last elements + * because refcount_t changes size based on compilation options. 
+ * In order for the MDB module to function correctly, the other + * fields must remain in the same location. */ - spa_config_lock_t spa_config_lock; /* configuration changes */ + rprwlock_t spa_config_lock; /* configuration changes */ refcount_t spa_refcount; /* number of opens */ }; diff --git a/usr/src/uts/common/fs/zfs/sys/unique.h b/usr/src/uts/common/fs/zfs/sys/unique.h index c8c177e3ca..2ef3093edf 100644 --- a/usr/src/uts/common/fs/zfs/sys/unique.h +++ b/usr/src/uts/common/fs/zfs/sys/unique.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -39,8 +38,12 @@ extern "C" { #define UNIQUE_BITS 56 void unique_init(void); +void unique_fini(void); -/* Return a new unique value. */ +/* + * Return a new unique value (which will not be uniquified against until + * it is unique_insert()-ed). + */ uint64_t unique_create(void); /* Return a unique value, which equals the one passed in if possible. 
*/ diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h index 8228fe4709..663e8ac64b 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -133,8 +133,6 @@ typedef struct zfs_cmd { uint64_t zc_nvlist_dst; /* really (char *) */ uint64_t zc_nvlist_dst_size; uint64_t zc_cookie; - uint64_t zc_cred; - uint64_t zc_dev; uint64_t zc_objset_type; uint64_t zc_perm_action; uint64_t zc_history; /* really (char *) */ diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h b/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h index 7a9d578d6e..e76ccc3ce5 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h @@ -52,10 +52,9 @@ struct zfsvfs { uint_t z_acl_mode; /* acl chmod/mode behavior */ uint_t z_acl_inherit; /* acl inheritance behavior */ boolean_t z_atime; /* enable atimes mount option */ - boolean_t z_unmounted1; /* unmounted phase 1 */ - boolean_t z_unmounted2; /* unmounted phase 2 */ - uint32_t z_op_cnt; /* vnode/vfs operations ref count */ - krwlock_t z_um_lock; /* rw lock for umount phase 2 */ + boolean_t z_unmounted; /* unmounted */ + krwlock_t z_unmount_lock; + krwlock_t z_unmount_inactive_lock; list_t z_all_znodes; /* all vnodes in the fs */ kmutex_t z_znodes_lock; /* lock for z_all_znodes */ vnode_t *z_ctldir; /* .zfs directory pointer */ diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h index 4b731ba320..54f9c39619 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h @@ -192,13 +192,15 @@ typedef struct znode { */ #define ZFS_ENTER(zfsvfs) \ { \ - atomic_add_32(&(zfsvfs)->z_op_cnt, 1); \ - if ((zfsvfs)->z_unmounted1) { \ + if (rw_tryenter(&(zfsvfs)->z_unmount_lock, RW_READER) == 0) \ + return (EIO); \ + if ((zfsvfs)->z_unmounted) { \ ZFS_EXIT(zfsvfs); \ return (EIO); \ } \ } -#define ZFS_EXIT(zfsvfs) 
atomic_add_32(&(zfsvfs)->z_op_cnt, -1) + +#define ZFS_EXIT(zfsvfs) rw_exit(&(zfsvfs)->z_unmount_lock) /* * Macros for dealing with dmu_buf_hold diff --git a/usr/src/uts/common/fs/zfs/sys/zvol.h b/usr/src/uts/common/fs/zfs/sys/zvol.h index 34f1ca1c31..f7a0f8fd4e 100644 --- a/usr/src/uts/common/fs/zfs/sys/zvol.h +++ b/usr/src/uts/common/fs/zfs/sys/zvol.h @@ -40,9 +40,9 @@ extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize); extern int zvol_check_volblocksize(uint64_t volblocksize); extern int zvol_get_stats(objset_t *os, nvlist_t *nv); extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); -extern int zvol_create_minor(const char *, dev_t); +extern int zvol_create_minor(const char *, major_t); extern int zvol_remove_minor(const char *); -extern int zvol_set_volsize(const char *, dev_t, uint64_t); +extern int zvol_set_volsize(const char *, major_t, uint64_t); extern int zvol_set_volblocksize(const char *, uint64_t); extern int zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr); diff --git a/usr/src/uts/common/fs/zfs/unique.c b/usr/src/uts/common/fs/zfs/unique.c index 90aa13817d..fbe7b619a2 100644 --- a/usr/src/uts/common/fs/zfs/unique.c +++ b/usr/src/uts/common/fs/zfs/unique.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -30,7 +30,7 @@ #include <sys/unique.h> static avl_tree_t unique_avl; -static kmutex_t unique_mtx; /* Lock never initialized. 
*/ +static kmutex_t unique_mtx; typedef struct unique { avl_node_t un_link; @@ -57,12 +57,22 @@ unique_init(void) { avl_create(&unique_avl, unique_compare, sizeof (unique_t), offsetof(unique_t, un_link)); + mutex_init(&unique_mtx, NULL, MUTEX_DEFAULT, NULL); +} + +void +unique_fini(void) +{ + avl_destroy(&unique_avl); + mutex_destroy(&unique_mtx); } uint64_t unique_create(void) { - return (unique_insert(0)); + uint64_t value = unique_insert(0); + unique_remove(value); + return (value); } uint64_t diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c index 0158228a34..4b22a68fee 100644 --- a/usr/src/uts/common/fs/zfs/vdev_label.c +++ b/usr/src/uts/common/fs/zfs/vdev_label.c @@ -318,7 +318,8 @@ vdev_label_read_config(vdev_t *vd) zio_t *zio; int l; - ASSERT(spa_config_held(spa, RW_READER)); + ASSERT(spa_config_held(spa, RW_READER) || + spa_config_held(spa, RW_WRITER)); if (vdev_is_dead(vd)) return (NULL); diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index 5947676818..2e95ae6c52 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -280,7 +280,7 @@ zfs_secpolicy_setprop(const char *name, zfs_prop_t prop, cred_t *cr) break; } - return (zfs_secpolicy_write_perms(name, zfs_prop_perm(prop), cr)); + return (zfs_secpolicy_write_perms(name, zfs_prop_to_name(prop), cr)); } int @@ -1175,7 +1175,7 @@ retry: } static int -zfs_set_prop_nvlist(const char *name, dev_t dev, cred_t *cr, nvlist_t *nvl) +zfs_set_prop_nvlist(const char *name, nvlist_t *nvl) { nvpair_t *elem; int error; @@ -1200,13 +1200,13 @@ zfs_set_prop_nvlist(const char *name, dev_t dev, cred_t *cr, nvlist_t *nvl) return (EINVAL); error = zfs_secpolicy_write_perms(name, - ZFS_DELEG_PERM_USERPROP, cr); + ZFS_DELEG_PERM_USERPROP, CRED()); if (error) return (error); continue; } - if ((error = zfs_secpolicy_setprop(name, prop, cr)) != 0) + if ((error = zfs_secpolicy_setprop(name, prop, CRED())) 
!= 0) return (error); /* @@ -1285,7 +1285,8 @@ zfs_set_prop_nvlist(const char *name, dev_t dev, cred_t *cr, nvlist_t *nvl) case ZFS_PROP_VOLSIZE: if ((error = nvpair_value_uint64(elem, &intval)) != 0 || - (error = zvol_set_volsize(name, dev, intval)) != 0) + (error = zvol_set_volsize(name, + ddi_driver_major(zfs_dip), intval)) != 0) return (error); break; @@ -1304,7 +1305,7 @@ zfs_set_prop_nvlist(const char *name, dev_t dev, cred_t *cr, nvlist_t *nvl) default: if (nvpair_type(elem) == DATA_TYPE_STRING) { if (zfs_prop_get_type(prop) != - prop_type_string) + PROP_TYPE_STRING) return (EINVAL); VERIFY(nvpair_value_string(elem, &strval) == 0); if ((error = dsl_prop_set(name, @@ -1317,15 +1318,15 @@ zfs_set_prop_nvlist(const char *name, dev_t dev, cred_t *cr, nvlist_t *nvl) VERIFY(nvpair_value_uint64(elem, &intval) == 0); switch (zfs_prop_get_type(prop)) { - case prop_type_number: + case PROP_TYPE_NUMBER: break; - case prop_type_boolean: + case PROP_TYPE_BOOLEAN: if (intval > 1) return (EINVAL); break; - case prop_type_string: + case PROP_TYPE_STRING: return (EINVAL); - case prop_type_index: + case PROP_TYPE_INDEX: if (zfs_prop_index_to_string(prop, intval, &unused) != 0) return (EINVAL); @@ -1366,13 +1367,12 @@ zfs_ioc_set_prop(zfs_cmd_t *zc) if (!zfs_prop_user(zc->zc_value)) return (EINVAL); error = zfs_secpolicy_write_perms(zc->zc_name, - ZFS_DELEG_PERM_USERPROP, - (cred_t *)(uintptr_t)zc->zc_cred); + ZFS_DELEG_PERM_USERPROP, CRED()); } else { if (!zfs_prop_inheritable(prop)) return (EINVAL); error = zfs_secpolicy_setprop(zc->zc_name, - prop, (cred_t *)(uintptr_t)zc->zc_cred); + prop, CRED()); } if (error) return (error); @@ -1383,8 +1383,7 @@ zfs_ioc_set_prop(zfs_cmd_t *zc) if ((error = get_nvlist(zc, &nvl)) != 0) return (error); - error = zfs_set_prop_nvlist(zc->zc_name, zc->zc_dev, - (cred_t *)(uintptr_t)zc->zc_cred, nvl); + error = zfs_set_prop_nvlist(zc->zc_name, nvl); nvlist_free(nvl); return (error); @@ -1555,7 +1554,7 @@ zfs_ioc_iscsi_perm_check(zfs_cmd_t *zc) 
} nvlist_free(nvp); error = dsl_deleg_access(zc->zc_name, - ZFS_DELEG_PERM_SHAREISCSI, usercred); + zfs_prop_to_name(ZFS_PROP_SHAREISCSI), usercred); crfree(usercred); return (error); } @@ -1565,7 +1564,6 @@ zfs_ioc_set_fsacl(zfs_cmd_t *zc) { int error; nvlist_t *fsaclnv = NULL; - cred_t *cr; if ((error = get_nvlist(zc, &fsaclnv)) != 0) return (error); @@ -1584,13 +1582,15 @@ zfs_ioc_set_fsacl(zfs_cmd_t *zc) * the nvlist(s) */ - cr = (cred_t *)(uintptr_t)zc->zc_cred; - error = secpolicy_zfs(cr); + error = secpolicy_zfs(CRED()); if (error) { - if (zc->zc_perm_action == B_FALSE) - error = dsl_deleg_can_allow(zc->zc_name, fsaclnv, cr); - else - error = dsl_deleg_can_unallow(zc->zc_name, fsaclnv, cr); + if (zc->zc_perm_action == B_FALSE) { + error = dsl_deleg_can_allow(zc->zc_name, + fsaclnv, CRED()); + } else { + error = dsl_deleg_can_unallow(zc->zc_name, + fsaclnv, CRED()); + } } if (error == 0) @@ -1617,7 +1617,7 @@ zfs_ioc_get_fsacl(zfs_cmd_t *zc) static int zfs_ioc_create_minor(zfs_cmd_t *zc) { - return (zvol_create_minor(zc->zc_name, zc->zc_dev)); + return (zvol_create_minor(zc->zc_name, ddi_driver_major(zfs_dip))); } static int @@ -1766,9 +1766,7 @@ zfs_ioc_create(zfs_cmd_t *zc) * It would be nice to do this atomically. 
*/ if (error == 0) { - if ((error = zfs_set_prop_nvlist(zc->zc_name, - zc->zc_dev, (cred_t *)(uintptr_t)zc->zc_cred, - nvprops)) != 0) + if ((error = zfs_set_prop_nvlist(zc->zc_name, nvprops)) != 0) (void) dmu_objset_destroy(zc->zc_name); } @@ -2185,6 +2183,7 @@ zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp)); vec = cmd - ZFS_IOC; + ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip)); if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) return (EINVAL); @@ -2193,11 +2192,8 @@ zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) error = xcopyin((void *)arg, zc, sizeof (zfs_cmd_t)); - if (error == 0) { - zc->zc_cred = (uintptr_t)cr; - zc->zc_dev = dev; + if (error == 0) error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr); - } /* * Ensure that all pool/dataset names are valid before we pass down to diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c index f69fdff1ce..c36dc9f23a 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c @@ -73,7 +73,6 @@ static int zfs_root(vfs_t *vfsp, vnode_t **vpp); static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); static void zfs_freevfs(vfs_t *vfsp); -static void zfs_objset_close(zfsvfs_t *zfsvfs); static const fs_operation_def_t zfs_vfsops_template[] = { VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, @@ -526,7 +525,8 @@ zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); - rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); + rw_init(&zfsvfs->z_unmount_lock, NULL, RW_DEFAULT, NULL); + rw_init(&zfsvfs->z_unmount_inactive_lock, NULL, RW_DEFAULT, NULL); /* Initialize the generic filesystem structure. 
*/ vfsp->vfs_bcount = 0; @@ -1009,6 +1009,8 @@ static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) { zfsvfs_t *zfsvfs = vfsp->vfs_data; + objset_t *os = zfsvfs->z_os; + znode_t *zp, *nextzp; int ret; ret = secpolicy_fs_unmount(cr, vfsp); @@ -1036,58 +1038,102 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) return (ret); } - if (fflag & MS_FORCE) { - vfsp->vfs_flag |= VFS_UNMOUNTED; - zfsvfs->z_unmounted1 = B_TRUE; - - /* - * Ensure that z_unmounted1 reaches global visibility - * before z_op_cnt. - */ - membar_producer(); - + if (!(fflag & MS_FORCE)) { /* - * Wait for all zfs threads to leave zfs. - * Grabbing a rwlock as reader in all vops and - * as writer here doesn't work because it too easy to get - * multiple reader enters as zfs can re-enter itself. - * This can lead to deadlock if there is an intervening - * rw_enter as writer. - * So a file system threads ref count (z_op_cnt) is used. - * A polling loop on z_op_cnt may seem inefficient, but - * - this saves all threads on exit from having to grab a - * mutex in order to cv_signal - * - only occurs on forced unmount in the rare case when - * there are outstanding threads within the file system. + * Check the number of active vnodes in the file system. + * Our count is maintained in the vfs structure, but the + * number is off by 1 to indicate a hold on the vfs + * structure itself. + * + * The '.zfs' directory maintains a reference of its + * own, and any active references underneath are + * reflected in the vnode count. */ - while (zfsvfs->z_op_cnt) { - delay(1); + if (zfsvfs->z_ctldir == NULL) { + if (vfsp->vfs_count > 1) + return (EBUSY); + } else { + if (vfsp->vfs_count > 2 || + zfsvfs->z_ctldir->v_count > 1) { + return (EBUSY); + } } + } - zfs_objset_close(zfsvfs); + vfsp->vfs_flag |= VFS_UNMOUNTED; + + rw_enter(&zfsvfs->z_unmount_lock, RW_WRITER); + rw_enter(&zfsvfs->z_unmount_inactive_lock, RW_WRITER); - return (0); - } /* - * Check the number of active vnodes in the file system. 
- * Our count is maintained in the vfs structure, but the number - * is off by 1 to indicate a hold on the vfs structure itself. + * At this point there are no vops active, and any new vops will + * fail with EIO since we have z_unmount_lock for writer (only + * relevant for forced unmount). * - * The '.zfs' directory maintains a reference of its own, and any active - * references underneath are reflected in the vnode count. + * Release all holds on dbufs. + * Note, the dmu can still callback via znode_pageout_func() + * which can zfs_znode_free() the znode. So we lock + * z_all_znodes; search the list for a held dbuf; drop the lock + * (we know zp can't disappear if we hold a dbuf lock) then + * regrab the lock and restart. */ - if (zfsvfs->z_ctldir == NULL) { - if (vfsp->vfs_count > 1) - return (EBUSY); - } else { - if (vfsp->vfs_count > 2 || - (zfsvfs->z_ctldir->v_count > 1 && !(fflag & MS_FORCE))) { - return (EBUSY); + mutex_enter(&zfsvfs->z_znodes_lock); + for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { + nextzp = list_next(&zfsvfs->z_all_znodes, zp); + if (zp->z_dbuf_held) { + /* dbufs should only be held when force unmounting */ + zp->z_dbuf_held = 0; + mutex_exit(&zfsvfs->z_znodes_lock); + dmu_buf_rele(zp->z_dbuf, NULL); + /* Start again */ + mutex_enter(&zfsvfs->z_znodes_lock); + nextzp = list_head(&zfsvfs->z_all_znodes); } } + mutex_exit(&zfsvfs->z_znodes_lock); - vfsp->vfs_flag |= VFS_UNMOUNTED; - zfs_objset_close(zfsvfs); + /* + * Set the unmounted flag and let new vops unblock. + * zfs_inactive will have the unmounted behavior, and all other + * vops will fail with EIO. + */ + zfsvfs->z_unmounted = B_TRUE; + rw_exit(&zfsvfs->z_unmount_lock); + rw_exit(&zfsvfs->z_unmount_inactive_lock); + + /* + * Unregister properties. + */ + if (!dmu_objset_is_snapshot(os)) + zfs_unregister_callbacks(zfsvfs); + + /* + * Close the zil. NB: Can't close the zil while zfs_inactive + * threads are blocked as zil_close can call zfs_inactive. 
+ */ + if (zfsvfs->z_log) { + zil_close(zfsvfs->z_log); + zfsvfs->z_log = NULL; + } + + /* + * Evict all dbufs so that cached znodes will be freed + */ + if (dmu_objset_evict_dbufs(os, B_TRUE)) { + txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); + (void) dmu_objset_evict_dbufs(os, B_FALSE); + } + + /* + * Finally close the objset + */ + dmu_objset_close(os); + + /* + * We can now safely destroy the '.zfs' directory node. + */ + if (zfsvfs->z_ctldir != NULL) + zfsctl_destroy(zfsvfs); return (0); } @@ -1177,92 +1223,13 @@ zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) } static void -zfs_objset_close(zfsvfs_t *zfsvfs) -{ - znode_t *zp, *nextzp; - objset_t *os = zfsvfs->z_os; - - /* - * For forced unmount, at this point all vops except zfs_inactive - * are erroring EIO. We need to now suspend zfs_inactive threads - * while we are freeing dbufs before switching zfs_inactive - * to use behaviour without a objset. - */ - rw_enter(&zfsvfs->z_um_lock, RW_WRITER); - - /* - * Release all holds on dbufs - * Note, although we have stopped all other vop threads and - * zfs_inactive(), the dmu can callback via znode_pageout_func() - * which can zfs_znode_free() the znode. - * So we lock z_all_znodes; search the list for a held - * dbuf; drop the lock (we know zp can't disappear if we hold - * a dbuf lock; then regrab the lock and restart. - */ - mutex_enter(&zfsvfs->z_znodes_lock); - for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { - nextzp = list_next(&zfsvfs->z_all_znodes, zp); - if (zp->z_dbuf_held) { - /* dbufs should only be held when force unmounting */ - zp->z_dbuf_held = 0; - mutex_exit(&zfsvfs->z_znodes_lock); - dmu_buf_rele(zp->z_dbuf, NULL); - /* Start again */ - mutex_enter(&zfsvfs->z_znodes_lock); - nextzp = list_head(&zfsvfs->z_all_znodes); - } - } - mutex_exit(&zfsvfs->z_znodes_lock); - - /* - * Unregister properties. 
- */ - if (!dmu_objset_is_snapshot(os)) - zfs_unregister_callbacks(zfsvfs); - - /* - * Switch zfs_inactive to behaviour without an objset. - * It just tosses cached pages and frees the znode & vnode. - * Then re-enable zfs_inactive threads in that new behaviour. - */ - zfsvfs->z_unmounted2 = B_TRUE; - rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ - - /* - * Close the zil. Can't close the zil while zfs_inactive - * threads are blocked as zil_close can call zfs_inactive. - */ - if (zfsvfs->z_log) { - zil_close(zfsvfs->z_log); - zfsvfs->z_log = NULL; - } - - /* - * Evict all dbufs so that cached znodes will be freed - */ - if (dmu_objset_evict_dbufs(os, 1)) { - txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); - (void) dmu_objset_evict_dbufs(os, 0); - } - - /* - * Finally close the objset - */ - dmu_objset_close(os); - - /* - * We can now safely destroy the '.zfs' directory node. - */ - if (zfsvfs->z_ctldir != NULL) - zfsctl_destroy(zfsvfs); - -} - -static void zfs_freevfs(vfs_t *vfsp) { zfsvfs_t *zfsvfs = vfsp->vfs_data; + mutex_destroy(&zfsvfs->z_znodes_lock); + rw_destroy(&zfsvfs->z_unmount_lock); + rw_destroy(&zfsvfs->z_unmount_inactive_lock); kmem_free(zfsvfs, sizeof (zfsvfs_t)); atomic_add_32(&zfs_active_fs_count, -1); diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c index cd592628d9..852555b7f3 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c @@ -3005,8 +3005,8 @@ zfs_inactive(vnode_t *vp, cred_t *cr) zfsvfs_t *zfsvfs = zp->z_zfsvfs; int error; - rw_enter(&zfsvfs->z_um_lock, RW_READER); - if (zfsvfs->z_unmounted2) { + rw_enter(&zfsvfs->z_unmount_inactive_lock, RW_READER); + if (zfsvfs->z_unmounted) { ASSERT(zp->z_dbuf_held == 0); if (vn_has_cached_data(vp)) { @@ -3022,7 +3022,7 @@ zfs_inactive(vnode_t *vp, cred_t *cr) } else { mutex_exit(&zp->z_lock); } - rw_exit(&zfsvfs->z_um_lock); + rw_exit(&zfsvfs->z_unmount_inactive_lock); 
VFS_RELE(zfsvfs->z_vfs); return; } @@ -3053,7 +3053,7 @@ zfs_inactive(vnode_t *vp, cred_t *cr) } zfs_zinactive(zp); - rw_exit(&zfsvfs->z_um_lock); + rw_exit(&zfsvfs->z_unmount_inactive_lock); } /* diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c index 2d9cc65ef8..79e12f7f00 100644 --- a/usr/src/uts/common/fs/zfs/zvol.c +++ b/usr/src/uts/common/fs/zfs/zvol.c @@ -114,9 +114,9 @@ int zvol_maxphys = DMU_MAX_ACCESS/2; static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio); static void -zvol_size_changed(zvol_state_t *zv, dev_t dev) +zvol_size_changed(zvol_state_t *zv, major_t maj) { - dev = makedevice(getmajor(dev), zv->zv_minor); + dev_t dev = makedevice(maj, zv->zv_minor); VERIFY(ddi_prop_update_int64(dev, zfs_dip, "Size", zv->zv_volsize) == DDI_SUCCESS); @@ -315,7 +315,7 @@ zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { * Create a minor node for the specified volume. */ int -zvol_create_minor(const char *name, dev_t dev) +zvol_create_minor(const char *name, major_t maj) { zvol_state_t *zv; objset_t *os; @@ -452,7 +452,7 @@ zvol_create_minor(const char *name, dev_t dev) zil_replay(os, zv, &zv->zv_txg_assign, zvol_replay_vector); - zvol_size_changed(zv, dev); + zvol_size_changed(zv, maj); /* XXX this should handle the possible i/o error */ VERIFY(dsl_prop_register(dmu_objset_ds(zv->zv_objset), @@ -512,7 +512,7 @@ zvol_remove_minor(const char *name) } int -zvol_set_volsize(const char *name, dev_t dev, uint64_t volsize) +zvol_set_volsize(const char *name, major_t maj, uint64_t volsize) { zvol_state_t *zv; dmu_tx_t *tx; @@ -559,7 +559,7 @@ zvol_set_volsize(const char *name, dev_t dev, uint64_t volsize) if (error == 0) { zv->zv_volsize = volsize; - zvol_size_changed(zv, dev); + zvol_size_changed(zv, maj); } mutex_exit(&zvol_state_lock); |