Diffstat (limited to 'usr/src/uts')
28 files changed, 436 insertions, 185 deletions
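Reader's note: the ZFS portion of this change retires most uses of the dnode's dn_struct_rwlock around db_blkptr in favor of a per-dbuf db_rwlock taken on the parent. Below is a minimal sketch of the calling pattern the dbuf.c hunks establish; the helper signatures and DLT_* values come from this diff, while read_blkptr_copy() itself is a hypothetical caller, not code from the commit:

	/*
	 * Hypothetical caller of the new parent-locking helpers.
	 * dmu_buf_lock_parent() reports which lock it took: the parent
	 * dbuf's db_rwlock (DLT_PARENT), the objset's ds_bp_rwlock
	 * (DLT_OBJSET), or nothing (DLT_NONE) for the top-most indirect
	 * block of the MOS meta-dnode.
	 */
	static blkptr_t
	read_blkptr_copy(dmu_buf_impl_t *db)
	{
		db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG);
		blkptr_t bp = *db->db_blkptr;	/* assumes db_blkptr != NULL */
		/*
		 * The returned type must be passed back to the unlock side:
		 * an indirection increase can give a formerly top-most block
		 * a parent between lock and unlock, so the unlock side
		 * cannot safely re-derive which lock was taken.
		 */
		dmu_buf_unlock_parent(db, dblt, FTAG);
		return (bp);
	}

Per the dbuf.h hunk, the lock ordering is dn_struct_rwlock before db_rwlock, and no two dbufs' db_rwlocks are held at once.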
diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c index ae0b1fc878..a8569e9a88 100644 --- a/usr/src/uts/common/fs/zfs/dbuf.c +++ b/usr/src/uts/common/fs/zfs/dbuf.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2012, 2019 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. @@ -176,6 +176,7 @@ dbuf_cons(void *vdb, void *unused, int kmflag) bzero(db, sizeof (dmu_buf_impl_t)); mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL); + rw_init(&db->db_rwlock, NULL, RW_DEFAULT, NULL); cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL); multilist_link_init(&db->db_cache_link); zfs_refcount_create(&db->db_holds); @@ -189,6 +190,7 @@ dbuf_dest(void *vdb, void *unused) { dmu_buf_impl_t *db = vdb; mutex_destroy(&db->db_mtx); + rw_destroy(&db->db_rwlock); cv_destroy(&db->db_changed); ASSERT(!multilist_link_active(&db->db_cache_link)); zfs_refcount_destroy(&db->db_holds); @@ -789,10 +791,10 @@ dbuf_verify(dmu_buf_impl_t *db) db->db.db_object); /* * dnode_grow_indblksz() can make this fail if we don't - * have the struct_rwlock. XXX indblksz no longer + * have the parent's rwlock. XXX indblksz no longer * grows. safe to do this now? */ - if (RW_WRITE_HELD(&dn->dn_struct_rwlock)) { + if (RW_LOCK_HELD(&db->db_parent->db_rwlock)) { ASSERT3P(db->db_blkptr, ==, ((blkptr_t *)db->db_parent->db.db_data + db->db_blkid % epb)); @@ -868,6 +870,44 @@ dbuf_clear_data(dmu_buf_impl_t *db) db->db_state = DB_UNCACHED; } +/* + * This function is used to lock the parent of the provided dbuf. This should be + * used when modifying or reading db_blkptr. + */ +db_lock_type_t +dmu_buf_lock_parent(dmu_buf_impl_t *db, krw_t rw, void *tag) +{ + enum db_lock_type ret = DLT_NONE; + if (db->db_parent != NULL) { + rw_enter(&db->db_parent->db_rwlock, rw); + ret = DLT_PARENT; + } else if (dmu_objset_ds(db->db_objset) != NULL) { + rrw_enter(&dmu_objset_ds(db->db_objset)->ds_bp_rwlock, rw, + tag); + ret = DLT_OBJSET; + } + /* + * We only return a DLT_NONE lock when it's the top-most indirect block + * of the meta-dnode of the MOS. + */ + return (ret); +} + +/* + * We need to pass the lock type in because it's possible that the block will + * move from being the topmost indirect block in a dnode (and thus, have no + * parent) to not the top-most via an indirection increase. This would cause a + * panic if we didn't pass the lock type in. + */ +void +dmu_buf_unlock_parent(dmu_buf_impl_t *db, db_lock_type_t type, void *tag) +{ + if (type == DLT_PARENT) + rw_exit(&db->db_parent->db_rwlock); + else if (type == DLT_OBJSET) + rrw_exit(&dmu_objset_ds(db->db_objset)->ds_bp_rwlock, tag); +} + static void dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf) { @@ -1042,8 +1082,13 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags) return (err); } +/* + * Drops db_mtx and the parent lock specified by dblt and tag before + * returning. 
+ */ static int -dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) +dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, + db_lock_type_t dblt, void *tag) { dnode_t *dn; zbookmark_phys_t zb; @@ -1053,11 +1098,11 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) DB_DNODE_ENTER(db); dn = DB_DNODE(db); ASSERT(!zfs_refcount_is_zero(&db->db_holds)); - /* We need the struct_rwlock to prevent db_blkptr from changing. */ - ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); ASSERT(MUTEX_HELD(&db->db_mtx)); ASSERT(db->db_state == DB_UNCACHED); ASSERT(db->db_buf == NULL); + ASSERT(db->db_parent == NULL || + RW_LOCK_HELD(&db->db_parent->db_rwlock)); if (db->db_blkid == DMU_BONUS_BLKID) { /* @@ -1094,6 +1139,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) DB_DNODE_EXIT(db); db->db_state = DB_CACHED; mutex_exit(&db->db_mtx); + dmu_buf_unlock_parent(db, dblt, tag); return (0); } @@ -1134,6 +1180,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) DB_DNODE_EXIT(db); db->db_state = DB_CACHED; mutex_exit(&db->db_mtx); + dmu_buf_unlock_parent(db, dblt, tag); return (0); } @@ -1150,12 +1197,14 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) "object set %llu", dmu_objset_id(db->db_objset)); DB_DNODE_EXIT(db); mutex_exit(&db->db_mtx); + dmu_buf_unlock_parent(db, dblt, tag); return (SET_ERROR(EIO)); } err = dbuf_read_verify_dnode_crypt(db, flags); if (err != 0) { DB_DNODE_EXIT(db); + dmu_buf_unlock_parent(db, dblt, tag); mutex_exit(&db->db_mtx); return (err); } @@ -1175,11 +1224,18 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(db->db_blkptr)) zio_flags |= ZIO_FLAG_RAW; - - err = arc_read(zio, db->db_objset->os_spa, db->db_blkptr, + /* + * The zio layer will copy the provided blkptr later, but we need to + * do this now so that we can release the parent's rwlock. We have to + * do that now so that if dbuf_read_done is called synchronously (on + * an l1 cache hit) we don't acquire the db_mtx while holding the + * parent's rwlock, which would be a lock ordering violation. 
+ */ + blkptr_t bp = *db->db_blkptr; + dmu_buf_unlock_parent(db, dblt, tag); + (void) arc_read(zio, db->db_objset->os_spa, &bp, dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); - return (err); } @@ -1278,8 +1334,6 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) DB_DNODE_ENTER(db); dn = DB_DNODE(db); - if ((flags & DB_RF_HAVESTRUCT) == 0) - rw_enter(&dn->dn_struct_rwlock, RW_READER); prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && (flags & DB_RF_NOPREFETCH) == 0 && dn != NULL && @@ -1316,29 +1370,32 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) dbuf_set_data(db, db->db_buf); } mutex_exit(&db->db_mtx); - if (err == 0 && prefetch) - dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE); - if ((flags & DB_RF_HAVESTRUCT) == 0) - rw_exit(&dn->dn_struct_rwlock); + if (err == 0 && prefetch) { + dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE, + flags & DB_RF_HAVESTRUCT); + } DB_DNODE_EXIT(db); } else if (db->db_state == DB_UNCACHED) { spa_t *spa = dn->dn_objset->os_spa; boolean_t need_wait = B_FALSE; + db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG); + if (zio == NULL && db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) { zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); need_wait = B_TRUE; } - err = dbuf_read_impl(db, zio, flags); - - /* dbuf_read_impl has dropped db_mtx for us */ - - if (!err && prefetch) - dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE); + err = dbuf_read_impl(db, zio, flags, dblt, FTAG); + /* + * dbuf_read_impl has dropped db_mtx and our parent's rwlock + * for us + */ + if (!err && prefetch) { + dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE, + flags & DB_RF_HAVESTRUCT); + } - if ((flags & DB_RF_HAVESTRUCT) == 0) - rw_exit(&dn->dn_struct_rwlock); DB_DNODE_EXIT(db); if (!err && need_wait) @@ -1353,10 +1410,10 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) * occurred and the dbuf went to UNCACHED. */ mutex_exit(&db->db_mtx); - if (prefetch) - dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE); - if ((flags & DB_RF_HAVESTRUCT) == 0) - rw_exit(&dn->dn_struct_rwlock); + if (prefetch) { + dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE, + flags & DB_RF_HAVESTRUCT); + } DB_DNODE_EXIT(db); /* Skip the wait per the caller's request. */ @@ -1536,7 +1593,9 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, if (db->db_state == DB_CACHED) { ASSERT(db->db.db_data != NULL); arc_release(db->db_buf, db); + rw_enter(&db->db_rwlock, RW_WRITER); bzero(db->db.db_data, db->db.db_size); + rw_exit(&db->db_rwlock); arc_buf_freeze(db->db_buf); } @@ -1558,15 +1617,6 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx) DB_DNODE_ENTER(db); dn = DB_DNODE(db); - /* XXX does *this* func really need the lock? */ - ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); - - /* - * This call to dmu_buf_will_dirty() with the dn_struct_rwlock held - * is OK, because there can be no other references to the db - * when we are changing its size, so no concurrent DB_FILL can - * be happening. 
- */ /* * XXX we should be doing a dbuf_read, checking the return * value and returning that up to our callers @@ -1643,8 +1693,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) dnode_t *dn; objset_t *os; dbuf_dirty_record_t **drp, *dr; - int drop_struct_lock = FALSE; int txgoff = tx->tx_txg & TXG_MASK; + boolean_t drop_struct_rwlock = B_FALSE; ASSERT(tx->tx_txg != 0); ASSERT(!zfs_refcount_is_zero(&db->db_holds)); @@ -1846,15 +1896,21 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) return (dr); } - /* - * The dn_struct_rwlock prevents db_blkptr from changing - * due to a write from syncing context completing - * while we are running, so we want to acquire it before - * looking at db_blkptr. - */ if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) { rw_enter(&dn->dn_struct_rwlock, RW_READER); - drop_struct_lock = TRUE; + drop_struct_rwlock = B_TRUE; + } + + /* + * If we are overwriting a dedup BP, then unless it is snapshotted, + * when we get to syncing context we will need to decrement its + * refcount in the DDT. Prefetch the relevant DDT block so that + * syncing context won't have to wait for the i/o. + */ + if (db->db_blkptr != NULL) { + db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG); + ddt_prefetch(os->os_spa, db->db_blkptr); + dmu_buf_unlock_parent(db, dblt, FTAG); } /* @@ -1867,19 +1923,12 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level || dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level); - /* - * If we are overwriting a dedup BP, then unless it is snapshotted, - * when we get to syncing context we will need to decrement its - * refcount in the DDT. Prefetch the relevant DDT block so that - * syncing context won't have to wait for the i/o. - */ - ddt_prefetch(os->os_spa, db->db_blkptr); if (db->db_level == 0) { ASSERT(!db->db_objset->os_raw_receive || dn->dn_maxblkid >= db->db_blkid); dnode_new_blkid(dn, db->db_blkid, tx, - drop_struct_lock, B_FALSE); + drop_struct_rwlock, B_FALSE); ASSERT(dn->dn_maxblkid >= db->db_blkid); } @@ -1890,15 +1939,14 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) if (db->db_parent == NULL || db->db_parent == dn->dn_dbuf) { int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; - - parent = dbuf_hold_level(dn, db->db_level+1, + parent = dbuf_hold_level(dn, db->db_level + 1, db->db_blkid >> epbs, FTAG); ASSERT(parent != NULL); parent_held = TRUE; } - if (drop_struct_lock) + if (drop_struct_rwlock) rw_exit(&dn->dn_struct_rwlock); - ASSERT3U(db->db_level+1, ==, parent->db_level); + ASSERT3U(db->db_level + 1, ==, parent->db_level); di = dbuf_dirty(parent, tx); if (parent_held) dbuf_rele(parent, FTAG); @@ -1919,14 +1967,14 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) } mutex_exit(&db->db_mtx); } else { - ASSERT(db->db_level+1 == dn->dn_nlevels); + ASSERT(db->db_level + 1 == dn->dn_nlevels); ASSERT(db->db_blkid < dn->dn_nblkptr); ASSERT(db->db_parent == NULL || db->db_parent == dn->dn_dbuf); mutex_enter(&dn->dn_mtx); ASSERT(!list_link_active(&dr->dr_dirty_node)); list_insert_tail(&dn->dn_dirty_records[txgoff], dr); mutex_exit(&dn->dn_mtx); - if (drop_struct_lock) + if (drop_struct_rwlock) rw_exit(&dn->dn_struct_rwlock); } @@ -2447,10 +2495,12 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse, *parentp = NULL; return (err); } + rw_enter(&(*parentp)->db_rwlock, RW_READER); *bpp = ((blkptr_t *)(*parentp)->db.db_data) + (blkid & ((1ULL << epbs) - 1)); if (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs))) ASSERT(BP_IS_HOLE(*bpp)); + 
rw_exit(&(*parentp)->db_rwlock); return (0); } else { /* the block is referenced from the dnode */ @@ -2695,7 +2745,7 @@ dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio, if (blkid > dn->dn_maxblkid) return; - if (dnode_block_freed(dn, blkid)) + if (level == 0 && dnode_block_freed(dn, blkid)) return; /* @@ -2841,7 +2891,9 @@ dbuf_hold_copy(dnode_t *dn, dmu_buf_impl_t *db, dbuf_dirty_record_t *dr) DBUF_GET_BUFC_TYPE(db), db->db.db_size)); } + rw_enter(&db->db_rwlock, RW_WRITER); bcopy(data->b_data, db->db.db_data, arc_buf_size(data)); + rw_exit(&db->db_rwlock); } /* @@ -2967,7 +3019,6 @@ int dbuf_spill_set_blksz(dmu_buf_t *db_fake, uint64_t blksz, dmu_tx_t *tx) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; - dnode_t *dn; if (db->db_blkid != DMU_SPILL_BLKID) return (SET_ERROR(ENOTSUP)); @@ -2976,12 +3027,7 @@ dbuf_spill_set_blksz(dmu_buf_t *db_fake, uint64_t blksz, dmu_tx_t *tx) ASSERT3U(blksz, <=, spa_maxblocksize(dmu_objset_spa(db->db_objset))); blksz = P2ROUNDUP(blksz, SPA_MINBLOCKSIZE); - DB_DNODE_ENTER(db); - dn = DB_DNODE(db); - rw_enter(&dn->dn_struct_rwlock, RW_WRITER); dbuf_new_size(db, blksz, tx); - rw_exit(&dn->dn_struct_rwlock); - DB_DNODE_EXIT(db); return (0); } @@ -3697,9 +3743,9 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) mutex_exit(&db->db_mtx); - rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_WRITER, FTAG); *db->db_blkptr = *bp; - rw_exit(&dn->dn_struct_rwlock); + dmu_buf_unlock_parent(db, dblt, FTAG); } /* ARGSUSED */ @@ -3740,9 +3786,9 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb) * anybody from reading the blocks we're about to * zero out. */ - rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + rw_enter(&db->db_rwlock, RW_WRITER); bzero(db->db.db_data, db->db.db_size); - rw_exit(&dn->dn_struct_rwlock); + rw_exit(&db->db_rwlock); } DB_DNODE_EXIT(db); } @@ -3932,7 +3978,7 @@ dbuf_remap_impl_callback(uint64_t vdev, uint64_t offset, uint64_t size, } static void -dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, dmu_tx_t *tx) +dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx) { blkptr_t bp_copy = *bp; spa_t *spa = dmu_objset_spa(dn->dn_objset); @@ -3946,14 +3992,16 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, dmu_tx_t *tx) if (spa_remap_blkptr(spa, &bp_copy, dbuf_remap_impl_callback, &drica)) { /* - * The struct_rwlock prevents dbuf_read_impl() from + * The db_rwlock prevents dbuf_read_impl() from * dereferencing the BP while we are changing it. To * avoid lock contention, only grab it when we are actually * changing the BP. */ - rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + if (rw != NULL) + rw_enter(rw, RW_WRITER); *bp = bp_copy; - rw_exit(&dn->dn_struct_rwlock); + if (rw != NULL) + rw_exit(rw); } } @@ -4026,7 +4074,7 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx) if (db->db_level > 0) { blkptr_t *bp = db->db.db_data; for (int i = 0; i < db->db.db_size >> SPA_BLKPTRSHIFT; i++) { - dbuf_remap_impl(dn, &bp[i], tx); + dbuf_remap_impl(dn, &bp[i], &db->db_rwlock, tx); } } else if (db->db.db_object == DMU_META_DNODE_OBJECT) { dnode_phys_t *dnp = db->db.db_data; @@ -4034,7 +4082,10 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx) DMU_OT_DNODE); for (int i = 0; i < db->db.db_size >> DNODE_SHIFT; i++) { for (int j = 0; j < dnp[i].dn_nblkptr; j++) { - dbuf_remap_impl(dn, &dnp[i].dn_blkptr[j], tx); + krwlock_t *lock = (dn->dn_dbuf == NULL ? 
NULL : + &dn->dn_dbuf->db_rwlock); + dbuf_remap_impl(dn, &dnp[i].dn_blkptr[j], lock, + tx); } } } diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c index 67ad5d10f6..6620fbc43e 100644 --- a/usr/src/uts/common/fs/zfs/dmu.c +++ b/usr/src/uts/common/fs/zfs/dmu.c @@ -172,8 +172,8 @@ dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset, uint64_t blkid; dmu_buf_impl_t *db; - blkid = dbuf_whichblock(dn, 0, offset); rw_enter(&dn->dn_struct_rwlock, RW_READER); + blkid = dbuf_whichblock(dn, 0, offset); db = dbuf_hold(dn, blkid, tag); rw_exit(&dn->dn_struct_rwlock); @@ -197,8 +197,8 @@ dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset, err = dnode_hold(os, object, FTAG, &dn); if (err) return (err); - blkid = dbuf_whichblock(dn, 0, offset); rw_enter(&dn->dn_struct_rwlock, RW_READER); + blkid = dbuf_whichblock(dn, 0, offset); db = dbuf_hold(dn, blkid, tag); rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); @@ -605,7 +605,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, if ((flags & DMU_READ_NO_PREFETCH) == 0 && DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) { dmu_zfetch(&dn->dn_zfetch, blkid, nblks, - read && DNODE_IS_CACHEABLE(dn)); + read && DNODE_IS_CACHEABLE(dn), B_TRUE); } rw_exit(&dn->dn_struct_rwlock); @@ -737,7 +737,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, if (err != 0) return; - rw_enter(&dn->dn_struct_rwlock, RW_READER); /* * offset + len - 1 is the last byte we want to prefetch for, and offset * is the first. Then dbuf_whichblk(dn, level, off + len - 1) is the @@ -745,6 +744,7 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, * offset) is the first. Then the number we need to prefetch is the * last - first + 1. */ + rw_enter(&dn->dn_struct_rwlock, RW_READER); if (level > 0 || dn->dn_datablkshift != 0) { nblks = dbuf_whichblock(dn, level, offset + len - 1) - dbuf_whichblock(dn, level, offset) + 1; @@ -757,7 +757,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, for (int i = 0; i < nblks; i++) dbuf_prefetch(dn, level, blkid + i, pri, 0); } - rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c index 884f0b36bb..a98097a8ee 100644 --- a/usr/src/uts/common/fs/zfs/dmu_objset.c +++ b/usr/src/uts/common/fs/zfs/dmu_objset.c @@ -684,8 +684,9 @@ dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag, dsl_pool_t *dp; dsl_dataset_t *ds; int err; - ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0; + ds_hold_flags_t flags; + flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE; err = dsl_pool_hold(name, tag, &dp); if (err != 0) return (err); @@ -758,8 +759,9 @@ dmu_objset_own(const char *name, dmu_objset_type_t type, dsl_pool_t *dp; dsl_dataset_t *ds; int err; - ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0; + ds_hold_flags_t flags; + flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE; err = dsl_pool_hold(name, FTAG, &dp); if (err != 0) return (err); @@ -797,8 +799,9 @@ dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type, { dsl_dataset_t *ds; int err; - ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0; + ds_hold_flags_t flags; + flags = (decrypt) ? 
DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE; err = dsl_dataset_own_obj(dp, obj, flags, tag, &ds); if (err != 0) return (err); @@ -815,9 +818,10 @@ dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type, void dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag) { - ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0; - + ds_hold_flags_t flags; dsl_pool_t *dp = dmu_objset_pool(os); + + flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE; dsl_dataset_rele_flags(os->os_dsl_dataset, flags, tag); dsl_pool_rele(dp, tag); } @@ -845,7 +849,9 @@ dmu_objset_refresh_ownership(dsl_dataset_t *ds, dsl_dataset_t **newds, { dsl_pool_t *dp; char name[ZFS_MAX_DATASET_NAME_LEN]; + ds_hold_flags_t flags; + flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE; VERIFY3P(ds, !=, NULL); VERIFY3P(ds->ds_owner, ==, tag); VERIFY(dsl_dataset_long_held(ds)); @@ -854,21 +860,22 @@ dmu_objset_refresh_ownership(dsl_dataset_t *ds, dsl_dataset_t **newds, dp = ds->ds_dir->dd_pool; dsl_pool_config_enter(dp, FTAG); - dsl_dataset_disown(ds, decrypt, tag); - VERIFY0(dsl_dataset_own(dp, name, - (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag, newds)); + dsl_dataset_disown(ds, flags, tag); + VERIFY0(dsl_dataset_own(dp, name, flags, tag, newds)); dsl_pool_config_exit(dp, FTAG); } void dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag) { + ds_hold_flags_t flags; + + flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE; /* * Stop upgrading thread */ dmu_objset_upgrade_stop(os); - dsl_dataset_disown(os->os_dsl_dataset, - (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag); + dsl_dataset_disown(os->os_dsl_dataset, flags, tag); } void diff --git a/usr/src/uts/common/fs/zfs/dmu_recv.c b/usr/src/uts/common/fs/zfs/dmu_recv.c index 39f365652e..03e0fee4ff 100644 --- a/usr/src/uts/common/fs/zfs/dmu_recv.c +++ b/usr/src/uts/common/fs/zfs/dmu_recv.c @@ -201,7 +201,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) struct drr_begin *drrb = drba->drba_cookie->drc_drrb; uint64_t fromguid = drrb->drr_fromguid; int flags = drrb->drr_flags; - ds_hold_flags_t dsflags = 0; + ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE; int error; uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds; @@ -399,7 +399,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) dsl_dataset_t *ds, *newds; objset_t *os; uint64_t dsobj; - ds_hold_flags_t dsflags = 0; + ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE; int error; uint64_t crflags = 0; dsl_crypto_params_t dummy_dcp = { 0 }; @@ -541,7 +541,7 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) dsl_pool_t *dp = dmu_tx_pool(tx); struct drr_begin *drrb = drba->drba_cookie->drc_drrb; int error; - ds_hold_flags_t dsflags = 0; + ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE; uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds; const char *tofs = drba->drba_cookie->drc_tofs; @@ -670,7 +670,7 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds; objset_t *os; - ds_hold_flags_t dsflags = 0; + ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE; uint64_t dsobj; /* 6 extra bytes for /%recv */ char recvname[ZFS_MAX_DATASET_NAME_LEN + 6]; @@ -1824,8 +1824,9 @@ static void dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) { dsl_dataset_t *ds = drc->drc_ds; - ds_hold_flags_t dsflags = (drc->drc_raw) ? 0 : DS_HOLD_FLAG_DECRYPT; + ds_hold_flags_t dsflags; + dsflags = (drc->drc_raw) ? 
DS_HOLD_FLAG_NONE : DS_HOLD_FLAG_DECRYPT; /* * Wait for the txg sync before cleaning up the receive. For * resumable receives, this ensures that our resume state has @@ -2832,11 +2833,12 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj, dsl_dataset_t *snapds; guid_map_entry_t *gmep; objset_t *os; - ds_hold_flags_t dsflags = (raw) ? 0 : DS_HOLD_FLAG_DECRYPT; + ds_hold_flags_t dsflags; int err; ASSERT(guid_map != NULL); + dsflags = (raw) ? DS_HOLD_FLAG_NONE : DS_HOLD_FLAG_DECRYPT; err = dsl_pool_hold(name, FTAG, &dp); if (err != 0) return (err); diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c index 34cfa2c011..d91a48e2ca 100644 --- a/usr/src/uts/common/fs/zfs/dmu_send.c +++ b/usr/src/uts/common/fs/zfs/dmu_send.c @@ -1222,9 +1222,10 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, dsl_pool_t *dp; dsl_dataset_t *ds; dsl_dataset_t *fromds = NULL; - ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT; + ds_hold_flags_t dsflags; int err; + dsflags = (rawok) ? DS_HOLD_FLAG_NONE : DS_HOLD_FLAG_DECRYPT; err = dsl_pool_hold(pool, FTAG, &dp); if (err != 0) return (err); @@ -1287,9 +1288,10 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, dsl_pool_t *dp; dsl_dataset_t *ds; int err; - ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT; + ds_hold_flags_t dsflags; boolean_t owned = B_FALSE; + dsflags = (rawok) ? DS_HOLD_FLAG_NONE : DS_HOLD_FLAG_DECRYPT; if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL) return (SET_ERROR(EINVAL)); diff --git a/usr/src/uts/common/fs/zfs/dmu_zfetch.c b/usr/src/uts/common/fs/zfs/dmu_zfetch.c index 5d6f20d072..60e0f36a5e 100644 --- a/usr/src/uts/common/fs/zfs/dmu_zfetch.c +++ b/usr/src/uts/common/fs/zfs/dmu_zfetch.c @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2013, 2015 by Delphix. All rights reserved. + * Copyright (c) 2013, 2017 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -204,7 +204,8 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid) * TRUE -- prefetch predicted data blocks plus following indirect blocks. */ void -dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data) +dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data, + boolean_t have_lock) { zstream_t *zs; int64_t pf_start, ipf_start, ipf_istart, ipf_iend; @@ -233,6 +234,9 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data) if (blkid == 0) return; + if (!have_lock) + rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER); + rw_enter(&zf->zf_rwlock, RW_READER); /* @@ -257,6 +261,10 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data) /* Already prefetched this before. 
*/ mutex_exit(&zs->zs_lock); rw_exit(&zf->zf_rwlock); + if (!have_lock) { + rw_exit(&zf->zf_dnode-> + dn_struct_rwlock); + } return; } break; @@ -274,6 +282,8 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data) if (rw_tryupgrade(&zf->zf_rwlock)) dmu_zfetch_stream_create(zf, end_of_access_blkid); rw_exit(&zf->zf_rwlock); + if (!have_lock) + rw_exit(&zf->zf_dnode->dn_struct_rwlock); return; } @@ -353,5 +363,7 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data) dbuf_prefetch(zf->zf_dnode, 1, iblk, ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH); } + if (!have_lock) + rw_exit(&zf->zf_dnode->dn_struct_rwlock); ZFETCHSTAT_BUMP(zfetchstat_hits); } diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c index 6550a1f066..53aeb42c0e 100644 --- a/usr/src/uts/common/fs/zfs/dnode.c +++ b/usr/src/uts/common/fs/zfs/dnode.c @@ -1346,7 +1346,6 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, } blk = dbuf_whichblock(mdn, 0, object * sizeof (dnode_phys_t)); - db = dbuf_hold(mdn, blk, FTAG); if (drop_struct_lock) rw_exit(&mdn->dn_struct_rwlock); @@ -1783,10 +1782,11 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx) /* resize the old block */ err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db); - if (err == 0) + if (err == 0) { dbuf_new_size(db, size, tx); - else if (err != ENOENT) + } else if (err != ENOENT) { goto fail; + } dnode_setdblksz(dn, size); dnode_setdirty(dn, tx); @@ -2021,7 +2021,6 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) int trunc = FALSE; int epbs; - rw_enter(&dn->dn_struct_rwlock, RW_WRITER); blksz = dn->dn_datablksz; blkshift = dn->dn_datablkshift; epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; @@ -2038,7 +2037,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) head = P2NPHASE(off, blksz); blkoff = P2PHASE(off, blksz); if ((off >> blkshift) > dn->dn_maxblkid) - goto out; + return; } else { ASSERT(dn->dn_maxblkid == 0); if (off == 0 && len >= blksz) { @@ -2047,12 +2046,15 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) */ blkid = 0; nblks = 1; - if (dn->dn_nlevels > 1) + if (dn->dn_nlevels > 1) { + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); dnode_dirty_l1(dn, 0, tx); + rw_exit(&dn->dn_struct_rwlock); + } goto done; } else if (off >= blksz) { /* Freeing past end-of-data */ - goto out; + return; } else { /* Freeing part of the block. 
*/ head = blksz - off; @@ -2062,19 +2064,26 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) } /* zero out any partial block data at the start of the range */ if (head) { + int res; ASSERT3U(blkoff + head, ==, blksz); if (len < head) head = len; - if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off), - TRUE, FALSE, FTAG, &db) == 0) { + rw_enter(&dn->dn_struct_rwlock, RW_READER); + res = dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off), + TRUE, FALSE, FTAG, &db); + rw_exit(&dn->dn_struct_rwlock); + if (res == 0) { caddr_t data; + boolean_t dirty; + db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, + FTAG); /* don't dirty if it isn't on disk and isn't dirty */ - if (db->db_last_dirty || - (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) { - rw_exit(&dn->dn_struct_rwlock); + dirty = db->db_last_dirty || + (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr)); + dmu_buf_unlock_parent(db, dblt, FTAG); + if (dirty) { dmu_buf_will_dirty(&db->db, tx); - rw_enter(&dn->dn_struct_rwlock, RW_WRITER); data = db->db.db_data; bzero(data + blkoff, head); } @@ -2086,11 +2095,11 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) /* If the range was less than one block, we're done */ if (len == 0) - goto out; + return; /* If the remaining range is past end of file, we're done */ if ((off >> blkshift) > dn->dn_maxblkid) - goto out; + return; ASSERT(ISP2(blksz)); if (trunc) @@ -2101,16 +2110,23 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) ASSERT0(P2PHASE(off, blksz)); /* zero out any partial block data at the end of the range */ if (tail) { + int res; if (len < tail) tail = len; - if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off+len), - TRUE, FALSE, FTAG, &db) == 0) { + rw_enter(&dn->dn_struct_rwlock, RW_READER); + res = dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off+len), + TRUE, FALSE, FTAG, &db); + rw_exit(&dn->dn_struct_rwlock); + if (res == 0) { + boolean_t dirty; /* don't dirty if not on disk and not dirty */ - if (db->db_last_dirty || - (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) { - rw_exit(&dn->dn_struct_rwlock); + db_lock_type_t type = dmu_buf_lock_parent(db, RW_READER, + FTAG); + dirty = db->db_last_dirty || + (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr)); + dmu_buf_unlock_parent(db, type, FTAG); + if (dirty) { dmu_buf_will_dirty(&db->db, tx); - rw_enter(&dn->dn_struct_rwlock, RW_WRITER); bzero(db->db.db_data, tail); } dbuf_rele(db, FTAG); @@ -2120,7 +2136,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) /* If the range did not include a full block, we are done */ if (len == 0) - goto out; + return; ASSERT(IS_P2ALIGNED(off, blksz)); ASSERT(trunc || IS_P2ALIGNED(len, blksz)); @@ -2150,6 +2166,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) * amount of space if we copy the freed BPs into deadlists. 
*/ if (dn->dn_nlevels > 1) { + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); uint64_t first, last; first = blkid >> epbs; @@ -2194,6 +2211,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) dnode_dirty_l1(dn, i, tx); } + rw_exit(&dn->dn_struct_rwlock); } done: @@ -2215,9 +2233,6 @@ done: dbuf_free_range(dn, blkid, blkid + nblks - 1, tx); dnode_setdirty(dn, tx); -out: - - rw_exit(&dn->dn_struct_rwlock); } static boolean_t @@ -2329,6 +2344,8 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, dprintf("probing object %llu offset %llx level %d of %u\n", dn->dn_object, *offset, lvl, dn->dn_phys->dn_nlevels); + ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); + hole = ((flags & DNODE_FIND_HOLE) != 0); inc = (flags & DNODE_FIND_BACKWARDS) ? -1 : 1; ASSERT(txg == 0 || !hole); @@ -2361,9 +2378,9 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, return (error); } data = db->db.db_data; + rw_enter(&db->db_rwlock, RW_READER); } - if (db != NULL && txg != 0 && (db->db_blkptr == NULL || db->db_blkptr->blk_birth <= txg || BP_IS_HOLE(db->db_blkptr))) { @@ -2423,8 +2440,10 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, error = SET_ERROR(ESRCH); } - if (db) + if (db != NULL) { + rw_exit(&db->db_rwlock); dbuf_rele(db, FTAG); + } return (error); } diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c index 4a060403da..396d58da17 100644 --- a/usr/src/uts/common/fs/zfs/dnode_sync.c +++ b/usr/src/uts/common/fs/zfs/dnode_sync.c @@ -52,7 +52,6 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx) /* this dnode can't be paged out because it's dirty */ ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); - ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0); db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG); @@ -62,8 +61,24 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx) dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset, dn->dn_object, dn->dn_phys->dn_nlevels); + /* + * Lock ordering requires that we hold the children's db_mutexes (by + * calling dbuf_find()) before holding the parent's db_rwlock. The lock + * order is imposed by dbuf_read's steps of "grab the lock to protect + * db_parent, get db_parent, hold db_parent's db_rwlock". 
+ */ + dmu_buf_impl_t *children[DN_MAX_NBLKPTR]; + ASSERT3U(nblkptr, <=, DN_MAX_NBLKPTR); + for (i = 0; i < nblkptr; i++) { + children[i] = + dbuf_find(dn->dn_objset, dn->dn_object, old_toplvl, i); + } + /* transfer dnode's block pointers to new indirect block */ (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT); + if (dn->dn_dbuf != NULL) + rw_enter(&dn->dn_dbuf->db_rwlock, RW_WRITER); + rw_enter(&db->db_rwlock, RW_WRITER); ASSERT(db->db.db_data); ASSERT(arc_released(db->db_buf)); ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size); @@ -73,8 +88,7 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx) /* set dbuf's parent pointers to new indirect buf */ for (i = 0; i < nblkptr; i++) { - dmu_buf_impl_t *child = - dbuf_find(dn->dn_objset, dn->dn_object, old_toplvl, i); + dmu_buf_impl_t *child = children[i]; if (child == NULL) continue; @@ -107,6 +121,10 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx) bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr); + rw_exit(&db->db_rwlock); + if (dn->dn_dbuf != NULL) + rw_exit(&dn->dn_dbuf->db_rwlock); + dbuf_rele(db, FTAG); rw_exit(&dn->dn_struct_rwlock); @@ -183,7 +201,7 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx) ASSERT(db->db_level == 1); rw_enter(&dn->dn_struct_rwlock, RW_READER); - err = dbuf_hold_impl(dn, db->db_level-1, + err = dbuf_hold_impl(dn, db->db_level - 1, (db->db_blkid << epbs) + i, TRUE, FALSE, FTAG, &child); rw_exit(&dn->dn_struct_rwlock); if (err == ENOENT) @@ -281,7 +299,9 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, * ancestor of the first or last block to be freed. The first and * last L1 indirect blocks are always dirtied by dnode_free_range(). */ + db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG); VERIFY(BP_GET_FILL(db->db_blkptr) == 0 || db->db_dirtycnt > 0); + dmu_buf_unlock_parent(db, dblt, FTAG); dbuf_release_bp(db); bp = db->db.db_data; @@ -307,7 +327,9 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, if (db->db_level == 1) { FREE_VERIFY(db, start, end, tx); - free_blocks(dn, bp, end-start+1, tx); + rw_enter(&db->db_rwlock, RW_WRITER); + free_blocks(dn, bp, end - start + 1, tx); + rw_exit(&db->db_rwlock); } else { for (uint64_t id = start; id <= end; id++, bp++) { if (BP_IS_HOLE(bp)) @@ -324,10 +346,12 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, } if (free_indirects) { + rw_enter(&db->db_rwlock, RW_WRITER); for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) ASSERT(BP_IS_HOLE(bp)); bzero(db->db.db_data, db->db.db_size); free_blocks(dn, db->db_blkptr, 1, tx); + rw_exit(&db->db_rwlock); } DB_DNODE_EXIT(db); @@ -379,7 +403,6 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks, VERIFY0(dbuf_hold_impl(dn, dnlevel - 1, i, TRUE, FALSE, FTAG, &db)); rw_exit(&dn->dn_struct_rwlock); - free_children(db, blkid, nblks, free_indirects, tx); dbuf_rele(db, FTAG); } diff --git a/usr/src/uts/common/fs/zfs/sys/dbuf.h b/usr/src/uts/common/fs/zfs/sys/dbuf.h index 271232c61c..7482006eb1 100644 --- a/usr/src/uts/common/fs/zfs/sys/dbuf.h +++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h @@ -108,6 +108,12 @@ typedef enum override_states { DR_OVERRIDDEN } override_states_t; +typedef enum db_lock_type { + DLT_NONE, + DLT_PARENT, + DLT_OBJSET +} db_lock_type_t; + typedef struct dbuf_dirty_record { /* link on our parents dirty list */ list_node_t dr_dirty_node; @@ -217,6 +223,22 @@ typedef struct dmu_buf_impl { */ uint8_t db_level; + /* + * Protects db_buf's contents if they 
contain an indirect block or data + * block of the meta-dnode. We use this lock to protect the structure of + * the block tree. This means that when modifying this dbuf's data, we + * grab its rwlock. When modifying its parent's data (including the + * blkptr to this dbuf), we grab the parent's rwlock. The lock ordering + * for this lock is: + * 1) dn_struct_rwlock + * 2) db_rwlock + * We don't currently grab multiple dbufs' db_rwlocks at once. + */ + krwlock_t db_rwlock; + + /* buffer holding our data */ + arc_buf_t *db_buf; + /* db_mtx protects the members below */ kmutex_t db_mtx; @@ -232,9 +254,6 @@ typedef struct dmu_buf_impl { */ zfs_refcount_t db_holds; - /* buffer holding our data */ - arc_buf_t *db_buf; - kcondvar_t db_changed; dbuf_dirty_record_t *db_data_pending; @@ -336,6 +355,8 @@ void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx); void dbuf_unoverride(dbuf_dirty_record_t *dr); void dbuf_sync_list(list_t *list, int level, dmu_tx_t *tx); void dbuf_release_bp(dmu_buf_impl_t *db); +db_lock_type_t dmu_buf_lock_parent(dmu_buf_impl_t *db, krw_t rw, void *tag); +void dmu_buf_unlock_parent(dmu_buf_impl_t *db, db_lock_type_t type, void *tag); boolean_t dbuf_can_remap(const dmu_buf_impl_t *buf); diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_zfetch.h b/usr/src/uts/common/fs/zfs/sys/dmu_zfetch.h index 21a3ff3a20..d426cc282b 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_zfetch.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_zfetch.h @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2014 by Delphix. All rights reserved. + * Copyright (c) 2014, 2017 by Delphix. All rights reserved. */ #ifndef _DMU_ZFETCH_H @@ -66,7 +66,8 @@ void zfetch_fini(void); void dmu_zfetch_init(zfetch_t *, struct dnode *); void dmu_zfetch_fini(zfetch_t *); -void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t); +void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, + boolean_t); #ifdef __cplusplus diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h index 189376eefc..0fd7e1a7e2 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h @@ -306,6 +306,7 @@ typedef struct dsl_dataset_snapshot_arg { /* flags for holding the dataset */ typedef enum ds_hold_flags { + DS_HOLD_FLAG_NONE = 0 << 0, DS_HOLD_FLAG_DECRYPT = 1 << 0 /* needs access to encrypted data */ } ds_hold_flags_t; diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c index a7ea684f9c..2f71105178 100644 --- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c +++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c @@ -1366,7 +1366,8 @@ gcpu_mca_init(cmi_hdl_t hdl) */ if (!gcpu_suppress_log_on_init && ((vendor == X86_VENDOR_Intel && family >= 0xf) || - vendor == X86_VENDOR_AMD)) + vendor == X86_VENDOR_AMD || + vendor == X86_VENDOR_HYGON)) gcpu_mca_logout(hdl, NULL, -1ULL, NULL, B_FALSE, GCPU_MPT_WHAT_POKE_ERR); diff --git a/usr/src/uts/i86pc/os/cmi_hw.c b/usr/src/uts/i86pc/os/cmi_hw.c index aa549569b0..fb59826431 100644 --- a/usr/src/uts/i86pc/os/cmi_hw.c +++ b/usr/src/uts/i86pc/os/cmi_hw.c @@ -1272,6 +1272,7 @@ cmi_hdl_create(enum cmi_hdl_class class, uint_t chipid, uint_t coreid, switch (vendor) { case X86_VENDOR_Intel: case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: if (cmi_ext_topo_check == 0) { cpuid_get_ext_topo((cpu_t *)priv, &cmi_core_nbits, &cmi_strand_nbits); diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c index 6f54dee7f9..c40173d4c8 100644 --- a/usr/src/uts/i86pc/os/cpuid.c +++ 
b/usr/src/uts/i86pc/os/cpuid.c @@ -1817,6 +1817,7 @@ platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) } break; case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: switch (eax) { case 0x80000001: @@ -2077,7 +2078,8 @@ cpuid_gather_apicid(struct cpuid_info *cpi) } } - if (cpi->cpi_vendor == X86_VENDOR_AMD && + if ((cpi->cpi_vendor == X86_VENDOR_AMD || + cpi->cpi_vendor == X86_VENDOR_HYGON) && is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) { return (cpi->cpi_extd[0x1e].cp_eax); @@ -2742,7 +2744,8 @@ cpuid_use_amd_retpoline(struct cpuid_info *cpi) uint64_t val; on_trap_data_t otd; - if (cpi->cpi_vendor != X86_VENDOR_AMD) + if (cpi->cpi_vendor != X86_VENDOR_AMD && + cpi->cpi_vendor != X86_VENDOR_HYGON) return (B_FALSE); /* @@ -2881,7 +2884,8 @@ cpuid_scan_security(cpu_t *cpu, uchar_t *featureset) struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; x86_spectrev2_mitigation_t v2mit; - if (cpi->cpi_vendor == X86_VENDOR_AMD && + if ((cpi->cpi_vendor == X86_VENDOR_AMD || + cpi->cpi_vendor == X86_VENDOR_HYGON) && cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) { if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB) add_x86_feature(featureset, X86FSET_IBPB); @@ -3092,7 +3096,8 @@ cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset) cpi = cpu->cpu_m.mcpu_cpi; - if (cpi->cpi_vendor == X86_VENDOR_AMD) { + if (cpi->cpi_vendor == X86_VENDOR_AMD || + cpi->cpi_vendor == X86_VENDOR_HYGON) { cpuid_gather_amd_topology_leaves(cpu); } @@ -3108,6 +3113,7 @@ cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset) &cpi->cpi_ncore_per_chip); break; case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: cpuid_amd_ncores(cpi, &cpi->cpi_ncpu_per_chip, &cpi->cpi_ncore_per_chip); break; @@ -3157,7 +3163,8 @@ cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset) cpi->cpi_clogid = 0; cpi->cpi_coreid = cpu->cpu_id; cpi->cpi_pkgcoreid = 0; - if (cpi->cpi_vendor == X86_VENDOR_AMD) { + if (cpi->cpi_vendor == X86_VENDOR_AMD || + cpi->cpi_vendor == X86_VENDOR_HYGON) { cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0); } else { cpi->cpi_procnodeid = cpi->cpi_chipid; @@ -3168,6 +3175,7 @@ cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset) cpuid_intel_getids(cpu, featureset); break; case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: cpuid_amd_getids(cpu, featureset); break; default: @@ -3358,6 +3366,9 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset) if (CPI_FAMILY(cpi) == 0xf) cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; break; + case X86_VENDOR_HYGON: + cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; + break; default: if (cpi->cpi_model == 0xf) cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; @@ -3471,6 +3482,10 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset) #endif break; + case X86_VENDOR_HYGON: + /* Enable all for Hygon Dhyana CPU */ + mask_ecx = 0xffffffff; + break; case X86_VENDOR_TM: /* * workaround the NT workaround in CMS 4.1 @@ -3934,6 +3949,7 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset) x86_type == X86_TYPE_CYRIX_GXm) xcpuid++; break; + case X86_VENDOR_HYGON: case X86_VENDOR_Centaur: case X86_VENDOR_TM: default: @@ -3955,6 +3971,7 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset) switch (cpi->cpi_vendor) { case X86_VENDOR_Intel: case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: if (cpi->cpi_xmaxeax < 0x80000001) break; cp = &cpi->cpi_extd[1]; @@ -3998,7 +4015,8 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset) add_x86_feature(featureset, X86FSET_1GPG); } - if ((cpi->cpi_vendor == X86_VENDOR_AMD) && + if ((cpi->cpi_vendor == X86_VENDOR_AMD || + cpi->cpi_vendor == X86_VENDOR_HYGON) && (cpi->cpi_std[1].cp_edx & 
CPUID_INTC_EDX_FXSR) && (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) { add_x86_feature(featureset, X86FSET_SSE4A); @@ -4019,7 +4037,8 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset) * that AMD processors don't support sysenter * in long mode at all, so don't try to program them. */ - if (x86_vendor == X86_VENDOR_AMD) { + if (x86_vendor == X86_VENDOR_AMD || + x86_vendor == X86_VENDOR_HYGON) { remove_x86_feature(featureset, X86FSET_SEP); } @@ -4073,6 +4092,7 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset) } /*FALLTHROUGH*/ case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) break; cp = &cpi->cpi_extd[8]; @@ -4084,7 +4104,8 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset) /* * AMD uses ebx for some extended functions. */ - if (cpi->cpi_vendor == X86_VENDOR_AMD) { + if (cpi->cpi_vendor == X86_VENDOR_AMD || + cpi->cpi_vendor == X86_VENDOR_HYGON) { /* * While we're here, check for the AMD "Error * Pointer Zero/Restore" feature. This can be @@ -4120,6 +4141,7 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset) switch (cpi->cpi_vendor) { case X86_VENDOR_Intel: case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: if (cpi->cpi_maxeax >= 7) { cp = &cpi->cpi_extd[7]; cp->cp_eax = 0x80000007; @@ -4152,7 +4174,8 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset) cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); - if (cpi->cpi_vendor == X86_VENDOR_AMD) { + if (cpi->cpi_vendor == X86_VENDOR_AMD || + cpi->cpi_vendor == X86_VENDOR_HYGON) { if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8 && cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) { /* Special handling for AMD FP not necessary. */ @@ -5032,7 +5055,8 @@ cpuid_pass3(cpu_t *cpu) cpi->cpi_last_lvl_cacheid = cpu->cpu_id; if ((cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) || - (cpi->cpi_vendor == X86_VENDOR_AMD && + ((cpi->cpi_vendor == X86_VENDOR_AMD || + cpi->cpi_vendor == X86_VENDOR_HYGON) && cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1d && is_x86_feature(x86_featureset, X86FSET_TOPOEXT))) { uint32_t leaf; @@ -5401,6 +5425,7 @@ cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out) /*FALLTHROUGH*/ case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: edx = &cpi->cpi_support[AMD_EDX_FEATURES]; ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; @@ -5417,6 +5442,7 @@ cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out) break; case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: if (!is_x86_feature(x86_featureset, X86FSET_TSCP)) *edx &= ~CPUID_AMD_EDX_TSCP; if (!is_x86_feature(x86_featureset, X86FSET_SSE4A)) @@ -5459,6 +5485,7 @@ cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out) switch (cpi->cpi_vendor) { case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: if (*edx & CPUID_AMD_EDX_TSCP) hwcap_flags |= AV_386_TSCP; if (*ecx & CPUID_AMD_ECX_AHF64) @@ -5603,7 +5630,8 @@ cpuid_syscall32_insn(cpu_t *cpu) { struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; - if (cpi->cpi_vendor == X86_VENDOR_AMD && + if ((cpi->cpi_vendor == X86_VENDOR_AMD || + cpi->cpi_vendor == X86_VENDOR_HYGON) && cpi->cpi_xmaxeax >= 0x80000001 && (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) return (1); @@ -5821,7 +5849,9 @@ cpuid_have_cr8access(cpu_t *cpu) ASSERT(cpu != NULL); cpi = cpu->cpu_m.mcpu_cpi; - if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 && + if ((cpi->cpi_vendor == X86_VENDOR_AMD || + cpi->cpi_vendor == X86_VENDOR_HYGON) && + cpi->cpi_maxeax >= 1 && (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0) return (1); return (0); @@ -6789,6 +6819,8 @@ x86_which_cacheinfo(struct cpuid_info *cpi) (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) return 
(X86_VENDOR_AMD); break; + case X86_VENDOR_HYGON: + return (X86_VENDOR_AMD); case X86_VENDOR_TM: if (cpi->cpi_family >= 5) return (X86_VENDOR_AMD); @@ -6885,6 +6917,9 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, case X86_VENDOR_AMD: create = cpi->cpi_family >= 0xf; break; + case X86_VENDOR_HYGON: + create = 1; + break; default: create = 0; break; @@ -6901,6 +6936,9 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, case X86_VENDOR_AMD: create = CPI_FAMILY(cpi) == 0xf; break; + case X86_VENDOR_HYGON: + create = 1; + break; default: create = 0; break; @@ -6912,6 +6950,7 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, /* generation */ switch (cpi->cpi_vendor) { case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: /* * AMD K5 model 1 was the first part to support this */ @@ -6938,6 +6977,9 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, case X86_VENDOR_AMD: create = cpi->cpi_family >= 0xf; break; + case X86_VENDOR_HYGON: + create = 1; + break; default: create = 0; break; @@ -6958,6 +7000,9 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, case X86_VENDOR_AMD: create = cpi->cpi_family >= 0xf; break; + case X86_VENDOR_HYGON: + create = 1; + break; default: create = 0; break; @@ -6988,6 +7033,9 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, case X86_VENDOR_AMD: create = cpi->cpi_family >= 0xf; break; + case X86_VENDOR_HYGON: + create = 1; + break; default: create = 0; break; @@ -7000,6 +7048,7 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, switch (cpi->cpi_vendor) { case X86_VENDOR_Intel: case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: case X86_VENDOR_Cyrix: case X86_VENDOR_TM: case X86_VENDOR_Centaur: @@ -7513,7 +7562,8 @@ cpuid_pass_ucode(cpu_t *cpu, uchar_t *fset) cp.cp_ecx = 0; (void) __cpuid_insn(&cp); cpi->cpi_std[7] = cp; - } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { + } else if (cpi->cpi_vendor == X86_VENDOR_AMD || + cpi->cpi_vendor == X86_VENDOR_HYGON) { /* No xcpuid support */ if (cpi->cpi_family < 5 || (cpi->cpi_family == 5 && cpi->cpi_model < 1)) diff --git a/usr/src/uts/i86pc/os/cpuid_subr.c b/usr/src/uts/i86pc/os/cpuid_subr.c index faa3e75b03..934b5d547d 100644 --- a/usr/src/uts/i86pc/os/cpuid_subr.c +++ b/usr/src/uts/i86pc/os/cpuid_subr.c @@ -88,12 +88,13 @@ * 15 for family 0x17, models 30 - 3f * 16 for family 0x17, models 60 - 6f * 17 for family 0x17, models 70 - 7f - * 18 for family 0x19, models 00 - 0f - * 19 for family 0x19, models 20 - 2f + * 18 for family 0x18, models 00 - 0f + * 19 for family 0x19, models 00 - 0f + * 20 for family 0x19, models 20 - 2f * Second index by (model & 0x3) for family 0fh, * CPUID pkg bits (Fn8000_0001_EBX[31:28]) for later families. 
*/ -static uint32_t amd_skts[20][8] = { +static uint32_t amd_skts[21][8] = { /* * Family 0xf revisions B through E */ @@ -365,7 +366,7 @@ static uint32_t amd_skts[20][8] = { }, /* - * Family 0x19 models 00-0f (Zen 3 - Milan) + * Family 0x18 models 00-0f (Dhyana) */ #define A_SKTS_18 18 { @@ -373,6 +374,21 @@ static uint32_t amd_skts[20][8] = { X86_SOCKET_UNKNOWN, /* 0b001 */ X86_SOCKET_UNKNOWN, /* 0b010 */ X86_SOCKET_UNKNOWN, /* 0b011 */ + X86_SOCKET_SL1, /* 0b100 */ + X86_SOCKET_UNKNOWN, /* 0b101 */ + X86_SOCKET_DM1, /* 0b110 */ + X86_SOCKET_SL1R2 /* 0b111 */ + }, + + /* + * Family 0x19 models 00-0f (Zen 3 - Milan) + */ +#define A_SKTS_19 19 + { + X86_SOCKET_UNKNOWN, /* 0b000 */ + X86_SOCKET_UNKNOWN, /* 0b001 */ + X86_SOCKET_UNKNOWN, /* 0b010 */ + X86_SOCKET_UNKNOWN, /* 0b011 */ X86_SOCKET_SP3, /* 0b100 */ X86_SOCKET_UNKNOWN, /* 0b101 */ X86_SOCKET_UNKNOWN, /* 0b110 */ @@ -382,7 +398,7 @@ static uint32_t amd_skts[20][8] = { /* * Family 0x19 models 20-2f (Zen 3 - Vermeer) */ -#define A_SKTS_19 19 +#define A_SKTS_20 20 { X86_SOCKET_UNKNOWN, /* 0b000 */ X86_SOCKET_UNKNOWN, /* 0b001 */ @@ -399,7 +415,7 @@ struct amd_sktmap_s { uint32_t skt_code; char sktstr[16]; }; -static struct amd_sktmap_s amd_sktmap_strs[X86_NUM_SOCKETS_AMD + 1] = { +static struct amd_sktmap_s amd_sktmap_strs[X86_NUM_SOCKETS + 1] = { { X86_SOCKET_754, "754" }, { X86_SOCKET_939, "939" }, { X86_SOCKET_940, "940" }, @@ -434,6 +450,9 @@ static struct amd_sktmap_s amd_sktmap_strs[X86_NUM_SOCKETS_AMD + 1] = { { X86_SOCKET_FP5, "FP5" }, { X86_SOCKET_FP6, "FP6" }, { X86_SOCKET_STRX4, "sTRX4" }, + { X86_SOCKET_SL1, "SL1" }, + { X86_SOCKET_SL1R2, "SL1R2" }, + { X86_SOCKET_DM1, "DM1" }, { X86_SOCKET_UNKNOWN, "Unknown" } }; @@ -459,8 +478,9 @@ static const struct amd_skt_mapent { { 0x17, 0x30, 0x3f, A_SKTS_15 }, { 0x17, 0x60, 0x6f, A_SKTS_16 }, { 0x17, 0x70, 0x7f, A_SKTS_17 }, - { 0x19, 0x00, 0x0f, A_SKTS_18 }, - { 0x19, 0x20, 0x2f, A_SKTS_19 } + { 0x18, 0x00, 0x0f, A_SKTS_18 }, + { 0x19, 0x00, 0x0f, A_SKTS_19 }, + { 0x19, 0x20, 0x2f, A_SKTS_20 } }; /* @@ -629,7 +649,13 @@ static const struct amd_rev_mapent { A_SKTS_15 }, { 0x17, 0x71, 0x71, 0x0, 0x0, X86_CHIPREV_AMD_17_MTS_B0, "MTS-B0", - A_SKTS_17 } + A_SKTS_17 }, + + /* + * =============== HygonGenuine Family 0x18 =============== + */ + { 0x18, 0x00, 0x00, 0x1, 0x1, X86_CHIPREV_HYGON_18_DN_A1, "DN_A1", + A_SKTS_18 }, }; /* @@ -759,6 +785,7 @@ _cpuid_skt(uint_t vendor, uint_t family, uint_t model, uint_t step) switch (vendor) { case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: synth_amd_info(family, model, step, &skt, NULL, NULL); break; @@ -779,6 +806,7 @@ _cpuid_sktstr(uint_t vendor, uint_t family, uint_t model, uint_t step) switch (vendor) { case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: synth_amd_info(family, model, step, &skt, NULL, NULL); sktmapp = amd_sktmap_strs; @@ -805,6 +833,7 @@ _cpuid_chiprev(uint_t vendor, uint_t family, uint_t model, uint_t step) switch (vendor) { case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: synth_amd_info(family, model, step, NULL, &chiprev, NULL); break; @@ -823,6 +852,7 @@ _cpuid_chiprevstr(uint_t vendor, uint_t family, uint_t model, uint_t step) switch (vendor) { case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: synth_amd_info(family, model, step, NULL, NULL, &revstr); break; @@ -851,6 +881,8 @@ _cpuid_vendorstr_to_vendorcode(char *vendorstr) return (X86_VENDOR_Intel); else if (strcmp(vendorstr, X86_VENDORSTR_AMD) == 0) return (X86_VENDOR_AMD); + else if (strcmp(vendorstr, X86_VENDORSTR_HYGON) == 0) + return (X86_VENDOR_HYGON); else if (strcmp(vendorstr, 
X86_VENDORSTR_TM) == 0) return (X86_VENDOR_TM); else if (strcmp(vendorstr, CyrixInstead) == 0) diff --git a/usr/src/uts/i86pc/os/cpupm/cpupm_amd.c b/usr/src/uts/i86pc/os/cpupm/cpupm_amd.c index 086d9a8fe6..c99c191c40 100644 --- a/usr/src/uts/i86pc/os/cpupm/cpupm_amd.c +++ b/usr/src/uts/i86pc/os/cpupm/cpupm_amd.c @@ -37,8 +37,9 @@ cpupm_amd_init(cpu_t *cp) cpupm_mach_state_t *mach_state = (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); - /* AMD? */ - if (x86_vendor != X86_VENDOR_AMD) + /* AMD or Hygon? */ + if (x86_vendor != X86_VENDOR_AMD && + x86_vendor != X86_VENDOR_HYGON) return (B_FALSE); /* diff --git a/usr/src/uts/i86pc/os/hma.c b/usr/src/uts/i86pc/os/hma.c index 0e84030ac1..a53c797e4b 100644 --- a/usr/src/uts/i86pc/os/hma.c +++ b/usr/src/uts/i86pc/os/hma.c @@ -101,6 +101,7 @@ hma_init(void) (void) hma_vmx_init(); break; case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: (void) hma_svm_init(); break; default: @@ -121,6 +122,7 @@ hma_register_backend(const char *name) is_ready = hma_vmx_ready; break; case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: is_ready = hma_svm_ready; break; default: diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c index dd2b5d703b..ac0ff1716a 100644 --- a/usr/src/uts/i86pc/os/startup.c +++ b/usr/src/uts/i86pc/os/startup.c @@ -3212,6 +3212,7 @@ setx86isalist(void) switch (x86_vendor) { case X86_VENDOR_Intel: case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: case X86_VENDOR_TM: if (is_x86_feature(x86_featureset, X86FSET_CMOV)) { /* diff --git a/usr/src/uts/intel/ia32/os/cpc_subr.c b/usr/src/uts/intel/ia32/os/cpc_subr.c index f7b86fd602..a74dfd77bc 100644 --- a/usr/src/uts/intel/ia32/os/cpc_subr.c +++ b/usr/src/uts/intel/ia32/os/cpc_subr.c @@ -140,7 +140,8 @@ kcpc_hw_init(cpu_t *cp) strands_perfmon_shared = 1; } } - } else if (cpuid_getvendor(cpu[0]) == X86_VENDOR_AMD) { + } else if (cpuid_getvendor(cpu[0]) == X86_VENDOR_AMD || + cpuid_getvendor(cpu[0]) == X86_VENDOR_HYGON) { /* * On AMD systems with HT, all of the performance * monitors exist on a per-logical CPU basis. diff --git a/usr/src/uts/intel/io/amdzen/amdzen.c b/usr/src/uts/intel/io/amdzen/amdzen.c index ac6ce9c94f..bd023a2edf 100644 --- a/usr/src/uts/intel/io/amdzen/amdzen.c +++ b/usr/src/uts/intel/io/amdzen/amdzen.c @@ -624,7 +624,7 @@ amdzen_stub_scan_cb(dev_info_t *dip, void *arg) return (DDI_WALK_CONTINUE); } - if (vid != AMDZEN_PCI_VID_AMD) { + if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) { return (DDI_WALK_CONTINUE); } @@ -737,9 +737,10 @@ amdzen_attach_stub(dev_info_t *dip, ddi_attach_cmd_t cmd) return (DDI_FAILURE); } - if (vid != AMDZEN_PCI_VID_AMD) { - dev_err(dip, CE_WARN, "expected AMD vendor ID (0x%x), found " - "0x%x", AMDZEN_PCI_VID_AMD, vid); + if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) { + dev_err(dip, CE_WARN, "expected vendor ID (0x%x), found 0x%x", + cpuid_getvendor(CPU) == X86_VENDOR_HYGON ? 
+ AMDZEN_PCI_VID_HYGON : AMDZEN_PCI_VID_AMD, vid); return (DDI_FAILURE); } @@ -996,7 +997,8 @@ _init(void) { int ret; - if (cpuid_getvendor(CPU) != X86_VENDOR_AMD) { + if (cpuid_getvendor(CPU) != X86_VENDOR_AMD && + cpuid_getvendor(CPU) != X86_VENDOR_HYGON) { return (ENOTSUP); } diff --git a/usr/src/uts/intel/io/amdzen/amdzen.h b/usr/src/uts/intel/io/amdzen/amdzen.h index 8150495911..6ba5266bd3 100644 --- a/usr/src/uts/intel/io/amdzen/amdzen.h +++ b/usr/src/uts/intel/io/amdzen/amdzen.h @@ -200,6 +200,11 @@ typedef enum { */ #define AMDZEN_PCI_VID_AMD 0x1022 +/* + * Hygon PCI ID for reference + */ +#define AMDZEN_PCI_VID_HYGON 0x1d94 + typedef enum { AMDZEN_STUB_TYPE_DF, AMDZEN_STUB_TYPE_NB diff --git a/usr/src/uts/intel/pcbe/opteron_pcbe.c b/usr/src/uts/intel/pcbe/opteron_pcbe.c index c4496bf8ca..8d567daa64 100644 --- a/usr/src/uts/intel/pcbe/opteron_pcbe.c +++ b/usr/src/uts/intel/pcbe/opteron_pcbe.c @@ -547,7 +547,8 @@ opt_pcbe_init(void) * loads this module based on its name in the module directory, but it * could have been renamed. */ - if (cpuid_getvendor(CPU) != X86_VENDOR_AMD || amd_family < 0xf) + if ((cpuid_getvendor(CPU) != X86_VENDOR_AMD || amd_family < 0xf) && + cpuid_getvendor(CPU) != X86_VENDOR_HYGON) return (-1); if (amd_family == 0xf) { @@ -556,7 +557,9 @@ opt_pcbe_init(void) "AMD Opteron & Athlon64"); } else { (void) snprintf(amd_pcbe_impl_name, sizeof (amd_pcbe_impl_name), - "AMD Family %02xh", amd_family); + "%s Family %02xh", + cpuid_getvendor(CPU) == X86_VENDOR_HYGON ? "Hygon" : "AMD", + amd_family); } /* @@ -598,7 +601,8 @@ opt_pcbe_init(void) amd_pcbe_cpuref = amd_fam_11h_bkdg; amd_events = family_11h_events; amd_generic_events = opt_generic_events; - } else if (amd_family == 0x17 && amd_model <= 0x2f) { + } else if ((amd_family == 0x17 && amd_model <= 0x2f) || + amd_family == 0x18) { amd_pcbe_cpuref = amd_fam_17h_zen1_reg; amd_events = opteron_pcbe_f17h_zen1_events; amd_generic_events = family_17h_zen1_papi_events; diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h index b75ab18f5e..2ec543677b 100644 --- a/usr/src/uts/intel/sys/x86_archext.h +++ b/usr/src/uts/intel/sys/x86_archext.h @@ -804,6 +804,9 @@ extern "C" { #define X86_VENDOR_NSC 10 #define X86_VENDORSTR_NSC "Geode by NSC" +#define X86_VENDOR_HYGON 11 +#define X86_VENDORSTR_HYGON "HygonGenuine" + /* * Vendor string max len + \0 */ @@ -968,6 +971,12 @@ extern "C" { _X86_CHIPREV_MKREV(X86_VENDOR_AMD, 0x17, 0x0009) /* + * Definitions for Hygon Family 0x18 + */ +#define X86_CHIPREV_HYGON_18_DN_A1 \ + _X86_CHIPREV_MKREV(X86_VENDOR_HYGON, 0x18, 0x0001) + +/* * Various socket/package types, extended as the need to distinguish * a new type arises. The top 8 byte identfies the vendor and the * remaining 24 bits describe 24 socket types. @@ -1026,6 +1035,15 @@ extern "C" { #define X86_SOCKET_STRX4 _X86_SOCKET_MKVAL(X86_VENDOR_AMD, 0x23) #define X86_NUM_SOCKETS_AMD 0x24 +/* + * Hygon socket types + */ +#define X86_SOCKET_SL1 _X86_SOCKET_MKVAL(X86_VENDOR_HYGON, 0x01) +#define X86_SOCKET_SL1R2 _X86_SOCKET_MKVAL(X86_VENDOR_HYGON, 0x02) +#define X86_SOCKET_DM1 _X86_SOCKET_MKVAL(X86_VENDOR_HYGON, 0x03) +#define X86_NUM_SOCKETS_HYGON 0x04 + +#define X86_NUM_SOCKETS (X86_NUM_SOCKETS_AMD + X86_NUM_SOCKETS_HYGON) /* * Definitions for Intel processor models. 
These are all for Family 6 diff --git a/usr/src/uts/sun4u/cpu/us3_common.c b/usr/src/uts/sun4u/cpu/us3_common.c index 38a06c2731..14a2096e25 100644 --- a/usr/src/uts/sun4u/cpu/us3_common.c +++ b/usr/src/uts/sun4u/cpu/us3_common.c @@ -3331,7 +3331,7 @@ ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp, case CE_ACT_DONE: break; - case CE_ACT(CE_DISP_BAD): + case CE_DISP_BAD: default: #ifdef DEBUG cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action); diff --git a/usr/src/uts/sun4u/io/px/px_hlib.c b/usr/src/uts/sun4u/io/px/px_hlib.c index 11c529dfa7..a6ae488a76 100644 --- a/usr/src/uts/sun4u/io/px/px_hlib.c +++ b/usr/src/uts/sun4u/io/px/px_hlib.c @@ -161,9 +161,9 @@ static uint64_t msiq_config_other_regs[] = { #define MSIQ_MAPPING_SIZE (MSI_MAPPING_ENTRIES * sizeof (uint64_t)) /* OPL tuning variables for link unstable issue */ -int wait_perst = 5000000; /* step 9, default: 5s */ +int wait_perst = 5000000; /* step 9, default: 5s */ int wait_enable_port = 30000; /* step 11, default: 30ms */ -int link_retry_count = 2; /* step 11, default: 2 */ +int link_retry_count = 2; /* step 11, default: 2 */ int link_status_check = 400000; /* step 11, default: 400ms */ static uint64_t msiq_suspend(devhandle_t dev_hdl, pxu_t *pxu_p); @@ -2108,7 +2108,7 @@ uint64_t hvio_intr_getstate(devhandle_t dev_hdl, sysino_t sysino, intr_state_t *intr_state) { - intr_state_t state; + uint64_t state; state = CSRA_FR((caddr_t)dev_hdl, INTERRUPT_CLEAR, SYSINO_TO_DEVINO(sysino), ENTRIES_INT_STATE); diff --git a/usr/src/uts/sun4u/sunfire/io/fhc_bd.c b/usr/src/uts/sun4u/sunfire/io/fhc_bd.c index 5bc3b2fc0b..0a1936086b 100644 --- a/usr/src/uts/sun4u/sunfire/io/fhc_bd.c +++ b/usr/src/uts/sun4u/sunfire/io/fhc_bd.c @@ -24,8 +24,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/conf.h> #include <sys/ddi.h> @@ -768,7 +766,7 @@ fhc_bdlist_prime(int first, int count, int incr) type = jtag_get_board_type(jtm->jtag_cmd, sc); switch (type) { - case -1: + case EMPTY_BOARD: fhc_bd_sc_evt(sc, SYSC_EVT_BD_EMPTY); continue; case DISK_BOARD: diff --git a/usr/src/uts/sun4u/sunfire/io/jtag.c b/usr/src/uts/sun4u/sunfire/io/jtag.c index 7dc2a74dd7..71396af2ed 100644 --- a/usr/src/uts/sun4u/sunfire/io/jtag.c +++ b/usr/src/uts/sun4u/sunfire/io/jtag.c @@ -24,8 +24,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/param.h> #include <sys/ddi.h> @@ -1165,7 +1163,7 @@ jtag_check_plus_board( /* * Returns (positive) board type if something detected, including * UNKNOWN_BOARD. - * Returns -1 if nothing there. + * Returns EMPTY_BOARD if nothing there. */ enum board_type jtag_get_board_type(volatile u_int *jreg, sysc_cfga_stat_t *sc) @@ -1261,7 +1259,7 @@ jtag_get_board_type(volatile u_int *jreg, sysc_cfga_stat_t *sc) break; case RING_BROKEN: - result = -1; + result = EMPTY_BOARD; break; default: diff --git a/usr/src/uts/sun4u/sunfire/sys/fhc.h b/usr/src/uts/sun4u/sunfire/sys/fhc.h index f66a5003cd..a76231e781 100644 --- a/usr/src/uts/sun4u/sunfire/sys/fhc.h +++ b/usr/src/uts/sun4u/sunfire/sys/fhc.h @@ -27,8 +27,6 @@ #ifndef _SYS_FHC_H #define _SYS_FHC_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -486,6 +484,7 @@ struct ft_link_list { * boards. It is used by both the kernel and user programs. */ enum board_type { + EMPTY_BOARD = -1, UNINIT_BOARD = 0, /* Uninitialized board type */ UNKNOWN_BOARD, /* Unknown board type */ CPU_BOARD, /* System board CPU(s) */ |
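The x86 portion of the diff teaches nearly every X86_VENDOR_AMD path to also accept X86_VENDOR_HYGON, since Hygon Dhyana (family 0x18) implements AMD's Zen-derived interfaces (MCA logout, SVM/HMA, extended topology leaves, the Zen data-fabric PCI stubs). The recurring predicate is open-coded at each site; the following hypothetical helper only illustrates the shape of the test, with the vendor codes taken from the x86_archext.h hunk above:

	/*
	 * Hypothetical predicate; the commit open-codes this two-way
	 * comparison at each call site rather than introducing a helper.
	 */
	static boolean_t
	vendor_is_amd_like(uint_t vendor)
	{
		return (vendor == X86_VENDOR_AMD || vendor == X86_VENDOR_HYGON);
	}

Where behavior diverges rather than matching AMD, the diff adds Hygon-specific entries instead: family 0x18 gets its own A_SKTS_18 socket row (SL1, SL1R2, DM1) and the X86_CHIPREV_HYGON_18_DN_A1 chip revision, rather than reusing an AMD table entry.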
