Diffstat (limited to 'usr/src/uts')
-rw-r--r--  usr/src/uts/common/fs/zfs/dbuf.c              | 201
-rw-r--r--  usr/src/uts/common/fs/zfs/dmu.c               |   9
-rw-r--r--  usr/src/uts/common/fs/zfs/dmu_objset.c        |  27
-rw-r--r--  usr/src/uts/common/fs/zfs/dmu_recv.c          |  14
-rw-r--r--  usr/src/uts/common/fs/zfs/dmu_send.c          |   6
-rw-r--r--  usr/src/uts/common/fs/zfs/dmu_zfetch.c        |  16
-rw-r--r--  usr/src/uts/common/fs/zfs/dnode.c             |  73
-rw-r--r--  usr/src/uts/common/fs/zfs/dnode_sync.c        |  35
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/dbuf.h          |  27
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/dmu_zfetch.h    |   5
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/dsl_dataset.h   |   1
-rw-r--r--  usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c  |   3
-rw-r--r--  usr/src/uts/i86pc/os/cmi_hw.c                 |   1
-rw-r--r--  usr/src/uts/i86pc/os/cpuid.c                  |  76
-rw-r--r--  usr/src/uts/i86pc/os/cpuid_subr.c             |  50
-rw-r--r--  usr/src/uts/i86pc/os/cpupm/cpupm_amd.c        |   5
-rw-r--r--  usr/src/uts/i86pc/os/hma.c                    |   2
-rw-r--r--  usr/src/uts/i86pc/os/startup.c                |   1
-rw-r--r--  usr/src/uts/intel/ia32/os/cpc_subr.c          |   3
-rw-r--r--  usr/src/uts/intel/io/amdzen/amdzen.c          |  12
-rw-r--r--  usr/src/uts/intel/io/amdzen/amdzen.h          |   5
-rw-r--r--  usr/src/uts/intel/pcbe/opteron_pcbe.c         |  10
-rw-r--r--  usr/src/uts/intel/sys/x86_archext.h           |  18
-rw-r--r--  usr/src/uts/sun4u/cpu/us3_common.c            |   2
-rw-r--r--  usr/src/uts/sun4u/io/px/px_hlib.c             |   6
-rw-r--r--  usr/src/uts/sun4u/sunfire/io/fhc_bd.c         |   4
-rw-r--r--  usr/src/uts/sun4u/sunfire/io/jtag.c           |   6
-rw-r--r--  usr/src/uts/sun4u/sunfire/sys/fhc.h           |   3
28 files changed, 436 insertions, 185 deletions
diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c
index ae0b1fc878..a8569e9a88 100644
--- a/usr/src/uts/common/fs/zfs/dbuf.c
+++ b/usr/src/uts/common/fs/zfs/dbuf.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -176,6 +176,7 @@ dbuf_cons(void *vdb, void *unused, int kmflag)
bzero(db, sizeof (dmu_buf_impl_t));
mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL);
+ rw_init(&db->db_rwlock, NULL, RW_DEFAULT, NULL);
cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL);
multilist_link_init(&db->db_cache_link);
zfs_refcount_create(&db->db_holds);
@@ -189,6 +190,7 @@ dbuf_dest(void *vdb, void *unused)
{
dmu_buf_impl_t *db = vdb;
mutex_destroy(&db->db_mtx);
+ rw_destroy(&db->db_rwlock);
cv_destroy(&db->db_changed);
ASSERT(!multilist_link_active(&db->db_cache_link));
zfs_refcount_destroy(&db->db_holds);
@@ -789,10 +791,10 @@ dbuf_verify(dmu_buf_impl_t *db)
db->db.db_object);
/*
* dnode_grow_indblksz() can make this fail if we don't
- * have the struct_rwlock. XXX indblksz no longer
+ * have the parent's rwlock. XXX indblksz no longer
* grows. safe to do this now?
*/
- if (RW_WRITE_HELD(&dn->dn_struct_rwlock)) {
+ if (RW_LOCK_HELD(&db->db_parent->db_rwlock)) {
ASSERT3P(db->db_blkptr, ==,
((blkptr_t *)db->db_parent->db.db_data +
db->db_blkid % epb));
@@ -868,6 +870,44 @@ dbuf_clear_data(dmu_buf_impl_t *db)
db->db_state = DB_UNCACHED;
}
+/*
+ * This function is used to lock the parent of the provided dbuf. This should be
+ * used when modifying or reading db_blkptr.
+ */
+db_lock_type_t
+dmu_buf_lock_parent(dmu_buf_impl_t *db, krw_t rw, void *tag)
+{
+ enum db_lock_type ret = DLT_NONE;
+ if (db->db_parent != NULL) {
+ rw_enter(&db->db_parent->db_rwlock, rw);
+ ret = DLT_PARENT;
+ } else if (dmu_objset_ds(db->db_objset) != NULL) {
+ rrw_enter(&dmu_objset_ds(db->db_objset)->ds_bp_rwlock, rw,
+ tag);
+ ret = DLT_OBJSET;
+ }
+ /*
+ * We only return a DLT_NONE lock when it's the top-most indirect block
+ * of the meta-dnode of the MOS.
+ */
+ return (ret);
+}
+
+/*
+ * The caller must pass the lock type back in because the block may have
+ * moved from being the topmost indirect block in a dnode (and thus having
+ * no parent) to a non-topmost block via an indirection increase. Unlocking
+ * based on the dbuf's current position would then drop the wrong lock and
+ * panic.
+ */
+void
+dmu_buf_unlock_parent(dmu_buf_impl_t *db, db_lock_type_t type, void *tag)
+{
+ if (type == DLT_PARENT)
+ rw_exit(&db->db_parent->db_rwlock);
+ else if (type == DLT_OBJSET)
+ rrw_exit(&dmu_objset_ds(db->db_objset)->ds_bp_rwlock, tag);
+}
+
static void
dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
{
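Taken together, the two helpers above bracket any access to db_blkptr, replacing the old reliance on dn_struct_rwlock. A minimal usage sketch, modeled on the call sites later in this patch (db and FTAG as in the surrounding code; the hole test is illustrative):

	db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG);
	/*
	 * db_blkptr points into the parent's db_data (or the objset's
	 * root blkptr), so it is only stable while this lock is held.
	 */
	boolean_t hole = (db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr));
	dmu_buf_unlock_parent(db, dblt, FTAG);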
@@ -1042,8 +1082,13 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags)
return (err);
}
+/*
+ * Drops db_mtx and the parent lock specified by dblt and tag before
+ * returning.
+ */
static int
-dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
+dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
+ db_lock_type_t dblt, void *tag)
{
dnode_t *dn;
zbookmark_phys_t zb;
@@ -1053,11 +1098,11 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
- /* We need the struct_rwlock to prevent db_blkptr from changing. */
- ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(db->db_state == DB_UNCACHED);
ASSERT(db->db_buf == NULL);
+ ASSERT(db->db_parent == NULL ||
+ RW_LOCK_HELD(&db->db_parent->db_rwlock));
if (db->db_blkid == DMU_BONUS_BLKID) {
/*
@@ -1094,6 +1139,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
DB_DNODE_EXIT(db);
db->db_state = DB_CACHED;
mutex_exit(&db->db_mtx);
+ dmu_buf_unlock_parent(db, dblt, tag);
return (0);
}
@@ -1134,6 +1180,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
DB_DNODE_EXIT(db);
db->db_state = DB_CACHED;
mutex_exit(&db->db_mtx);
+ dmu_buf_unlock_parent(db, dblt, tag);
return (0);
}
@@ -1150,12 +1197,14 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
"object set %llu", dmu_objset_id(db->db_objset));
DB_DNODE_EXIT(db);
mutex_exit(&db->db_mtx);
+ dmu_buf_unlock_parent(db, dblt, tag);
return (SET_ERROR(EIO));
}
err = dbuf_read_verify_dnode_crypt(db, flags);
if (err != 0) {
DB_DNODE_EXIT(db);
+ dmu_buf_unlock_parent(db, dblt, tag);
mutex_exit(&db->db_mtx);
return (err);
}
@@ -1175,11 +1224,18 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(db->db_blkptr))
zio_flags |= ZIO_FLAG_RAW;
-
- err = arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
+ /*
+ * The zio layer will copy the provided blkptr later, but we copy it now
+ * so that we can release the parent's rwlock before issuing the read.
+ * Otherwise, if dbuf_read_done were called synchronously (on an l1 cache
+ * hit), we would acquire db_mtx while holding the parent's rwlock, a
+ * lock ordering violation.
+ */
+ blkptr_t bp = *db->db_blkptr;
+ dmu_buf_unlock_parent(db, dblt, tag);
+ (void) arc_read(zio, db->db_objset->os_spa, &bp,
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags,
&aflags, &zb);
-
return (err);
}
@@ -1278,8 +1334,6 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
- if ((flags & DB_RF_HAVESTRUCT) == 0)
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
(flags & DB_RF_NOPREFETCH) == 0 && dn != NULL &&
@@ -1316,29 +1370,32 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
dbuf_set_data(db, db->db_buf);
}
mutex_exit(&db->db_mtx);
- if (err == 0 && prefetch)
- dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
- if ((flags & DB_RF_HAVESTRUCT) == 0)
- rw_exit(&dn->dn_struct_rwlock);
+ if (err == 0 && prefetch) {
+ dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
+ flags & DB_RF_HAVESTRUCT);
+ }
DB_DNODE_EXIT(db);
} else if (db->db_state == DB_UNCACHED) {
spa_t *spa = dn->dn_objset->os_spa;
boolean_t need_wait = B_FALSE;
+ db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG);
+
if (zio == NULL &&
db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) {
zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
need_wait = B_TRUE;
}
- err = dbuf_read_impl(db, zio, flags);
-
- /* dbuf_read_impl has dropped db_mtx for us */
-
- if (!err && prefetch)
- dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
+ err = dbuf_read_impl(db, zio, flags, dblt, FTAG);
+ /*
+ * dbuf_read_impl has dropped db_mtx and our parent's rwlock
+ * for us
+ */
+ if (!err && prefetch) {
+ dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
+ flags & DB_RF_HAVESTRUCT);
+ }
- if ((flags & DB_RF_HAVESTRUCT) == 0)
- rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
if (!err && need_wait)
@@ -1353,10 +1410,10 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
* occurred and the dbuf went to UNCACHED.
*/
mutex_exit(&db->db_mtx);
- if (prefetch)
- dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
- if ((flags & DB_RF_HAVESTRUCT) == 0)
- rw_exit(&dn->dn_struct_rwlock);
+ if (prefetch) {
+ dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
+ flags & DB_RF_HAVESTRUCT);
+ }
DB_DNODE_EXIT(db);
/* Skip the wait per the caller's request. */
@@ -1536,7 +1593,9 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
if (db->db_state == DB_CACHED) {
ASSERT(db->db.db_data != NULL);
arc_release(db->db_buf, db);
+ rw_enter(&db->db_rwlock, RW_WRITER);
bzero(db->db.db_data, db->db.db_size);
+ rw_exit(&db->db_rwlock);
arc_buf_freeze(db->db_buf);
}
@@ -1558,15 +1617,6 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
- /* XXX does *this* func really need the lock? */
- ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
-
- /*
- * This call to dmu_buf_will_dirty() with the dn_struct_rwlock held
- * is OK, because there can be no other references to the db
- * when we are changing its size, so no concurrent DB_FILL can
- * be happening.
- */
/*
* XXX we should be doing a dbuf_read, checking the return
* value and returning that up to our callers
@@ -1643,8 +1693,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
dnode_t *dn;
objset_t *os;
dbuf_dirty_record_t **drp, *dr;
- int drop_struct_lock = FALSE;
int txgoff = tx->tx_txg & TXG_MASK;
+ boolean_t drop_struct_rwlock = B_FALSE;
ASSERT(tx->tx_txg != 0);
ASSERT(!zfs_refcount_is_zero(&db->db_holds));
@@ -1846,15 +1896,21 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
return (dr);
}
- /*
- * The dn_struct_rwlock prevents db_blkptr from changing
- * due to a write from syncing context completing
- * while we are running, so we want to acquire it before
- * looking at db_blkptr.
- */
if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) {
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- drop_struct_lock = TRUE;
+ drop_struct_rwlock = B_TRUE;
+ }
+
+ /*
+ * If we are overwriting a dedup BP, then unless it is snapshotted,
+ * when we get to syncing context we will need to decrement its
+ * refcount in the DDT. Prefetch the relevant DDT block so that
+ * syncing context won't have to wait for the i/o.
+ */
+ if (db->db_blkptr != NULL) {
+ db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG);
+ ddt_prefetch(os->os_spa, db->db_blkptr);
+ dmu_buf_unlock_parent(db, dblt, FTAG);
}
/*
@@ -1867,19 +1923,12 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level ||
dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level);
- /*
- * If we are overwriting a dedup BP, then unless it is snapshotted,
- * when we get to syncing context we will need to decrement its
- * refcount in the DDT. Prefetch the relevant DDT block so that
- * syncing context won't have to wait for the i/o.
- */
- ddt_prefetch(os->os_spa, db->db_blkptr);
if (db->db_level == 0) {
ASSERT(!db->db_objset->os_raw_receive ||
dn->dn_maxblkid >= db->db_blkid);
dnode_new_blkid(dn, db->db_blkid, tx,
- drop_struct_lock, B_FALSE);
+ drop_struct_rwlock, B_FALSE);
ASSERT(dn->dn_maxblkid >= db->db_blkid);
}
@@ -1890,15 +1939,14 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
if (db->db_parent == NULL || db->db_parent == dn->dn_dbuf) {
int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
-
- parent = dbuf_hold_level(dn, db->db_level+1,
+ parent = dbuf_hold_level(dn, db->db_level + 1,
db->db_blkid >> epbs, FTAG);
ASSERT(parent != NULL);
parent_held = TRUE;
}
- if (drop_struct_lock)
+ if (drop_struct_rwlock)
rw_exit(&dn->dn_struct_rwlock);
- ASSERT3U(db->db_level+1, ==, parent->db_level);
+ ASSERT3U(db->db_level + 1, ==, parent->db_level);
di = dbuf_dirty(parent, tx);
if (parent_held)
dbuf_rele(parent, FTAG);
@@ -1919,14 +1967,14 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
}
mutex_exit(&db->db_mtx);
} else {
- ASSERT(db->db_level+1 == dn->dn_nlevels);
+ ASSERT(db->db_level + 1 == dn->dn_nlevels);
ASSERT(db->db_blkid < dn->dn_nblkptr);
ASSERT(db->db_parent == NULL || db->db_parent == dn->dn_dbuf);
mutex_enter(&dn->dn_mtx);
ASSERT(!list_link_active(&dr->dr_dirty_node));
list_insert_tail(&dn->dn_dirty_records[txgoff], dr);
mutex_exit(&dn->dn_mtx);
- if (drop_struct_lock)
+ if (drop_struct_rwlock)
rw_exit(&dn->dn_struct_rwlock);
}
@@ -2447,10 +2495,12 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
*parentp = NULL;
return (err);
}
+ rw_enter(&(*parentp)->db_rwlock, RW_READER);
*bpp = ((blkptr_t *)(*parentp)->db.db_data) +
(blkid & ((1ULL << epbs) - 1));
if (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))
ASSERT(BP_IS_HOLE(*bpp));
+ rw_exit(&(*parentp)->db_rwlock);
return (0);
} else {
/* the block is referenced from the dnode */
@@ -2695,7 +2745,7 @@ dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
if (blkid > dn->dn_maxblkid)
return;
- if (dnode_block_freed(dn, blkid))
+ if (level == 0 && dnode_block_freed(dn, blkid))
return;
/*
@@ -2841,7 +2891,9 @@ dbuf_hold_copy(dnode_t *dn, dmu_buf_impl_t *db, dbuf_dirty_record_t *dr)
DBUF_GET_BUFC_TYPE(db), db->db.db_size));
}
+ rw_enter(&db->db_rwlock, RW_WRITER);
bcopy(data->b_data, db->db.db_data, arc_buf_size(data));
+ rw_exit(&db->db_rwlock);
}
/*
@@ -2967,7 +3019,6 @@ int
dbuf_spill_set_blksz(dmu_buf_t *db_fake, uint64_t blksz, dmu_tx_t *tx)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
- dnode_t *dn;
if (db->db_blkid != DMU_SPILL_BLKID)
return (SET_ERROR(ENOTSUP));
@@ -2976,12 +3027,7 @@ dbuf_spill_set_blksz(dmu_buf_t *db_fake, uint64_t blksz, dmu_tx_t *tx)
ASSERT3U(blksz, <=, spa_maxblocksize(dmu_objset_spa(db->db_objset)));
blksz = P2ROUNDUP(blksz, SPA_MINBLOCKSIZE);
- DB_DNODE_ENTER(db);
- dn = DB_DNODE(db);
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
dbuf_new_size(db, blksz, tx);
- rw_exit(&dn->dn_struct_rwlock);
- DB_DNODE_EXIT(db);
return (0);
}
@@ -3697,9 +3743,9 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
mutex_exit(&db->db_mtx);
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+ db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_WRITER, FTAG);
*db->db_blkptr = *bp;
- rw_exit(&dn->dn_struct_rwlock);
+ dmu_buf_unlock_parent(db, dblt, FTAG);
}
/* ARGSUSED */
@@ -3740,9 +3786,9 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
* anybody from reading the blocks we're about to
* zero out.
*/
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+ rw_enter(&db->db_rwlock, RW_WRITER);
bzero(db->db.db_data, db->db.db_size);
- rw_exit(&dn->dn_struct_rwlock);
+ rw_exit(&db->db_rwlock);
}
DB_DNODE_EXIT(db);
}
@@ -3932,7 +3978,7 @@ dbuf_remap_impl_callback(uint64_t vdev, uint64_t offset, uint64_t size,
}
static void
-dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, dmu_tx_t *tx)
+dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
{
blkptr_t bp_copy = *bp;
spa_t *spa = dmu_objset_spa(dn->dn_objset);
@@ -3946,14 +3992,16 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, dmu_tx_t *tx)
if (spa_remap_blkptr(spa, &bp_copy, dbuf_remap_impl_callback,
&drica)) {
/*
- * The struct_rwlock prevents dbuf_read_impl() from
+ * The db_rwlock prevents dbuf_read_impl() from
* dereferencing the BP while we are changing it. To
* avoid lock contention, only grab it when we are actually
* changing the BP.
*/
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+ if (rw != NULL)
+ rw_enter(rw, RW_WRITER);
*bp = bp_copy;
- rw_exit(&dn->dn_struct_rwlock);
+ if (rw != NULL)
+ rw_exit(rw);
}
}
@@ -4026,7 +4074,7 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
if (db->db_level > 0) {
blkptr_t *bp = db->db.db_data;
for (int i = 0; i < db->db.db_size >> SPA_BLKPTRSHIFT; i++) {
- dbuf_remap_impl(dn, &bp[i], tx);
+ dbuf_remap_impl(dn, &bp[i], &db->db_rwlock, tx);
}
} else if (db->db.db_object == DMU_META_DNODE_OBJECT) {
dnode_phys_t *dnp = db->db.db_data;
@@ -4034,7 +4082,10 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
DMU_OT_DNODE);
for (int i = 0; i < db->db.db_size >> DNODE_SHIFT; i++) {
for (int j = 0; j < dnp[i].dn_nblkptr; j++) {
- dbuf_remap_impl(dn, &dnp[i].dn_blkptr[j], tx);
+ krwlock_t *lock = (dn->dn_dbuf == NULL ? NULL :
+ &dn->dn_dbuf->db_rwlock);
+ dbuf_remap_impl(dn, &dnp[i].dn_blkptr[j], lock,
+ tx);
}
}
}
diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c
index 67ad5d10f6..6620fbc43e 100644
--- a/usr/src/uts/common/fs/zfs/dmu.c
+++ b/usr/src/uts/common/fs/zfs/dmu.c
@@ -172,8 +172,8 @@ dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
uint64_t blkid;
dmu_buf_impl_t *db;
- blkid = dbuf_whichblock(dn, 0, offset);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ blkid = dbuf_whichblock(dn, 0, offset);
db = dbuf_hold(dn, blkid, tag);
rw_exit(&dn->dn_struct_rwlock);
@@ -197,8 +197,8 @@ dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
err = dnode_hold(os, object, FTAG, &dn);
if (err)
return (err);
- blkid = dbuf_whichblock(dn, 0, offset);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ blkid = dbuf_whichblock(dn, 0, offset);
db = dbuf_hold(dn, blkid, tag);
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
@@ -605,7 +605,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) {
dmu_zfetch(&dn->dn_zfetch, blkid, nblks,
- read && DNODE_IS_CACHEABLE(dn));
+ read && DNODE_IS_CACHEABLE(dn), B_TRUE);
}
rw_exit(&dn->dn_struct_rwlock);
@@ -737,7 +737,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
if (err != 0)
return;
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
/*
* offset + len - 1 is the last byte we want to prefetch for, and offset
* is the first. Then dbuf_whichblk(dn, level, off + len - 1) is the
@@ -745,6 +744,7 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
* offset) is the first. Then the number we need to prefetch is the
* last - first + 1.
*/
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
if (level > 0 || dn->dn_datablkshift != 0) {
nblks = dbuf_whichblock(dn, level, offset + len - 1) -
dbuf_whichblock(dn, level, offset) + 1;
@@ -757,7 +757,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
for (int i = 0; i < nblks; i++)
dbuf_prefetch(dn, level, blkid + i, pri, 0);
}
-
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c
index 884f0b36bb..a98097a8ee 100644
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c
@@ -684,8 +684,9 @@ dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
dsl_pool_t *dp;
dsl_dataset_t *ds;
int err;
- ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
+ ds_hold_flags_t flags;
+ flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
err = dsl_pool_hold(name, tag, &dp);
if (err != 0)
return (err);
@@ -758,8 +759,9 @@ dmu_objset_own(const char *name, dmu_objset_type_t type,
dsl_pool_t *dp;
dsl_dataset_t *ds;
int err;
- ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
+ ds_hold_flags_t flags;
+ flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
err = dsl_pool_hold(name, FTAG, &dp);
if (err != 0)
return (err);
@@ -797,8 +799,9 @@ dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
{
dsl_dataset_t *ds;
int err;
- ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
+ ds_hold_flags_t flags;
+ flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
err = dsl_dataset_own_obj(dp, obj, flags, tag, &ds);
if (err != 0)
return (err);
@@ -815,9 +818,10 @@ dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
void
dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag)
{
- ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
-
+ ds_hold_flags_t flags;
dsl_pool_t *dp = dmu_objset_pool(os);
+
+ flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
dsl_dataset_rele_flags(os->os_dsl_dataset, flags, tag);
dsl_pool_rele(dp, tag);
}
@@ -845,7 +849,9 @@ dmu_objset_refresh_ownership(dsl_dataset_t *ds, dsl_dataset_t **newds,
{
dsl_pool_t *dp;
char name[ZFS_MAX_DATASET_NAME_LEN];
+ ds_hold_flags_t flags;
+ flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
VERIFY3P(ds, !=, NULL);
VERIFY3P(ds->ds_owner, ==, tag);
VERIFY(dsl_dataset_long_held(ds));
@@ -854,21 +860,22 @@ dmu_objset_refresh_ownership(dsl_dataset_t *ds, dsl_dataset_t **newds,
dp = ds->ds_dir->dd_pool;
dsl_pool_config_enter(dp, FTAG);
- dsl_dataset_disown(ds, decrypt, tag);
- VERIFY0(dsl_dataset_own(dp, name,
- (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag, newds));
+ dsl_dataset_disown(ds, flags, tag);
+ VERIFY0(dsl_dataset_own(dp, name, flags, tag, newds));
dsl_pool_config_exit(dp, FTAG);
}
void
dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag)
{
+ ds_hold_flags_t flags;
+
+ flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
/*
* Stop upgrading thread
*/
dmu_objset_upgrade_stop(os);
- dsl_dataset_disown(os->os_dsl_dataset,
- (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag);
+ dsl_dataset_disown(os->os_dsl_dataset, flags, tag);
}
void
diff --git a/usr/src/uts/common/fs/zfs/dmu_recv.c b/usr/src/uts/common/fs/zfs/dmu_recv.c
index 39f365652e..03e0fee4ff 100644
--- a/usr/src/uts/common/fs/zfs/dmu_recv.c
+++ b/usr/src/uts/common/fs/zfs/dmu_recv.c
@@ -201,7 +201,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
uint64_t fromguid = drrb->drr_fromguid;
int flags = drrb->drr_flags;
- ds_hold_flags_t dsflags = 0;
+ ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE;
int error;
uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
dsl_dataset_t *ds;
@@ -399,7 +399,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
dsl_dataset_t *ds, *newds;
objset_t *os;
uint64_t dsobj;
- ds_hold_flags_t dsflags = 0;
+ ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE;
int error;
uint64_t crflags = 0;
dsl_crypto_params_t dummy_dcp = { 0 };
@@ -541,7 +541,7 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
dsl_pool_t *dp = dmu_tx_pool(tx);
struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
int error;
- ds_hold_flags_t dsflags = 0;
+ ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE;
uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
dsl_dataset_t *ds;
const char *tofs = drba->drba_cookie->drc_tofs;
@@ -670,7 +670,7 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx)
uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
dsl_dataset_t *ds;
objset_t *os;
- ds_hold_flags_t dsflags = 0;
+ ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE;
uint64_t dsobj;
/* 6 extra bytes for /%recv */
char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
@@ -1824,8 +1824,9 @@ static void
dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
{
dsl_dataset_t *ds = drc->drc_ds;
- ds_hold_flags_t dsflags = (drc->drc_raw) ? 0 : DS_HOLD_FLAG_DECRYPT;
+ ds_hold_flags_t dsflags;
+ dsflags = (drc->drc_raw) ? DS_HOLD_FLAG_NONE : DS_HOLD_FLAG_DECRYPT;
/*
* Wait for the txg sync before cleaning up the receive. For
* resumable receives, this ensures that our resume state has
@@ -2832,11 +2833,12 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj,
dsl_dataset_t *snapds;
guid_map_entry_t *gmep;
objset_t *os;
- ds_hold_flags_t dsflags = (raw) ? 0 : DS_HOLD_FLAG_DECRYPT;
+ ds_hold_flags_t dsflags;
int err;
ASSERT(guid_map != NULL);
+ dsflags = (raw) ? DS_HOLD_FLAG_NONE : DS_HOLD_FLAG_DECRYPT;
err = dsl_pool_hold(name, FTAG, &dp);
if (err != 0)
return (err);
diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c
index 34cfa2c011..d91a48e2ca 100644
--- a/usr/src/uts/common/fs/zfs/dmu_send.c
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c
@@ -1222,9 +1222,10 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
dsl_pool_t *dp;
dsl_dataset_t *ds;
dsl_dataset_t *fromds = NULL;
- ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT;
+ ds_hold_flags_t dsflags;
int err;
+ dsflags = (rawok) ? DS_HOLD_FLAG_NONE : DS_HOLD_FLAG_DECRYPT;
err = dsl_pool_hold(pool, FTAG, &dp);
if (err != 0)
return (err);
@@ -1287,9 +1288,10 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
dsl_pool_t *dp;
dsl_dataset_t *ds;
int err;
- ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT;
+ ds_hold_flags_t dsflags;
boolean_t owned = B_FALSE;
+ dsflags = (rawok) ? DS_HOLD_FLAG_NONE : DS_HOLD_FLAG_DECRYPT;
if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL)
return (SET_ERROR(EINVAL));
diff --git a/usr/src/uts/common/fs/zfs/dmu_zfetch.c b/usr/src/uts/common/fs/zfs/dmu_zfetch.c
index 5d6f20d072..60e0f36a5e 100644
--- a/usr/src/uts/common/fs/zfs/dmu_zfetch.c
+++ b/usr/src/uts/common/fs/zfs/dmu_zfetch.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -204,7 +204,8 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
* TRUE -- prefetch predicted data blocks plus following indirect blocks.
*/
void
-dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data)
+dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
+ boolean_t have_lock)
{
zstream_t *zs;
int64_t pf_start, ipf_start, ipf_istart, ipf_iend;
@@ -233,6 +234,9 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data)
if (blkid == 0)
return;
+ if (!have_lock)
+ rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
+
rw_enter(&zf->zf_rwlock, RW_READER);
/*
@@ -257,6 +261,10 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data)
/* Already prefetched this before. */
mutex_exit(&zs->zs_lock);
rw_exit(&zf->zf_rwlock);
+ if (!have_lock) {
+ rw_exit(&zf->zf_dnode->
+ dn_struct_rwlock);
+ }
return;
}
break;
@@ -274,6 +282,8 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data)
if (rw_tryupgrade(&zf->zf_rwlock))
dmu_zfetch_stream_create(zf, end_of_access_blkid);
rw_exit(&zf->zf_rwlock);
+ if (!have_lock)
+ rw_exit(&zf->zf_dnode->dn_struct_rwlock);
return;
}
@@ -353,5 +363,7 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data)
dbuf_prefetch(zf->zf_dnode, 1, iblk,
ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH);
}
+ if (!have_lock)
+ rw_exit(&zf->zf_dnode->dn_struct_rwlock);
ZFETCHSTAT_BUMP(zfetchstat_hits);
}
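dmu_zfetch() now takes a final have_lock argument indicating whether the caller already holds dn_struct_rwlock; when it does not, the prefetcher acquires that lock as reader for the duration of the call. Both caller styles appear in this patch:

	/* dbuf_read(): pass through whether DB_RF_HAVESTRUCT was given. */
	dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
	    flags & DB_RF_HAVESTRUCT);

	/* dmu_buf_hold_array_by_dnode(): dn_struct_rwlock is already held. */
	dmu_zfetch(&dn->dn_zfetch, blkid, nblks,
	    read && DNODE_IS_CACHEABLE(dn), B_TRUE);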
diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c
index 6550a1f066..53aeb42c0e 100644
--- a/usr/src/uts/common/fs/zfs/dnode.c
+++ b/usr/src/uts/common/fs/zfs/dnode.c
@@ -1346,7 +1346,6 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
}
blk = dbuf_whichblock(mdn, 0, object * sizeof (dnode_phys_t));
-
db = dbuf_hold(mdn, blk, FTAG);
if (drop_struct_lock)
rw_exit(&mdn->dn_struct_rwlock);
@@ -1783,10 +1782,11 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
/* resize the old block */
err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db);
- if (err == 0)
+ if (err == 0) {
dbuf_new_size(db, size, tx);
- else if (err != ENOENT)
+ } else if (err != ENOENT) {
goto fail;
+ }
dnode_setdblksz(dn, size);
dnode_setdirty(dn, tx);
@@ -2021,7 +2021,6 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
int trunc = FALSE;
int epbs;
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
blksz = dn->dn_datablksz;
blkshift = dn->dn_datablkshift;
epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
@@ -2038,7 +2037,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
head = P2NPHASE(off, blksz);
blkoff = P2PHASE(off, blksz);
if ((off >> blkshift) > dn->dn_maxblkid)
- goto out;
+ return;
} else {
ASSERT(dn->dn_maxblkid == 0);
if (off == 0 && len >= blksz) {
@@ -2047,12 +2046,15 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
*/
blkid = 0;
nblks = 1;
- if (dn->dn_nlevels > 1)
+ if (dn->dn_nlevels > 1) {
+ rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
dnode_dirty_l1(dn, 0, tx);
+ rw_exit(&dn->dn_struct_rwlock);
+ }
goto done;
} else if (off >= blksz) {
/* Freeing past end-of-data */
- goto out;
+ return;
} else {
/* Freeing part of the block. */
head = blksz - off;
@@ -2062,19 +2064,26 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
}
/* zero out any partial block data at the start of the range */
if (head) {
+ int res;
ASSERT3U(blkoff + head, ==, blksz);
if (len < head)
head = len;
- if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off),
- TRUE, FALSE, FTAG, &db) == 0) {
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ res = dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off),
+ TRUE, FALSE, FTAG, &db);
+ rw_exit(&dn->dn_struct_rwlock);
+ if (res == 0) {
caddr_t data;
+ boolean_t dirty;
+ db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER,
+ FTAG);
/* don't dirty if it isn't on disk and isn't dirty */
- if (db->db_last_dirty ||
- (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
- rw_exit(&dn->dn_struct_rwlock);
+ dirty = db->db_last_dirty ||
+ (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr));
+ dmu_buf_unlock_parent(db, dblt, FTAG);
+ if (dirty) {
dmu_buf_will_dirty(&db->db, tx);
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
data = db->db.db_data;
bzero(data + blkoff, head);
}
@@ -2086,11 +2095,11 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
/* If the range was less than one block, we're done */
if (len == 0)
- goto out;
+ return;
/* If the remaining range is past end of file, we're done */
if ((off >> blkshift) > dn->dn_maxblkid)
- goto out;
+ return;
ASSERT(ISP2(blksz));
if (trunc)
@@ -2101,16 +2110,23 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
ASSERT0(P2PHASE(off, blksz));
/* zero out any partial block data at the end of the range */
if (tail) {
+ int res;
if (len < tail)
tail = len;
- if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off+len),
- TRUE, FALSE, FTAG, &db) == 0) {
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ res = dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off+len),
+ TRUE, FALSE, FTAG, &db);
+ rw_exit(&dn->dn_struct_rwlock);
+ if (res == 0) {
+ boolean_t dirty;
/* don't dirty if not on disk and not dirty */
- if (db->db_last_dirty ||
- (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
- rw_exit(&dn->dn_struct_rwlock);
+ db_lock_type_t type = dmu_buf_lock_parent(db, RW_READER,
+ FTAG);
+ dirty = db->db_last_dirty ||
+ (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr));
+ dmu_buf_unlock_parent(db, type, FTAG);
+ if (dirty) {
dmu_buf_will_dirty(&db->db, tx);
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
bzero(db->db.db_data, tail);
}
dbuf_rele(db, FTAG);
@@ -2120,7 +2136,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
/* If the range did not include a full block, we are done */
if (len == 0)
- goto out;
+ return;
ASSERT(IS_P2ALIGNED(off, blksz));
ASSERT(trunc || IS_P2ALIGNED(len, blksz));
@@ -2150,6 +2166,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
* amount of space if we copy the freed BPs into deadlists.
*/
if (dn->dn_nlevels > 1) {
+ rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
uint64_t first, last;
first = blkid >> epbs;
@@ -2194,6 +2211,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
dnode_dirty_l1(dn, i, tx);
}
+ rw_exit(&dn->dn_struct_rwlock);
}
done:
@@ -2215,9 +2233,6 @@ done:
dbuf_free_range(dn, blkid, blkid + nblks - 1, tx);
dnode_setdirty(dn, tx);
-out:
-
- rw_exit(&dn->dn_struct_rwlock);
}
static boolean_t
@@ -2329,6 +2344,8 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
dprintf("probing object %llu offset %llx level %d of %u\n",
dn->dn_object, *offset, lvl, dn->dn_phys->dn_nlevels);
+ ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
+
hole = ((flags & DNODE_FIND_HOLE) != 0);
inc = (flags & DNODE_FIND_BACKWARDS) ? -1 : 1;
ASSERT(txg == 0 || !hole);
@@ -2361,9 +2378,9 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
return (error);
}
data = db->db.db_data;
+ rw_enter(&db->db_rwlock, RW_READER);
}
-
if (db != NULL && txg != 0 && (db->db_blkptr == NULL ||
db->db_blkptr->blk_birth <= txg ||
BP_IS_HOLE(db->db_blkptr))) {
@@ -2423,8 +2440,10 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
error = SET_ERROR(ESRCH);
}
- if (db)
+ if (db != NULL) {
+ rw_exit(&db->db_rwlock);
dbuf_rele(db, FTAG);
+ }
return (error);
}
diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c
index 4a060403da..396d58da17 100644
--- a/usr/src/uts/common/fs/zfs/dnode_sync.c
+++ b/usr/src/uts/common/fs/zfs/dnode_sync.c
@@ -52,7 +52,6 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
/* this dnode can't be paged out because it's dirty */
ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
- ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0);
db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG);
@@ -62,8 +61,24 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset,
dn->dn_object, dn->dn_phys->dn_nlevels);
+ /*
+ * Lock ordering requires that we hold the children's db_mutexes (by
+ * calling dbuf_find()) before holding the parent's db_rwlock. The lock
+ * order is imposed by dbuf_read's steps of "grab the lock to protect
+ * db_parent, get db_parent, hold db_parent's db_rwlock".
+ */
+ dmu_buf_impl_t *children[DN_MAX_NBLKPTR];
+ ASSERT3U(nblkptr, <=, DN_MAX_NBLKPTR);
+ for (i = 0; i < nblkptr; i++) {
+ children[i] =
+ dbuf_find(dn->dn_objset, dn->dn_object, old_toplvl, i);
+ }
+
/* transfer dnode's block pointers to new indirect block */
(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
+ if (dn->dn_dbuf != NULL)
+ rw_enter(&dn->dn_dbuf->db_rwlock, RW_WRITER);
+ rw_enter(&db->db_rwlock, RW_WRITER);
ASSERT(db->db.db_data);
ASSERT(arc_released(db->db_buf));
ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
@@ -73,8 +88,7 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
/* set dbuf's parent pointers to new indirect buf */
for (i = 0; i < nblkptr; i++) {
- dmu_buf_impl_t *child =
- dbuf_find(dn->dn_objset, dn->dn_object, old_toplvl, i);
+ dmu_buf_impl_t *child = children[i];
if (child == NULL)
continue;
@@ -107,6 +121,10 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr);
+ rw_exit(&db->db_rwlock);
+ if (dn->dn_dbuf != NULL)
+ rw_exit(&dn->dn_dbuf->db_rwlock);
+
dbuf_rele(db, FTAG);
rw_exit(&dn->dn_struct_rwlock);
@@ -183,7 +201,7 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
ASSERT(db->db_level == 1);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- err = dbuf_hold_impl(dn, db->db_level-1,
+ err = dbuf_hold_impl(dn, db->db_level - 1,
(db->db_blkid << epbs) + i, TRUE, FALSE, FTAG, &child);
rw_exit(&dn->dn_struct_rwlock);
if (err == ENOENT)
@@ -281,7 +299,9 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
* ancestor of the first or last block to be freed. The first and
* last L1 indirect blocks are always dirtied by dnode_free_range().
*/
+ db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG);
VERIFY(BP_GET_FILL(db->db_blkptr) == 0 || db->db_dirtycnt > 0);
+ dmu_buf_unlock_parent(db, dblt, FTAG);
dbuf_release_bp(db);
bp = db->db.db_data;
@@ -307,7 +327,9 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
if (db->db_level == 1) {
FREE_VERIFY(db, start, end, tx);
- free_blocks(dn, bp, end-start+1, tx);
+ rw_enter(&db->db_rwlock, RW_WRITER);
+ free_blocks(dn, bp, end - start + 1, tx);
+ rw_exit(&db->db_rwlock);
} else {
for (uint64_t id = start; id <= end; id++, bp++) {
if (BP_IS_HOLE(bp))
@@ -324,10 +346,12 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
}
if (free_indirects) {
+ rw_enter(&db->db_rwlock, RW_WRITER);
for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++)
ASSERT(BP_IS_HOLE(bp));
bzero(db->db.db_data, db->db.db_size);
free_blocks(dn, db->db_blkptr, 1, tx);
+ rw_exit(&db->db_rwlock);
}
DB_DNODE_EXIT(db);
@@ -379,7 +403,6 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
VERIFY0(dbuf_hold_impl(dn, dnlevel - 1, i,
TRUE, FALSE, FTAG, &db));
rw_exit(&dn->dn_struct_rwlock);
-
free_children(db, blkid, nblks, free_indirects, tx);
dbuf_rele(db, FTAG);
}
diff --git a/usr/src/uts/common/fs/zfs/sys/dbuf.h b/usr/src/uts/common/fs/zfs/sys/dbuf.h
index 271232c61c..7482006eb1 100644
--- a/usr/src/uts/common/fs/zfs/sys/dbuf.h
+++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h
@@ -108,6 +108,12 @@ typedef enum override_states {
DR_OVERRIDDEN
} override_states_t;
+typedef enum db_lock_type {
+ DLT_NONE,
+ DLT_PARENT,
+ DLT_OBJSET
+} db_lock_type_t;
+
typedef struct dbuf_dirty_record {
/* link on our parents dirty list */
list_node_t dr_dirty_node;
@@ -217,6 +223,22 @@ typedef struct dmu_buf_impl {
*/
uint8_t db_level;
+ /*
+ * Protects db_buf's contents if they contain an indirect block or data
+ * block of the meta-dnode. We use this lock to protect the structure of
+ * the block tree. This means that when modifying this dbuf's data, we
+ * grab its rwlock. When modifying its parent's data (including the
+ * blkptr to this dbuf), we grab the parent's rwlock. The lock ordering
+ * for this lock is:
+ * 1) dn_struct_rwlock
+ * 2) db_rwlock
+ * We don't currently grab multiple dbufs' db_rwlocks at once.
+ */
+ krwlock_t db_rwlock;
+
+ /* buffer holding our data */
+ arc_buf_t *db_buf;
+
/* db_mtx protects the members below */
kmutex_t db_mtx;
@@ -232,9 +254,6 @@ typedef struct dmu_buf_impl {
*/
zfs_refcount_t db_holds;
- /* buffer holding our data */
- arc_buf_t *db_buf;
-
kcondvar_t db_changed;
dbuf_dirty_record_t *db_data_pending;
@@ -336,6 +355,8 @@ void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
void dbuf_unoverride(dbuf_dirty_record_t *dr);
void dbuf_sync_list(list_t *list, int level, dmu_tx_t *tx);
void dbuf_release_bp(dmu_buf_impl_t *db);
+db_lock_type_t dmu_buf_lock_parent(dmu_buf_impl_t *db, krw_t rw, void *tag);
+void dmu_buf_unlock_parent(dmu_buf_impl_t *db, db_lock_type_t type, void *tag);
boolean_t dbuf_can_remap(const dmu_buf_impl_t *buf);
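The comment added above pins the lock order: dn_struct_rwlock first, db_rwlock second, and never two db_rwlocks at once. A minimal sketch of a path that needs both locks (dn and db assumed held; the bzero stands in for any modification of an indirect block's contents):

	rw_enter(&dn->dn_struct_rwlock, RW_READER);	/* 1st: tree structure */
	rw_enter(&db->db_rwlock, RW_WRITER);		/* 2nd: block contents */
	bzero(db->db.db_data, db->db.db_size);
	rw_exit(&db->db_rwlock);
	rw_exit(&dn->dn_struct_rwlock);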
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_zfetch.h b/usr/src/uts/common/fs/zfs/sys/dmu_zfetch.h
index 21a3ff3a20..d426cc282b 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu_zfetch.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_zfetch.h
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
*/
#ifndef _DMU_ZFETCH_H
@@ -66,7 +66,8 @@ void zfetch_fini(void);
void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_fini(zfetch_t *);
-void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t);
+void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t,
+ boolean_t);
#ifdef __cplusplus
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
index 189376eefc..0fd7e1a7e2 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
@@ -306,6 +306,7 @@ typedef struct dsl_dataset_snapshot_arg {
/* flags for holding the dataset */
typedef enum ds_hold_flags {
+ DS_HOLD_FLAG_NONE = 0 << 0,
DS_HOLD_FLAG_DECRYPT = 1 << 0 /* needs access to encrypted data */
} ds_hold_flags_t;
diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c
index a7ea684f9c..2f71105178 100644
--- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c
+++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c
@@ -1366,7 +1366,8 @@ gcpu_mca_init(cmi_hdl_t hdl)
*/
if (!gcpu_suppress_log_on_init &&
((vendor == X86_VENDOR_Intel && family >= 0xf) ||
- vendor == X86_VENDOR_AMD))
+ vendor == X86_VENDOR_AMD ||
+ vendor == X86_VENDOR_HYGON))
gcpu_mca_logout(hdl, NULL, -1ULL, NULL, B_FALSE,
GCPU_MPT_WHAT_POKE_ERR);
diff --git a/usr/src/uts/i86pc/os/cmi_hw.c b/usr/src/uts/i86pc/os/cmi_hw.c
index aa549569b0..fb59826431 100644
--- a/usr/src/uts/i86pc/os/cmi_hw.c
+++ b/usr/src/uts/i86pc/os/cmi_hw.c
@@ -1272,6 +1272,7 @@ cmi_hdl_create(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
switch (vendor) {
case X86_VENDOR_Intel:
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
if (cmi_ext_topo_check == 0) {
cpuid_get_ext_topo((cpu_t *)priv, &cmi_core_nbits,
&cmi_strand_nbits);
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index 6f54dee7f9..c40173d4c8 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -1817,6 +1817,7 @@ platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
}
break;
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
switch (eax) {
case 0x80000001:
@@ -2077,7 +2078,8 @@ cpuid_gather_apicid(struct cpuid_info *cpi)
}
}
- if (cpi->cpi_vendor == X86_VENDOR_AMD &&
+ if ((cpi->cpi_vendor == X86_VENDOR_AMD ||
+ cpi->cpi_vendor == X86_VENDOR_HYGON) &&
is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
return (cpi->cpi_extd[0x1e].cp_eax);
@@ -2742,7 +2744,8 @@ cpuid_use_amd_retpoline(struct cpuid_info *cpi)
uint64_t val;
on_trap_data_t otd;
- if (cpi->cpi_vendor != X86_VENDOR_AMD)
+ if (cpi->cpi_vendor != X86_VENDOR_AMD &&
+ cpi->cpi_vendor != X86_VENDOR_HYGON)
return (B_FALSE);
/*
@@ -2881,7 +2884,8 @@ cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
x86_spectrev2_mitigation_t v2mit;
- if (cpi->cpi_vendor == X86_VENDOR_AMD &&
+ if ((cpi->cpi_vendor == X86_VENDOR_AMD ||
+ cpi->cpi_vendor == X86_VENDOR_HYGON) &&
cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB)
add_x86_feature(featureset, X86FSET_IBPB);
@@ -3092,7 +3096,8 @@ cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
cpi = cpu->cpu_m.mcpu_cpi;
- if (cpi->cpi_vendor == X86_VENDOR_AMD) {
+ if (cpi->cpi_vendor == X86_VENDOR_AMD ||
+ cpi->cpi_vendor == X86_VENDOR_HYGON) {
cpuid_gather_amd_topology_leaves(cpu);
}
@@ -3108,6 +3113,7 @@ cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
&cpi->cpi_ncore_per_chip);
break;
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
cpuid_amd_ncores(cpi, &cpi->cpi_ncpu_per_chip,
&cpi->cpi_ncore_per_chip);
break;
@@ -3157,7 +3163,8 @@ cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
cpi->cpi_clogid = 0;
cpi->cpi_coreid = cpu->cpu_id;
cpi->cpi_pkgcoreid = 0;
- if (cpi->cpi_vendor == X86_VENDOR_AMD) {
+ if (cpi->cpi_vendor == X86_VENDOR_AMD ||
+ cpi->cpi_vendor == X86_VENDOR_HYGON) {
cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
} else {
cpi->cpi_procnodeid = cpi->cpi_chipid;
@@ -3168,6 +3175,7 @@ cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
cpuid_intel_getids(cpu, featureset);
break;
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
cpuid_amd_getids(cpu, featureset);
break;
default:
@@ -3358,6 +3366,9 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
if (CPI_FAMILY(cpi) == 0xf)
cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
break;
+ case X86_VENDOR_HYGON:
+ cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
+ break;
default:
if (cpi->cpi_model == 0xf)
cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
@@ -3471,6 +3482,10 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
#endif
break;
+ case X86_VENDOR_HYGON:
+ /* Enable all for Hygon Dhyana CPU */
+ mask_ecx = 0xffffffff;
+ break;
case X86_VENDOR_TM:
/*
* workaround the NT workaround in CMS 4.1
@@ -3934,6 +3949,7 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
x86_type == X86_TYPE_CYRIX_GXm)
xcpuid++;
break;
+ case X86_VENDOR_HYGON:
case X86_VENDOR_Centaur:
case X86_VENDOR_TM:
default:
@@ -3955,6 +3971,7 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
switch (cpi->cpi_vendor) {
case X86_VENDOR_Intel:
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
if (cpi->cpi_xmaxeax < 0x80000001)
break;
cp = &cpi->cpi_extd[1];
@@ -3998,7 +4015,8 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
add_x86_feature(featureset, X86FSET_1GPG);
}
- if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
+ if ((cpi->cpi_vendor == X86_VENDOR_AMD ||
+ cpi->cpi_vendor == X86_VENDOR_HYGON) &&
(cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
(cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
add_x86_feature(featureset, X86FSET_SSE4A);
@@ -4019,7 +4037,8 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
* that AMD processors don't support sysenter
* in long mode at all, so don't try to program them.
*/
- if (x86_vendor == X86_VENDOR_AMD) {
+ if (x86_vendor == X86_VENDOR_AMD ||
+ x86_vendor == X86_VENDOR_HYGON) {
remove_x86_feature(featureset, X86FSET_SEP);
}
@@ -4073,6 +4092,7 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
}
/*FALLTHROUGH*/
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8)
break;
cp = &cpi->cpi_extd[8];
@@ -4084,7 +4104,8 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
/*
* AMD uses ebx for some extended functions.
*/
- if (cpi->cpi_vendor == X86_VENDOR_AMD) {
+ if (cpi->cpi_vendor == X86_VENDOR_AMD ||
+ cpi->cpi_vendor == X86_VENDOR_HYGON) {
/*
* While we're here, check for the AMD "Error
* Pointer Zero/Restore" feature. This can be
@@ -4120,6 +4141,7 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
switch (cpi->cpi_vendor) {
case X86_VENDOR_Intel:
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
if (cpi->cpi_maxeax >= 7) {
cp = &cpi->cpi_extd[7];
cp->cp_eax = 0x80000007;
@@ -4152,7 +4174,8 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
cpi->cpi_model, cpi->cpi_step);
- if (cpi->cpi_vendor == X86_VENDOR_AMD) {
+ if (cpi->cpi_vendor == X86_VENDOR_AMD ||
+ cpi->cpi_vendor == X86_VENDOR_HYGON) {
if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8 &&
cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
/* Special handling for AMD FP not necessary. */
@@ -5032,7 +5055,8 @@ cpuid_pass3(cpu_t *cpu)
cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
if ((cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) ||
- (cpi->cpi_vendor == X86_VENDOR_AMD &&
+ ((cpi->cpi_vendor == X86_VENDOR_AMD ||
+ cpi->cpi_vendor == X86_VENDOR_HYGON) &&
cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1d &&
is_x86_feature(x86_featureset, X86FSET_TOPOEXT))) {
uint32_t leaf;
@@ -5401,6 +5425,7 @@ cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
/*FALLTHROUGH*/
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
edx = &cpi->cpi_support[AMD_EDX_FEATURES];
ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
@@ -5417,6 +5442,7 @@ cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
break;
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
*edx &= ~CPUID_AMD_EDX_TSCP;
if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
@@ -5459,6 +5485,7 @@ cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
switch (cpi->cpi_vendor) {
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
if (*edx & CPUID_AMD_EDX_TSCP)
hwcap_flags |= AV_386_TSCP;
if (*ecx & CPUID_AMD_ECX_AHF64)
@@ -5603,7 +5630,8 @@ cpuid_syscall32_insn(cpu_t *cpu)
{
struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
- if (cpi->cpi_vendor == X86_VENDOR_AMD &&
+ if ((cpi->cpi_vendor == X86_VENDOR_AMD ||
+ cpi->cpi_vendor == X86_VENDOR_HYGON) &&
cpi->cpi_xmaxeax >= 0x80000001 &&
(CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
return (1);
@@ -5821,7 +5849,9 @@ cpuid_have_cr8access(cpu_t *cpu)
ASSERT(cpu != NULL);
cpi = cpu->cpu_m.mcpu_cpi;
- if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
+ if ((cpi->cpi_vendor == X86_VENDOR_AMD ||
+ cpi->cpi_vendor == X86_VENDOR_HYGON) &&
+ cpi->cpi_maxeax >= 1 &&
(CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
return (1);
return (0);
@@ -6789,6 +6819,8 @@ x86_which_cacheinfo(struct cpuid_info *cpi)
(cpi->cpi_family == 5 && cpi->cpi_model >= 1))
return (X86_VENDOR_AMD);
break;
+ case X86_VENDOR_HYGON:
+ return (X86_VENDOR_AMD);
case X86_VENDOR_TM:
if (cpi->cpi_family >= 5)
return (X86_VENDOR_AMD);
@@ -6885,6 +6917,9 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
case X86_VENDOR_AMD:
create = cpi->cpi_family >= 0xf;
break;
+ case X86_VENDOR_HYGON:
+ create = 1;
+ break;
default:
create = 0;
break;
@@ -6901,6 +6936,9 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
case X86_VENDOR_AMD:
create = CPI_FAMILY(cpi) == 0xf;
break;
+ case X86_VENDOR_HYGON:
+ create = 1;
+ break;
default:
create = 0;
break;
@@ -6912,6 +6950,7 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
/* generation */
switch (cpi->cpi_vendor) {
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
/*
* AMD K5 model 1 was the first part to support this
*/
@@ -6938,6 +6977,9 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
case X86_VENDOR_AMD:
create = cpi->cpi_family >= 0xf;
break;
+ case X86_VENDOR_HYGON:
+ create = 1;
+ break;
default:
create = 0;
break;
@@ -6958,6 +7000,9 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
case X86_VENDOR_AMD:
create = cpi->cpi_family >= 0xf;
break;
+ case X86_VENDOR_HYGON:
+ create = 1;
+ break;
default:
create = 0;
break;
@@ -6988,6 +7033,9 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
case X86_VENDOR_AMD:
create = cpi->cpi_family >= 0xf;
break;
+ case X86_VENDOR_HYGON:
+ create = 1;
+ break;
default:
create = 0;
break;
@@ -7000,6 +7048,7 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
switch (cpi->cpi_vendor) {
case X86_VENDOR_Intel:
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
case X86_VENDOR_Cyrix:
case X86_VENDOR_TM:
case X86_VENDOR_Centaur:
@@ -7513,7 +7562,8 @@ cpuid_pass_ucode(cpu_t *cpu, uchar_t *fset)
cp.cp_ecx = 0;
(void) __cpuid_insn(&cp);
cpi->cpi_std[7] = cp;
- } else if (cpi->cpi_vendor == X86_VENDOR_AMD) {
+ } else if (cpi->cpi_vendor == X86_VENDOR_AMD ||
+ cpi->cpi_vendor == X86_VENDOR_HYGON) {
/* No xcpuid support */
if (cpi->cpi_family < 5 ||
(cpi->cpi_family == 5 && cpi->cpi_model < 1))
diff --git a/usr/src/uts/i86pc/os/cpuid_subr.c b/usr/src/uts/i86pc/os/cpuid_subr.c
index faa3e75b03..934b5d547d 100644
--- a/usr/src/uts/i86pc/os/cpuid_subr.c
+++ b/usr/src/uts/i86pc/os/cpuid_subr.c
@@ -88,12 +88,13 @@
* 15 for family 0x17, models 30 - 3f
* 16 for family 0x17, models 60 - 6f
* 17 for family 0x17, models 70 - 7f
- * 18 for family 0x19, models 00 - 0f
- * 19 for family 0x19, models 20 - 2f
+ * 18 for family 0x18, models 00 - 0f
+ * 19 for family 0x19, models 00 - 0f
+ * 20 for family 0x19, models 20 - 2f
* Second index by (model & 0x3) for family 0fh,
* CPUID pkg bits (Fn8000_0001_EBX[31:28]) for later families.
*/
-static uint32_t amd_skts[20][8] = {
+static uint32_t amd_skts[21][8] = {
/*
* Family 0xf revisions B through E
*/
@@ -365,7 +366,7 @@ static uint32_t amd_skts[20][8] = {
},
/*
- * Family 0x19 models 00-0f (Zen 3 - Milan)
+ * Family 0x18 models 00-0f (Dhyana)
*/
#define A_SKTS_18 18
{
@@ -373,6 +374,21 @@ static uint32_t amd_skts[20][8] = {
X86_SOCKET_UNKNOWN, /* 0b001 */
X86_SOCKET_UNKNOWN, /* 0b010 */
X86_SOCKET_UNKNOWN, /* 0b011 */
+ X86_SOCKET_SL1, /* 0b100 */
+ X86_SOCKET_UNKNOWN, /* 0b101 */
+ X86_SOCKET_DM1, /* 0b110 */
+ X86_SOCKET_SL1R2 /* 0b111 */
+ },
+
+ /*
+ * Family 0x19 models 00-0f (Zen 3 - Milan)
+ */
+#define A_SKTS_19 19
+ {
+ X86_SOCKET_UNKNOWN, /* 0b000 */
+ X86_SOCKET_UNKNOWN, /* 0b001 */
+ X86_SOCKET_UNKNOWN, /* 0b010 */
+ X86_SOCKET_UNKNOWN, /* 0b011 */
X86_SOCKET_SP3, /* 0b100 */
X86_SOCKET_UNKNOWN, /* 0b101 */
X86_SOCKET_UNKNOWN, /* 0b110 */
@@ -382,7 +398,7 @@ static uint32_t amd_skts[20][8] = {
/*
* Family 0x19 models 20-2f (Zen 3 - Vermeer)
*/
-#define A_SKTS_19 19
+#define A_SKTS_20 20
{
X86_SOCKET_UNKNOWN, /* 0b000 */
X86_SOCKET_UNKNOWN, /* 0b001 */
@@ -399,7 +415,7 @@ struct amd_sktmap_s {
uint32_t skt_code;
char sktstr[16];
};
-static struct amd_sktmap_s amd_sktmap_strs[X86_NUM_SOCKETS_AMD + 1] = {
+static struct amd_sktmap_s amd_sktmap_strs[X86_NUM_SOCKETS + 1] = {
{ X86_SOCKET_754, "754" },
{ X86_SOCKET_939, "939" },
{ X86_SOCKET_940, "940" },
@@ -434,6 +450,9 @@ static struct amd_sktmap_s amd_sktmap_strs[X86_NUM_SOCKETS_AMD + 1] = {
{ X86_SOCKET_FP5, "FP5" },
{ X86_SOCKET_FP6, "FP6" },
{ X86_SOCKET_STRX4, "sTRX4" },
+ { X86_SOCKET_SL1, "SL1" },
+ { X86_SOCKET_SL1R2, "SL1R2" },
+ { X86_SOCKET_DM1, "DM1" },
{ X86_SOCKET_UNKNOWN, "Unknown" }
};
@@ -459,8 +478,9 @@ static const struct amd_skt_mapent {
{ 0x17, 0x30, 0x3f, A_SKTS_15 },
{ 0x17, 0x60, 0x6f, A_SKTS_16 },
{ 0x17, 0x70, 0x7f, A_SKTS_17 },
- { 0x19, 0x00, 0x0f, A_SKTS_18 },
- { 0x19, 0x20, 0x2f, A_SKTS_19 }
+ { 0x18, 0x00, 0x0f, A_SKTS_18 },
+ { 0x19, 0x00, 0x0f, A_SKTS_19 },
+ { 0x19, 0x20, 0x2f, A_SKTS_20 }
};
/*
@@ -629,7 +649,13 @@ static const struct amd_rev_mapent {
A_SKTS_15 },
{ 0x17, 0x71, 0x71, 0x0, 0x0, X86_CHIPREV_AMD_17_MTS_B0, "MTS-B0",
- A_SKTS_17 }
+ A_SKTS_17 },
+
+ /*
+ * =============== HygonGenuine Family 0x18 ===============
+ */
+ { 0x18, 0x00, 0x00, 0x1, 0x1, X86_CHIPREV_HYGON_18_DN_A1, "DN_A1",
+ A_SKTS_18 },
};
/*
@@ -759,6 +785,7 @@ _cpuid_skt(uint_t vendor, uint_t family, uint_t model, uint_t step)
switch (vendor) {
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
synth_amd_info(family, model, step, &skt, NULL, NULL);
break;
@@ -779,6 +806,7 @@ _cpuid_sktstr(uint_t vendor, uint_t family, uint_t model, uint_t step)
switch (vendor) {
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
synth_amd_info(family, model, step, &skt, NULL, NULL);
sktmapp = amd_sktmap_strs;
@@ -805,6 +833,7 @@ _cpuid_chiprev(uint_t vendor, uint_t family, uint_t model, uint_t step)
switch (vendor) {
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
synth_amd_info(family, model, step, NULL, &chiprev, NULL);
break;
@@ -823,6 +852,7 @@ _cpuid_chiprevstr(uint_t vendor, uint_t family, uint_t model, uint_t step)
switch (vendor) {
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
synth_amd_info(family, model, step, NULL, NULL, &revstr);
break;
@@ -851,6 +881,8 @@ _cpuid_vendorstr_to_vendorcode(char *vendorstr)
return (X86_VENDOR_Intel);
else if (strcmp(vendorstr, X86_VENDORSTR_AMD) == 0)
return (X86_VENDOR_AMD);
+ else if (strcmp(vendorstr, X86_VENDORSTR_HYGON) == 0)
+ return (X86_VENDOR_HYGON);
else if (strcmp(vendorstr, X86_VENDORSTR_TM) == 0)
return (X86_VENDOR_TM);
else if (strcmp(vendorstr, CyrixInstead) == 0)
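The new X86_VENDORSTR_HYGON string is what CPUID leaf 0 reports on Dhyana parts, and the branch added above maps it to the new vendor code. A tiny sketch of that mapping (the ASSERT is illustrative, not from this patch):

	char vendorstr[] = "HygonGenuine";	/* X86_VENDORSTR_HYGON */
	uint32_t vendor = _cpuid_vendorstr_to_vendorcode(vendorstr);
	ASSERT3U(vendor, ==, X86_VENDOR_HYGON);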
diff --git a/usr/src/uts/i86pc/os/cpupm/cpupm_amd.c b/usr/src/uts/i86pc/os/cpupm/cpupm_amd.c
index 086d9a8fe6..c99c191c40 100644
--- a/usr/src/uts/i86pc/os/cpupm/cpupm_amd.c
+++ b/usr/src/uts/i86pc/os/cpupm/cpupm_amd.c
@@ -37,8 +37,9 @@ cpupm_amd_init(cpu_t *cp)
cpupm_mach_state_t *mach_state =
(cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
- /* AMD? */
- if (x86_vendor != X86_VENDOR_AMD)
+ /* AMD or Hygon? */
+ if (x86_vendor != X86_VENDOR_AMD &&
+ x86_vendor != X86_VENDOR_HYGON)
return (B_FALSE);
/*
diff --git a/usr/src/uts/i86pc/os/hma.c b/usr/src/uts/i86pc/os/hma.c
index 0e84030ac1..a53c797e4b 100644
--- a/usr/src/uts/i86pc/os/hma.c
+++ b/usr/src/uts/i86pc/os/hma.c
@@ -101,6 +101,7 @@ hma_init(void)
(void) hma_vmx_init();
break;
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
(void) hma_svm_init();
break;
default:
@@ -121,6 +122,7 @@ hma_register_backend(const char *name)
is_ready = hma_vmx_ready;
break;
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
is_ready = hma_svm_ready;
break;
default:
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index dd2b5d703b..ac0ff1716a 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -3212,6 +3212,7 @@ setx86isalist(void)
switch (x86_vendor) {
case X86_VENDOR_Intel:
case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
case X86_VENDOR_TM:
if (is_x86_feature(x86_featureset, X86FSET_CMOV)) {
/*
diff --git a/usr/src/uts/intel/ia32/os/cpc_subr.c b/usr/src/uts/intel/ia32/os/cpc_subr.c
index f7b86fd602..a74dfd77bc 100644
--- a/usr/src/uts/intel/ia32/os/cpc_subr.c
+++ b/usr/src/uts/intel/ia32/os/cpc_subr.c
@@ -140,7 +140,8 @@ kcpc_hw_init(cpu_t *cp)
strands_perfmon_shared = 1;
}
}
- } else if (cpuid_getvendor(cpu[0]) == X86_VENDOR_AMD) {
+ } else if (cpuid_getvendor(cpu[0]) == X86_VENDOR_AMD ||
+ cpuid_getvendor(cpu[0]) == X86_VENDOR_HYGON) {
/*
* On AMD systems with HT, all of the performance
* monitors exist on a per-logical CPU basis.
diff --git a/usr/src/uts/intel/io/amdzen/amdzen.c b/usr/src/uts/intel/io/amdzen/amdzen.c
index ac6ce9c94f..bd023a2edf 100644
--- a/usr/src/uts/intel/io/amdzen/amdzen.c
+++ b/usr/src/uts/intel/io/amdzen/amdzen.c
@@ -624,7 +624,7 @@ amdzen_stub_scan_cb(dev_info_t *dip, void *arg)
return (DDI_WALK_CONTINUE);
}
- if (vid != AMDZEN_PCI_VID_AMD) {
+ if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
return (DDI_WALK_CONTINUE);
}
@@ -737,9 +737,10 @@ amdzen_attach_stub(dev_info_t *dip, ddi_attach_cmd_t cmd)
return (DDI_FAILURE);
}
- if (vid != AMDZEN_PCI_VID_AMD) {
- dev_err(dip, CE_WARN, "expected AMD vendor ID (0x%x), found "
- "0x%x", AMDZEN_PCI_VID_AMD, vid);
+ if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
+ dev_err(dip, CE_WARN, "expected vendor ID (0x%x), found 0x%x",
+ cpuid_getvendor(CPU) == X86_VENDOR_HYGON ?
+ AMDZEN_PCI_VID_HYGON : AMDZEN_PCI_VID_AMD, vid);
return (DDI_FAILURE);
}
@@ -996,7 +997,8 @@ _init(void)
{
int ret;
- if (cpuid_getvendor(CPU) != X86_VENDOR_AMD) {
+ if (cpuid_getvendor(CPU) != X86_VENDOR_AMD &&
+ cpuid_getvendor(CPU) != X86_VENDOR_HYGON) {
return (ENOTSUP);
}
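Both amdzen.c checks now accept either vendor ID, and amdzen_attach_stub() picks the expected one from the CPU vendor with a ternary. If more vendors ever joined, that choice could be centralized in a small helper; a sketch with a hypothetical name, not something the driver defines:

	/* Hypothetical helper; the driver open-codes this check today. */
	static uint16_t
	amdzen_expected_pci_vid(void)
	{
		return (cpuid_getvendor(CPU) == X86_VENDOR_HYGON ?
		    AMDZEN_PCI_VID_HYGON : AMDZEN_PCI_VID_AMD);
	}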
diff --git a/usr/src/uts/intel/io/amdzen/amdzen.h b/usr/src/uts/intel/io/amdzen/amdzen.h
index 8150495911..6ba5266bd3 100644
--- a/usr/src/uts/intel/io/amdzen/amdzen.h
+++ b/usr/src/uts/intel/io/amdzen/amdzen.h
@@ -200,6 +200,11 @@ typedef enum {
*/
#define AMDZEN_PCI_VID_AMD 0x1022
+/*
+ * Hygon PCI vendor ID for reference
+ */
+#define AMDZEN_PCI_VID_HYGON 0x1d94
+
typedef enum {
AMDZEN_STUB_TYPE_DF,
AMDZEN_STUB_TYPE_NB
diff --git a/usr/src/uts/intel/pcbe/opteron_pcbe.c b/usr/src/uts/intel/pcbe/opteron_pcbe.c
index c4496bf8ca..8d567daa64 100644
--- a/usr/src/uts/intel/pcbe/opteron_pcbe.c
+++ b/usr/src/uts/intel/pcbe/opteron_pcbe.c
@@ -547,7 +547,8 @@ opt_pcbe_init(void)
* loads this module based on its name in the module directory, but it
* could have been renamed.
*/
- if (cpuid_getvendor(CPU) != X86_VENDOR_AMD || amd_family < 0xf)
+ if ((cpuid_getvendor(CPU) != X86_VENDOR_AMD || amd_family < 0xf) &&
+ cpuid_getvendor(CPU) != X86_VENDOR_HYGON)
return (-1);
if (amd_family == 0xf) {
@@ -556,7 +557,9 @@ opt_pcbe_init(void)
"AMD Opteron & Athlon64");
} else {
(void) snprintf(amd_pcbe_impl_name, sizeof (amd_pcbe_impl_name),
- "AMD Family %02xh", amd_family);
+ "%s Family %02xh",
+ cpuid_getvendor(CPU) == X86_VENDOR_HYGON ? "Hygon" : "AMD",
+ amd_family);
}
/*
@@ -598,7 +601,8 @@ opt_pcbe_init(void)
amd_pcbe_cpuref = amd_fam_11h_bkdg;
amd_events = family_11h_events;
amd_generic_events = opt_generic_events;
- } else if (amd_family == 0x17 && amd_model <= 0x2f) {
+ } else if ((amd_family == 0x17 && amd_model <= 0x2f) ||
+ amd_family == 0x18) {
amd_pcbe_cpuref = amd_fam_17h_zen1_reg;
amd_events = opteron_pcbe_f17h_zen1_events;
amd_generic_events = family_17h_zen1_papi_events;
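Hygon family 0x18 (Dhyana) is derived from AMD's Zen 1 microarchitecture, which is why it reuses the Zen 1 counter tables alongside family 0x17 models up to 0x2f. Were the if/else chain to keep growing, the same selection could be phrased as data; a rough sketch, with the struct and its fields invented for illustration:

	#include <limits.h>

	/* Hypothetical shape; invented for illustration only. */
	typedef struct pcbe_family_map {
		unsigned int	pfm_family;
		unsigned int	pfm_model_max;	/* highest model covered */
		const char	*pfm_tables;	/* stand-in for the table set */
	} pcbe_family_map_t;

	static const pcbe_family_map_t pcbe_zen1_map[] = {
		{ 0x17, 0x2f,     "zen1" },	/* AMD Zen 1 */
		{ 0x18, UINT_MAX, "zen1" },	/* Hygon Dhyana, all models */
	};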
diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h
index b75ab18f5e..2ec543677b 100644
--- a/usr/src/uts/intel/sys/x86_archext.h
+++ b/usr/src/uts/intel/sys/x86_archext.h
@@ -804,6 +804,9 @@ extern "C" {
#define X86_VENDOR_NSC 10
#define X86_VENDORSTR_NSC "Geode by NSC"
+#define X86_VENDOR_HYGON 11
+#define X86_VENDORSTR_HYGON "HygonGenuine"
+
/*
* Vendor string max len + \0
*/
@@ -968,6 +971,12 @@ extern "C" {
_X86_CHIPREV_MKREV(X86_VENDOR_AMD, 0x17, 0x0009)
/*
+ * Definitions for Hygon Family 0x18
+ */
+#define X86_CHIPREV_HYGON_18_DN_A1 \
+ _X86_CHIPREV_MKREV(X86_VENDOR_HYGON, 0x18, 0x0001)
+
+/*
* Various socket/package types, extended as the need to distinguish
 * a new type arises. The top 8 bits identify the vendor and the
* remaining 24 bits describe 24 socket types.
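X86_CHIPREV_HYGON_18_DN_A1 above packs the vendor, family, and a per-family revision counter into a single 32-bit constant. A back-of-the-envelope sketch of such packing, with the field widths and shift positions assumed for illustration (the _X86_CHIPREV_* masks in this header are authoritative):

	#include <stdint.h>

	/* Assumed layout: vendor high, then family, then revision. */
	#define	MKREV_SKETCH(vendor, family, rev) \
		((uint32_t)(vendor) << 28 | (uint32_t)(family) << 16 | (rev))

	/*
	 * Under that assumption, vendor 11 / family 0x18 / rev 0x0001
	 * packs to 0xb0180001.
	 */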
@@ -1026,6 +1035,15 @@ extern "C" {
#define X86_SOCKET_STRX4 _X86_SOCKET_MKVAL(X86_VENDOR_AMD, 0x23)
#define X86_NUM_SOCKETS_AMD 0x24
+/*
+ * Hygon socket types
+ */
+#define X86_SOCKET_SL1 _X86_SOCKET_MKVAL(X86_VENDOR_HYGON, 0x01)
+#define X86_SOCKET_SL1R2 _X86_SOCKET_MKVAL(X86_VENDOR_HYGON, 0x02)
+#define X86_SOCKET_DM1 _X86_SOCKET_MKVAL(X86_VENDOR_HYGON, 0x03)
+#define X86_NUM_SOCKETS_HYGON 0x04
+
+#define X86_NUM_SOCKETS (X86_NUM_SOCKETS_AMD + X86_NUM_SOCKETS_HYGON)
/*
* Definitions for Intel processor models. These are all for Family 6
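With Hygon's ordinals appended, X86_NUM_SOCKETS covers both vendors, and socket-to-string translation (as in _cpuid_sktstr() earlier) remains a simple table search. A sketch of that lookup, with the map shape invented for illustration:

	#include <stddef.h>
	#include <stdint.h>

	/* Hypothetical map shape; the real table lives in cpuid_subr.c. */
	struct sktmap {
		uint32_t	skt;	/* e.g. X86_SOCKET_SL1 */
		const char	*name;	/* e.g. "SL1" */
	};

	static const char *
	socket_name(const struct sktmap *map, size_t n, uint32_t skt)
	{
		for (size_t i = 0; i < n; i++) {
			if (map[i].skt == skt)
				return (map[i].name);
		}
		return ("Unknown");
	}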
diff --git a/usr/src/uts/sun4u/cpu/us3_common.c b/usr/src/uts/sun4u/cpu/us3_common.c
index 38a06c2731..14a2096e25 100644
--- a/usr/src/uts/sun4u/cpu/us3_common.c
+++ b/usr/src/uts/sun4u/cpu/us3_common.c
@@ -3331,7 +3331,7 @@ ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
case CE_ACT_DONE:
break;
- case CE_ACT(CE_DISP_BAD):
+ case CE_DISP_BAD:
default:
#ifdef DEBUG
cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
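This hunk swaps the label CE_ACT(CE_DISP_BAD) for the bare CE_DISP_BAD. Without reproducing CE_ACT()'s definition here, the hazard this class of fix addresses is a case label whose constant is computed in a different value domain than the switch operand, so it matches the wrong value or none at all. In miniature, with hypothetical macros:

	/* Hypothetical macros; they only illustrate the bug class. */
	#define	DISP_BAD	3
	#define	ACT(d)		((d) | 0x10)	/* disposition -> action */

	void
	dispatch(int disp)		/* disp holds a DISP_* value */
	{
		switch (disp) {
		case ACT(DISP_BAD):	/* bug: 0x13 never matches */
			break;
		case DISP_BAD:		/* fix: same value domain */
			break;
		}
	}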
diff --git a/usr/src/uts/sun4u/io/px/px_hlib.c b/usr/src/uts/sun4u/io/px/px_hlib.c
index 11c529dfa7..a6ae488a76 100644
--- a/usr/src/uts/sun4u/io/px/px_hlib.c
+++ b/usr/src/uts/sun4u/io/px/px_hlib.c
@@ -161,9 +161,9 @@ static uint64_t msiq_config_other_regs[] = {
#define MSIQ_MAPPING_SIZE (MSI_MAPPING_ENTRIES * sizeof (uint64_t))
/* OPL tuning variables for link unstable issue */
-int wait_perst = 5000000; /* step 9, default: 5s */
+int wait_perst = 5000000; /* step 9, default: 5s */
int wait_enable_port = 30000; /* step 11, default: 30ms */
-int link_retry_count = 2; /* step 11, default: 2 */
+int link_retry_count = 2; /* step 11, default: 2 */
int link_status_check = 400000; /* step 11, default: 400ms */
static uint64_t msiq_suspend(devhandle_t dev_hdl, pxu_t *pxu_p);
@@ -2108,7 +2108,7 @@ uint64_t
hvio_intr_getstate(devhandle_t dev_hdl, sysino_t sysino,
intr_state_t *intr_state)
{
- intr_state_t state;
+ uint64_t state;
state = CSRA_FR((caddr_t)dev_hdl, INTERRUPT_CLEAR,
SYSINO_TO_DEVINO(sysino), ENTRIES_INT_STATE);
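The local was declared as the enum intr_state_t but receives a raw 64-bit register field from CSRA_FR(); widening it to uint64_t keeps the read full-width and defers any narrowing to a later, explicit step. The shape of the fix, with stand-in names:

	#include <stdint.h>

	typedef enum { INTR_IDLE, INTR_RECEIVED } intr_state_t;

	extern uint64_t read_csr_field(void);	/* stand-in for CSRA_FR() */

	static void
	get_intr_state(intr_state_t *out)
	{
		uint64_t state = read_csr_field();	/* keep full width */

		*out = (intr_state_t)state;	/* narrow once, explicitly */
	}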
diff --git a/usr/src/uts/sun4u/sunfire/io/fhc_bd.c b/usr/src/uts/sun4u/sunfire/io/fhc_bd.c
index 5bc3b2fc0b..0a1936086b 100644
--- a/usr/src/uts/sun4u/sunfire/io/fhc_bd.c
+++ b/usr/src/uts/sun4u/sunfire/io/fhc_bd.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
@@ -768,7 +766,7 @@ fhc_bdlist_prime(int first, int count, int incr)
type = jtag_get_board_type(jtm->jtag_cmd, sc);
switch (type) {
- case -1:
+ case EMPTY_BOARD:
fhc_bd_sc_evt(sc, SYSC_EVT_BD_EMPTY);
continue;
case DISK_BOARD:
diff --git a/usr/src/uts/sun4u/sunfire/io/jtag.c b/usr/src/uts/sun4u/sunfire/io/jtag.c
index 7dc2a74dd7..71396af2ed 100644
--- a/usr/src/uts/sun4u/sunfire/io/jtag.c
+++ b/usr/src/uts/sun4u/sunfire/io/jtag.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/param.h>
#include <sys/ddi.h>
@@ -1165,7 +1163,7 @@ jtag_check_plus_board(
/*
* Returns (positive) board type if something detected, including
* UNKNOWN_BOARD.
- * Returns -1 if nothing there.
+ * Returns EMPTY_BOARD if nothing there.
*/
enum board_type
jtag_get_board_type(volatile u_int *jreg, sysc_cfga_stat_t *sc)
@@ -1261,7 +1259,7 @@ jtag_get_board_type(volatile u_int *jreg, sysc_cfga_stat_t *sc)
break;
case RING_BROKEN:
- result = -1;
+ result = EMPTY_BOARD;
break;
default:
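With EMPTY_BOARD = -1 added to enum board_type (in fhc.h below), jtag_get_board_type() can report an empty slot without stepping outside its declared return type, and callers like fhc_bdlist_prime() switch on a named value instead of a magic -1. The pattern in miniature:

	/* Miniature version of the pattern; enumerators abbreviated. */
	enum board_type {
		EMPTY_BOARD = -1,	/* named sentinel: nothing in slot */
		UNINIT_BOARD = 0,
		CPU_BOARD
	};

	enum board_type
	probe_slot(int present)
	{
		return (present ? CPU_BOARD : EMPTY_BOARD);
	}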
diff --git a/usr/src/uts/sun4u/sunfire/sys/fhc.h b/usr/src/uts/sun4u/sunfire/sys/fhc.h
index f66a5003cd..a76231e781 100644
--- a/usr/src/uts/sun4u/sunfire/sys/fhc.h
+++ b/usr/src/uts/sun4u/sunfire/sys/fhc.h
@@ -27,8 +27,6 @@
#ifndef _SYS_FHC_H
#define _SYS_FHC_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -486,6 +484,7 @@ struct ft_link_list {
* boards. It is used by both the kernel and user programs.
*/
enum board_type {
+ EMPTY_BOARD = -1,
UNINIT_BOARD = 0, /* Uninitialized board type */
UNKNOWN_BOARD, /* Unknown board type */
CPU_BOARD, /* System board CPU(s) */