author     Keith M Wesolowski <wesolows@foobazco.org>    2013-06-11 22:47:43 +0000
committer  Keith M Wesolowski <wesolows@foobazco.org>    2013-06-11 22:47:43 +0000
commit     80702ccb4f267d5bfae86b07731a8f89e724d055
tree       4bcba7532a1633ba6d708dbaf41c21af8876e7c0
parent     2ffdaec9c70166169b32efb691ef26af80d99263
parent     3cb69f734bc60bbb4d56a28c83706db862bec082
[illumos-gate merge]
commit 3cb69f734bc60bbb4d56a28c83706db862bec082
3749 zfs event processing should work on R/O root filesystems
commit b3d9f2e26021d3f55a281af30720589d303b9806
3747 txg commit callbacks don't work
commit 8b713775314bbbf24edd503b4869342d8711ce95
3745 zpool create should treat -O mountpoint and -m the same
3811 zpool create -o altroot=/xyz -O mountpoint=/mnt ignores the mountpoint option
commit fc7a6e3fefc649cb65c8e2a35d194781445008b0
3744 zfs shouldn't ignore errors unmounting snapshots
commit b287be1ba86043996f49b1cc34c80cc620f9b841
3743 zfs needs a refcount audit
commit f7170741490edba9d1d9c697c177c887172bc741
3742 zfs comments need cleaner, more consistent style
commit 3e30c24aeefdee1631958ecf17f18da671781956
3741 zfs needs better comments
commit 2ac302890e472bf0c11db192dd18f12ded6043f6
3797 AHCI: Support for ASMedia ASM106x
60 files changed, 806 insertions, 547 deletions
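Issues 3745 and 3811 above change how zpool create handles mountpoints: -m and -O mountpoint= are now treated identically, and the mountpoint property is applied as part of pool creation rather than set afterwards. A minimal sketch of the intended command-line behavior (the pool name "tank" and device "c0t0d0" are hypothetical):

    # With this merge, these two invocations should be equivalent:
    zpool create -m /mnt tank c0t0d0
    zpool create -O mountpoint=/mnt tank c0t0d0

    # And -O mountpoint is no longer ignored when an altroot is also given (3811):
    zpool create -o altroot=/xyz -O mountpoint=/mnt tank c0t0d0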
diff --git a/usr/src/cmd/mdb/intel/modules/sata/sata.c b/usr/src/cmd/mdb/intel/modules/sata/sata.c index 5a43653d30..b1618cd8d8 100644 --- a/usr/src/cmd/mdb/intel/modules/sata/sata.c +++ b/usr/src/cmd/mdb/intel/modules/sata/sata.c @@ -22,7 +22,9 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ - +/* + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + */ #include <sys/mdb_modapi.h> #include <mdb/mdb_ks.h> @@ -168,7 +170,7 @@ sata_dmsg_dump(sata_trace_dmsg_t *addr, int print_pathname, uint_t *printed) (void) mdb_ddi_pathname( (uintptr_t)dmsg.dip, pathname, sizeof (pathname)); - mdb_printf("\n[%s]", pathname); + mdb_printf("[%s]", pathname); } } } else { @@ -181,7 +183,7 @@ sata_dmsg_dump(sata_trace_dmsg_t *addr, int print_pathname, uint_t *printed) dmsg.buf); } - mdb_printf("%s", merge); + mdb_printf("%s\n", merge); if (printed != NULL) { (*printed)++; diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c index 310c33462a..7a133bf27e 100644 --- a/usr/src/cmd/zpool/zpool_main.c +++ b/usr/src/cmd/zpool/zpool_main.c @@ -678,6 +678,7 @@ zpool_do_create(int argc, char **argv) goto errout; break; case 'm': + /* Equivalent to -O mountpoint=optarg */ mountpoint = optarg; break; case 'o': @@ -716,8 +717,18 @@ zpool_do_create(int argc, char **argv) *propval = '\0'; propval++; - if (add_prop_list(optarg, propval, &fsprops, B_FALSE)) + /* + * Mountpoints are checked and then added later. + * Uniquely among properties, they can be specified + * more than once, to avoid conflict with -m. + */ + if (0 == strcmp(optarg, + zfs_prop_to_name(ZFS_PROP_MOUNTPOINT))) { + mountpoint = propval; + } else if (add_prop_list(optarg, propval, &fsprops, + B_FALSE)) { goto errout; + } break; case ':': (void) fprintf(stderr, gettext("missing argument for " @@ -834,6 +845,18 @@ zpool_do_create(int argc, char **argv) } } + /* + * Now that the mountpoint's validity has been checked, ensure that + * the property is set appropriately prior to creating the pool. 
+ */ + if (mountpoint != NULL) { + ret = add_prop_list(zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), + mountpoint, &fsprops, B_FALSE); + if (ret != 0) + goto errout; + } + + ret = 1; if (dryrun) { /* * For a dry run invocation, print out a basic message and run @@ -868,21 +891,19 @@ zpool_do_create(int argc, char **argv) if (nvlist_exists(props, propname)) continue; - if (add_prop_list(propname, ZFS_FEATURE_ENABLED, - &props, B_TRUE) != 0) + ret = add_prop_list(propname, + ZFS_FEATURE_ENABLED, &props, B_TRUE); + if (ret != 0) goto errout; } } + + ret = 1; if (zpool_create(g_zfs, poolname, nvroot, props, fsprops) == 0) { zfs_handle_t *pool = zfs_open(g_zfs, poolname, ZFS_TYPE_FILESYSTEM); if (pool != NULL) { - if (mountpoint != NULL) - verify(zfs_prop_set(pool, - zfs_prop_to_name( - ZFS_PROP_MOUNTPOINT), - mountpoint) == 0); if (zfs_mount(pool, NULL, 0) == 0) ret = zfs_shareall(pool); zfs_close(pool); diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index ed460551c6..980615eae0 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -4507,7 +4507,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) */ tmp_cb = list_head(&zcl.zcl_callbacks); if (tmp_cb != NULL && - tmp_cb->zcd_txg > txg - ZTEST_COMMIT_CALLBACK_THRESH) { + (txg - ZTEST_COMMIT_CALLBACK_THRESH) > tmp_cb->zcd_txg) { fatal(0, "Commit callback threshold exceeded, oldest txg: %" PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg); } diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c index 36ab907508..27656b7526 100644 --- a/usr/src/lib/libzfs/common/libzfs_dataset.c +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c @@ -4473,6 +4473,11 @@ zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) return (err); } +/* + * Convert the zvol's volume size to an appropriate reservation. + * Note: If this routine is updated, it is necessary to update the ZFS test + * suite's shell version in reservation.kshlib. + */ uint64_t zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props) { diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c index 8315e1404b..0fd5f5738c 100644 --- a/usr/src/lib/libzfs/common/libzfs_pool.c +++ b/usr/src/lib/libzfs/common/libzfs_pool.c @@ -1088,7 +1088,6 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, nvlist_t *zc_fsprops = NULL; nvlist_t *zc_props = NULL; char msg[1024]; - char *altroot; int ret = -1; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, @@ -1187,21 +1186,6 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, } } - /* - * If this is an alternate root pool, then we automatically set the - * mountpoint of the root dataset to be '/'. 
- */ - if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), - &altroot) == 0) { - zfs_handle_t *zhp; - - verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL); - verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), - "/") == 0); - - zfs_close(zhp); - } - create_failed: zcmd_free_nvlists(&zc); nvlist_free(zc_props); diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh index 2458cf350c..c2f3789891 100644..100755 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh @@ -103,12 +103,16 @@ do [[ "$mpt" != "$mpt_val" ]] && \ log_fail "The value of mountpoint property is different\ from the output of zfs mount" - if [[ "$opt" == "-R $TESTDIR1" ]] || [[ "$opt" == "-m $TESTDIR1" ]]; - then + if [[ "$opt" == "-m $TESTDIR1" ]]; then [[ ! -d $TESTDIR1 ]] && \ log_fail "$TESTDIR1 is not created auotmatically." [[ "$mpt" != "$TESTDIR1" ]] && \ log_fail "$TESTPOOL is not mounted on $TESTDIR1." + elif [[ "$opt" == "-R $TESTDIR1" ]]; then + [[ ! -d $TESTDIR1/$TESTPOOL ]] && \ + log_fail "$TESTDIR1/$TESTPOOL is not created auotmatically." + [[ "$mpt" != "$TESTDIR1/$TESTPOOL" ]] && \ + log_fail "$TESTPOOL is not mounted on $TESTDIR1/$TESTPOOL." else [[ ! -d ${TESTDIR1}$TESTDIR1 ]] && \ log_fail "${TESTDIR1}$TESTDIR1 is not created automatically." diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c index 81ee2e3ff3..67847d2d99 100644 --- a/usr/src/uts/common/fs/zfs/arc.c +++ b/usr/src/uts/common/fs/zfs/arc.c @@ -59,11 +59,11 @@ * tight. * * 3. The Megiddo and Modha model assumes a fixed page size. All - * elements of the cache are therefor exactly the same size. So + * elements of the cache are therefore exactly the same size. So * when adjusting the cache size following a cache miss, its simply * a matter of choosing a single page to evict. In our model, we * have variable sized cache blocks (rangeing from 512 bytes to - * 128K bytes). We therefor choose a set of blocks to evict to make + * 128K bytes). We therefore choose a set of blocks to evict to make * space for a cache miss that approximates as closely as possible * the space used by the new block. * @@ -78,7 +78,7 @@ * ways: 1) via a hash table lookup using the DVA as a key, * or 2) via one of the ARC lists. The arc_read() interface * uses method 1, while the internal arc algorithms for - * adjusting the cache use method 2. We therefor provide two + * adjusting the cache use method 2. We therefore provide two * types of locks: 1) the hash table lock array, and 2) the * arc list locks. * @@ -258,7 +258,18 @@ typedef struct arc_stats { kstat_named_t arcstat_mfu_ghost_hits; kstat_named_t arcstat_deleted; kstat_named_t arcstat_recycle_miss; + /* + * Number of buffers that could not be evicted because the hash lock + * was held by another thread. The lock may not necessarily be held + * by something using the same buffer, since hash locks are shared + * by multiple buffers. + */ kstat_named_t arcstat_mutex_miss; + /* + * Number of buffers skipped because they have I/O in progress, are + * indrect prefetch buffers that have not lived long enough, or are + * not from the spa we're trying to evict from. 
+ */ kstat_named_t arcstat_evict_skip; kstat_named_t arcstat_evict_l2_cached; kstat_named_t arcstat_evict_l2_eligible; @@ -376,7 +387,7 @@ static arc_stats_t arc_stats = { #define ARCSTAT(stat) (arc_stats.stat.value.ui64) #define ARCSTAT_INCR(stat, val) \ - atomic_add_64(&arc_stats.stat.value.ui64, (val)); + atomic_add_64(&arc_stats.stat.value.ui64, (val)) #define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1) #define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1) @@ -604,9 +615,7 @@ uint64_t zfs_crc64_table[256]; #define l2arc_writes_sent ARCSTAT(arcstat_l2_writes_sent) #define l2arc_writes_done ARCSTAT(arcstat_l2_writes_done) -/* - * L2ARC Performance Tunables - */ +/* L2ARC Performance Tunables */ uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* default max write size */ uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra write during warmup */ uint64_t l2arc_headroom = L2ARC_HEADROOM; /* number of dev writes */ @@ -3001,6 +3010,10 @@ top: mutex_exit(hash_lock); + /* + * At this point, we have a level 1 cache miss. Try again in + * L2ARC if possible. + */ ASSERT3U(hdr->b_size, ==, size); DTRACE_PROBE4(arc__miss, arc_buf_hdr_t *, hdr, blkptr_t *, bp, uint64_t, size, zbookmark_t *, zb); @@ -3243,8 +3256,8 @@ arc_buf_evict(arc_buf_t *buf) } /* - * Release this buffer from the cache. This must be done - * after a read and prior to modifying the buffer contents. + * Release this buffer from the cache, making it an anonymous buffer. This + * must be done after a read and prior to modifying the buffer contents. * If the buffer has more than one reference, we must make * a new hdr for the buffer. */ @@ -3633,7 +3646,7 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg) /* * Writes will, almost always, require additional memory allocations - * in order to compress/encrypt/etc the data. We therefor need to + * in order to compress/encrypt/etc the data. We therefore need to * make sure that there is sufficient available memory for this. */ if (error = arc_memory_throttle(reserve, anon_size, txg)) diff --git a/usr/src/uts/common/fs/zfs/bptree.c b/usr/src/uts/common/fs/zfs/bptree.c index 73922db88b..a0c90cc4d9 100644 --- a/usr/src/uts/common/fs/zfs/bptree.c +++ b/usr/src/uts/common/fs/zfs/bptree.c @@ -43,7 +43,7 @@ * dsl_scan_sync. This allows the delete operation to finish without traversing * all the dataset's blocks. * - * Note that while bt_begin and bt_end are only ever incremented in this code + * Note that while bt_begin and bt_end are only ever incremented in this code, * they are effectively reset to 0 every time the entire bptree is freed because * the bptree's object is destroyed and re-created. */ diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c index f4c1904543..9f4c8a8e35 100644 --- a/usr/src/uts/common/fs/zfs/dbuf.c +++ b/usr/src/uts/common/fs/zfs/dbuf.c @@ -653,6 +653,14 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) if (!havepzio) err = zio_wait(zio); } else { + /* + * Another reader came in while the dbuf was in flight + * between UNCACHED and CACHED. Either a writer will finish + * writing the buffer (sending the dbuf to CACHED) or the + * first reader's request will reach the read_done callback + * and send the dbuf to CACHED. Otherwise, a failure + * occurred and the dbuf went to UNCACHED. 
+ */ mutex_exit(&db->db_mtx); if (prefetch) dmu_zfetch(&dn->dn_zfetch, db->db.db_offset, @@ -661,6 +669,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) rw_exit(&dn->dn_struct_rwlock); DB_DNODE_EXIT(db); + /* Skip the wait per the caller's request. */ mutex_enter(&db->db_mtx); if ((flags & DB_RF_NEVERWAIT) == 0) { while (db->db_state == DB_READ || @@ -1276,7 +1285,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) } /* - * Return TRUE if this evicted the dbuf. + * Undirty a buffer in the transaction group referenced by the given + * transaction. Return whether this evicted the dbuf. */ static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) @@ -2237,6 +2247,7 @@ dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx) ASSERT(db->db_level > 0); DBUF_VERIFY(db); + /* Read the block if it hasn't been read yet. */ if (db->db_buf == NULL) { mutex_exit(&db->db_mtx); (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED); @@ -2247,10 +2258,12 @@ dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx) DB_DNODE_ENTER(db); dn = DB_DNODE(db); + /* Indirect block size must match what the dnode thinks it is. */ ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift); dbuf_check_blkptr(dn, db); DB_DNODE_EXIT(db); + /* Provide the pending dirty record to child dbufs */ db->db_data_pending = dr; mutex_exit(&db->db_mtx); @@ -2637,6 +2650,7 @@ dbuf_write_override_done(zio_t *zio) dbuf_write_done(zio, NULL, db); } +/* Issue I/O to commit a dirty buffer to disk. */ static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) { @@ -2671,11 +2685,19 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) } if (parent != dn->dn_dbuf) { + /* Our parent is an indirect block. */ + /* We have a dirty parent that has been scheduled for write. */ ASSERT(parent && parent->db_data_pending); + /* Our parent's buffer is one level closer to the dnode. */ ASSERT(db->db_level == parent->db_level-1); + /* + * We're about to modify our parent's db_data by modifying + * our block pointer, so the parent must be released. + */ ASSERT(arc_released(parent->db_buf)); zio = parent->db_data_pending->dr_zio; } else { + /* Our parent is the dnode itself. */ ASSERT((db->db_level == dn->dn_phys->dn_nlevels-1 && db->db_blkid != DMU_SPILL_BLKID) || (db->db_blkid == DMU_SPILL_BLKID && db->db_level == 0)); diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c index a616fd37cf..a3640fd593 100644 --- a/usr/src/uts/common/fs/zfs/dmu.c +++ b/usr/src/uts/common/fs/zfs/dmu.c @@ -1827,7 +1827,7 @@ dmu_init(void) void dmu_fini(void) { - arc_fini(); + arc_fini(); /* arc depends on l2arc, so arc must go first */ l2arc_fini(); zfetch_fini(); dbuf_fini(); diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c index ad4084021d..e30c6d345e 100644 --- a/usr/src/uts/common/fs/zfs/dmu_tx.c +++ b/usr/src/uts/common/fs/zfs/dmu_tx.c @@ -1016,6 +1016,10 @@ dmu_tx_unassign(dmu_tx_t *tx) txg_rele_to_quiesce(&tx->tx_txgh); + /* + * Walk the transaction's hold list, removing the hold on the + * associated dnode, and notifying waiters if the refcount drops to 0. + */ for (txh = list_head(&tx->tx_holds); txh != tx->tx_needassign_txh; txh = list_next(&tx->tx_holds, txh)) { dnode_t *dn = txh->txh_dnode; @@ -1128,6 +1132,10 @@ dmu_tx_commit(dmu_tx_t *tx) ASSERT(tx->tx_txg != 0); + /* + * Go through the transaction's hold list and remove holds on + * associated dnodes, notifying waiters if no holds remain. 
+ */ while (txh = list_head(&tx->tx_holds)) { dnode_t *dn = txh->txh_dnode; diff --git a/usr/src/uts/common/fs/zfs/dmu_zfetch.c b/usr/src/uts/common/fs/zfs/dmu_zfetch.c index 37037c30f6..2ebfa183aa 100644 --- a/usr/src/uts/common/fs/zfs/dmu_zfetch.c +++ b/usr/src/uts/common/fs/zfs/dmu_zfetch.c @@ -48,11 +48,11 @@ uint32_t zfetch_block_cap = 256; uint64_t zfetch_array_rd_sz = 1024 * 1024; /* forward decls for static routines */ -static int dmu_zfetch_colinear(zfetch_t *, zstream_t *); +static boolean_t dmu_zfetch_colinear(zfetch_t *, zstream_t *); static void dmu_zfetch_dofetch(zfetch_t *, zstream_t *); static uint64_t dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t); static uint64_t dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t); -static int dmu_zfetch_find(zfetch_t *, zstream_t *, int); +static boolean_t dmu_zfetch_find(zfetch_t *, zstream_t *, int); static int dmu_zfetch_stream_insert(zfetch_t *, zstream_t *); static zstream_t *dmu_zfetch_stream_reclaim(zfetch_t *); static void dmu_zfetch_stream_remove(zfetch_t *, zstream_t *); @@ -104,9 +104,9 @@ kstat_t *zfetch_ksp; * last stream, then we are probably in a strided access pattern. So * combine the two sequential streams into a single strided stream. * - * If no co-linear streams are found, return NULL. + * Returns whether co-linear streams were found. */ -static int +static boolean_t dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh) { zstream_t *z_walk; @@ -326,7 +326,7 @@ dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks) * for this block read. If so, it starts a prefetch for the stream it * located and returns true, otherwise it returns false */ -static int +static boolean_t dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched) { zstream_t *zs; @@ -639,7 +639,7 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched) { zstream_t zst; zstream_t *newstream; - int fetched; + boolean_t fetched; int inserted; unsigned int blkshft; uint64_t blksz; @@ -665,7 +665,8 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched) ZFETCHSTAT_BUMP(zfetchstat_hits); } else { ZFETCHSTAT_BUMP(zfetchstat_misses); - if (fetched = dmu_zfetch_colinear(zf, &zst)) { + fetched = dmu_zfetch_colinear(zf, &zst); + if (fetched) { ZFETCHSTAT_BUMP(zfetchstat_colinear_hits); } else { ZFETCHSTAT_BUMP(zfetchstat_colinear_misses); diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c index 417e219b88..92996a6f12 100644 --- a/usr/src/uts/common/fs/zfs/dnode.c +++ b/usr/src/uts/common/fs/zfs/dnode.c @@ -1803,14 +1803,16 @@ dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx) } /* - * This function scans a block at the indicated "level" looking for - * a hole or data (depending on 'flags'). If level > 0, then we are - * scanning an indirect block looking at its pointers. If level == 0, - * then we are looking at a block of dnodes. If we don't find what we - * are looking for in the block, we return ESRCH. Otherwise, return - * with *offset pointing to the beginning (if searching forwards) or - * end (if searching backwards) of the range covered by the block - * pointer we matched on (or dnode). + * Scans a block at the indicated "level" looking for a hole or data, + * depending on 'flags'. + * + * If level > 0, then we are scanning an indirect block looking at its + * pointers. If level == 0, then we are looking at a block of dnodes. + * + * If we don't find what we are looking for in the block, we return ESRCH. 
+ * Otherwise, return with *offset pointing to the beginning (if searching + * forwards) or end (if searching backwards) of the range covered by the + * block pointer we matched on (or dnode). * * The basic search algorithm used below by dnode_next_offset() is to * use this function to search up the block tree (widen the search) until diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c index 7d47ce02b4..2a1094be24 100644 --- a/usr/src/uts/common/fs/zfs/dnode_sync.c +++ b/usr/src/uts/common/fs/zfs/dnode_sync.c @@ -302,7 +302,7 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc, } /* - * free_range: Traverse the indicated range of the provided file + * Traverse the indicated range of the provided file * and "free" all the blocks contained there. */ static void @@ -370,7 +370,7 @@ dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) } /* - * Try to kick all the dnodes dbufs out of the cache... + * Try to kick all the dnode's dbufs out of the cache... */ void dnode_evict_dbufs(dnode_t *dn) diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c index bfc8b06d03..d59b6fa052 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dataset.c +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c @@ -356,8 +356,10 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, /* Make sure dsobj has the correct object type. */ dmu_object_info_from_db(dbuf, &doi); - if (doi.doi_type != DMU_OT_DSL_DATASET) + if (doi.doi_type != DMU_OT_DSL_DATASET) { + dmu_buf_rele(dbuf, tag); return (SET_ERROR(EINVAL)); + } ds = dmu_buf_get_user(dbuf); if (ds == NULL) { diff --git a/usr/src/uts/common/fs/zfs/dsl_prop.c b/usr/src/uts/common/fs/zfs/dsl_prop.c index 8e0e089448..2eada5cd16 100644 --- a/usr/src/uts/common/fs/zfs/dsl_prop.c +++ b/usr/src/uts/common/fs/zfs/dsl_prop.c @@ -380,7 +380,7 @@ dsl_prop_predict(dsl_dir_t *dd, const char *propname, /* * Unregister this callback. Return 0 on success, ENOENT if ddname is - * invalid, ENOMSG if no matching callback registered. + * invalid, or ENOMSG if no matching callback registered. */ int dsl_prop_unregister(dsl_dataset_t *ds, const char *propname, diff --git a/usr/src/uts/common/fs/zfs/dsl_userhold.c b/usr/src/uts/common/fs/zfs/dsl_userhold.c index fa9d937085..568bba33b5 100644 --- a/usr/src/uts/common/fs/zfs/dsl_userhold.c +++ b/usr/src/uts/common/fs/zfs/dsl_userhold.c @@ -433,7 +433,7 @@ dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag) dsl_dataset_name(ds, name); dsl_dataset_rele(ds, FTAG); dsl_pool_config_exit(dp, FTAG); - zfs_unmount_snap(name); + (void) zfs_unmount_snap(name); } else { dsl_pool_config_exit(dp, FTAG); } diff --git a/usr/src/uts/common/fs/zfs/sa.c b/usr/src/uts/common/fs/zfs/sa.c index 996f6e1443..6a87231c7b 100644 --- a/usr/src/uts/common/fs/zfs/sa.c +++ b/usr/src/uts/common/fs/zfs/sa.c @@ -111,6 +111,7 @@ * location. * * Byteswap implications: + * * Since the SA attributes are not entirely self describing we can't do * the normal byteswap processing. The special ZAP layout attribute and * attribute registration attributes define the byteswap function and the @@ -189,7 +190,6 @@ sa_attr_reg_t sa_legacy_attrs[] = { }; /* - * ZPL legacy layout * This is only used for objects of type DMU_OT_ZNODE */ sa_attr_type_t sa_legacy_zpl_layout[] = { @@ -199,7 +199,6 @@ sa_attr_type_t sa_legacy_zpl_layout[] = { /* * Special dummy layout used for buffers with no attributes. 
*/ - sa_attr_type_t sa_dummy_zpl_layout[] = { 0 }; static int sa_legacy_attr_count = 16; diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 0fc3e66904..7334d39516 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -26,6 +26,8 @@ */ /* + * SPA: Storage Pool Allocator + * * This file contains all the routines used when modifying on-disk SPA state. * This includes opening, importing, destroying, exporting a pool, and syncing a * pool. @@ -77,6 +79,12 @@ #include "zfs_prop.h" #include "zfs_comutil.h" +/* + * The interval, in seconds, at which failed configuration cache file writes + * should be retried. + */ +static int zfs_ccw_retry_interval = 300; + typedef enum zti_modes { ZTI_MODE_FIXED, /* value is # of threads (min 1) */ ZTI_MODE_ONLINE_PERCENT, /* value is % of online CPUs */ @@ -4514,6 +4522,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) /* * Detach a device from a mirror or replacing vdev. + * * If 'replace_done' is specified, only detach if the parent * is a replacing vdev. */ @@ -5168,11 +5177,9 @@ spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd) * the spa_vdev_config_[enter/exit] functions which allow us to * grab and release the spa_config_lock while still holding the namespace * lock. During each step the configuration is synced out. - */ - -/* - * Remove a device from the pool. Currently, this supports removing only hot - * spares, slogs, and level 2 ARC devices. + * + * Currently, this supports removing only hot spares, slogs, and level 2 ARC + * devices. */ int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) @@ -5282,7 +5289,7 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) /* * Find any device that's done replacing, or a vdev marked 'unspare' that's - * current spared, so we can detach it. + * currently spared, so we can detach it. 
*/ static vdev_t * spa_vdev_resilver_done_hunt(vdev_t *vd) @@ -5661,13 +5668,34 @@ spa_async_resume(spa_t *spa) mutex_exit(&spa->spa_async_lock); } +static boolean_t +spa_async_tasks_pending(spa_t *spa) +{ + uint_t non_config_tasks; + uint_t config_task; + boolean_t config_task_suspended; + + non_config_tasks = spa->spa_async_tasks & ~SPA_ASYNC_CONFIG_UPDATE; + config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE; + if (spa->spa_ccw_fail_time == 0) { + config_task_suspended = B_FALSE; + } else { + config_task_suspended = + (gethrtime() - spa->spa_ccw_fail_time) < + (zfs_ccw_retry_interval * NANOSEC); + } + + return (non_config_tasks || (config_task && !config_task_suspended)); +} + static void spa_async_dispatch(spa_t *spa) { mutex_enter(&spa->spa_async_lock); - if (spa->spa_async_tasks && !spa->spa_async_suspended && + if (spa_async_tasks_pending(spa) && + !spa->spa_async_suspended && spa->spa_async_thread == NULL && - rootdir != NULL && !vn_is_readonly(rootdir)) + rootdir != NULL) spa->spa_async_thread = thread_create(NULL, 0, spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); mutex_exit(&spa->spa_async_lock); diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c index b113ce9e0c..d97fc32fbf 100644 --- a/usr/src/uts/common/fs/zfs/spa_config.c +++ b/usr/src/uts/common/fs/zfs/spa_config.c @@ -26,6 +26,7 @@ */ #include <sys/spa.h> +#include <sys/fm/fs/zfs.h> #include <sys/spa_impl.h> #include <sys/nvpair.h> #include <sys/uio.h> @@ -140,7 +141,7 @@ out: kobj_close_file(file); } -static void +static int spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) { size_t buflen; @@ -148,13 +149,14 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) vnode_t *vp; int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX; char *temp; + int err; /* * If the nvlist is empty (NULL), then remove the old cachefile. */ if (nvl == NULL) { - (void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE); - return; + err = vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE); + return (err); } /* @@ -175,12 +177,14 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) */ (void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path); - if (vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0) == 0) { - if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, - 0, RLIM64_INFINITY, kcred, NULL) == 0 && - VOP_FSYNC(vp, FSYNC, kcred, NULL) == 0) { - (void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE); - } + err = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0); + if (err == 0) { + err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, + 0, RLIM64_INFINITY, kcred, NULL); + if (err == 0) + err = VOP_FSYNC(vp, FSYNC, kcred, NULL); + if (err == 0) + err = vn_rename(temp, dp->scd_path, UIO_SYSSPACE); (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); VN_RELE(vp); } @@ -189,6 +193,7 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) kmem_free(buf, buflen); kmem_free(temp, MAXPATHLEN); + return (err); } /* @@ -200,6 +205,8 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) { spa_config_dirent_t *dp, *tdp; nvlist_t *nvl; + boolean_t ccw_failure; + int error; ASSERT(MUTEX_HELD(&spa_namespace_lock)); @@ -211,6 +218,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) * cachefile is changed, the new one is pushed onto this list, allowing * us to update previous cachefiles that no longer contain this pool. 
*/ + ccw_failure = B_FALSE; for (dp = list_head(&target->spa_config_list); dp != NULL; dp = list_next(&target->spa_config_list, dp)) { spa_t *spa = NULL; @@ -251,10 +259,32 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) mutex_exit(&spa->spa_props_lock); } - spa_config_write(dp, nvl); + error = spa_config_write(dp, nvl); + if (error != 0) + ccw_failure = B_TRUE; nvlist_free(nvl); } + if (ccw_failure) { + /* + * Keep trying so that configuration data is + * written if/when any temporary filesystem + * resource issues are resolved. + */ + if (target->spa_ccw_fail_time == 0) { + zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE, + target, NULL, NULL, 0, 0); + } + target->spa_ccw_fail_time = gethrtime(); + spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE); + } else { + /* + * Do not rate limit future attempts to update + * the config cache. + */ + target->spa_ccw_fail_time = 0; + } + /* * Remove any config entries older than the current one. */ @@ -317,6 +347,7 @@ spa_config_set(spa_t *spa, nvlist_t *config) /* * Generate the pool's configuration based on the current in-core state. + * * We infer whether to generate a complete config or just one top-level config * based on whether vd is the root vdev. */ diff --git a/usr/src/uts/common/fs/zfs/spa_errlog.c b/usr/src/uts/common/fs/zfs/spa_errlog.c index 9152846d6e..0dd6c7a489 100644 --- a/usr/src/uts/common/fs/zfs/spa_errlog.c +++ b/usr/src/uts/common/fs/zfs/spa_errlog.c @@ -183,8 +183,10 @@ process_error_log(spa_t *spa, uint64_t obj, void *addr, size_t *count) if (copyout(&zb, (char *)addr + (*count - 1) * sizeof (zbookmark_t), - sizeof (zbookmark_t)) != 0) + sizeof (zbookmark_t)) != 0) { + zap_cursor_fini(&zc); return (SET_ERROR(EFAULT)); + } *count -= 1; } diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c index e57d8ab143..2b8a071cb0 100644 --- a/usr/src/uts/common/fs/zfs/spa_misc.c +++ b/usr/src/uts/common/fs/zfs/spa_misc.c @@ -1334,7 +1334,7 @@ zfs_panic_recover(const char *fmt, ...) /* * This is a stripped-down version of strtoull, suitable only for converting - * lowercase hexidecimal numbers that don't overflow. + * lowercase hexadecimal numbers that don't overflow. 
*/ uint64_t strtonum(const char *str, char **nptr) diff --git a/usr/src/uts/common/fs/zfs/sys/ddt.h b/usr/src/uts/common/fs/zfs/sys/ddt.h index 9724d6eceb..771610677e 100644 --- a/usr/src/uts/common/fs/zfs/sys/ddt.h +++ b/usr/src/uts/common/fs/zfs/sys/ddt.h @@ -63,16 +63,15 @@ enum ddt_class { */ typedef struct ddt_key { zio_cksum_t ddk_cksum; /* 256-bit block checksum */ - uint64_t ddk_prop; /* LSIZE, PSIZE, compression */ + /* + * Encoded with logical & physical size, and compression, as follows: + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | 0 | 0 | 0 | comp | PSIZE | LSIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + */ + uint64_t ddk_prop; } ddt_key_t; -/* - * ddk_prop layout: - * - * +-------+-------+-------+-------+-------+-------+-------+-------+ - * | 0 | 0 | 0 | comp | PSIZE | LSIZE | - * +-------+-------+-------+-------+-------+-------+-------+-------+ - */ #define DDK_GET_LSIZE(ddk) \ BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1) #define DDK_SET_LSIZE(ddk, x) \ diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h index 1366a998fd..6e07a156dc 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h @@ -409,6 +409,8 @@ void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp, * object must be held in an assigned transaction before calling * dmu_buf_will_dirty. You may use dmu_buf_set_user() on the bonus * buffer as well. You must release your hold with dmu_buf_rele(). + * + * Returns ENOENT, EIO, or 0. */ int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **); int dmu_bonus_max(void); @@ -664,8 +666,14 @@ extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS]; * If doi is NULL, just indicates whether the object exists. */ int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi); +/* Like dmu_object_info, but faster if you have a held dnode in hand. */ void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi); +/* Like dmu_object_info, but faster if you have a held dbuf in hand. */ void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi); +/* + * Like dmu_object_info_from_db, but faster still when you only care about + * the size. This is specifically optimized for zfs_getattr(). + */ void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512); diff --git a/usr/src/uts/common/fs/zfs/sys/dnode.h b/usr/src/uts/common/fs/zfs/sys/dnode.h index 9f9134d8cd..c3de03d369 100644 --- a/usr/src/uts/common/fs/zfs/sys/dnode.h +++ b/usr/src/uts/common/fs/zfs/sys/dnode.h @@ -145,9 +145,8 @@ typedef struct dnode_phys { typedef struct dnode { /* - * dn_struct_rwlock protects the structure of the dnode, - * including the number of levels of indirection (dn_nlevels), - * dn_maxblkid, and dn_next_* + * Protects the structure of the dnode, including the number of levels + * of indirection (dn_nlevels), dn_maxblkid, and dn_next_* */ krwlock_t dn_struct_rwlock; diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h index b0160edfb1..d3b411ba57 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h @@ -110,6 +110,7 @@ typedef struct dsl_pool { /* * Protects administrative changes (properties, namespace) + * * It is only held for write in syncing context. 
Therefore * syncing context does not need to ever have it for read, since * nobody else could possibly have it for write. diff --git a/usr/src/uts/common/fs/zfs/sys/sa_impl.h b/usr/src/uts/common/fs/zfs/sys/sa_impl.h index 8ae05ce364..582bd76f01 100644 --- a/usr/src/uts/common/fs/zfs/sys/sa_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/sa_impl.h @@ -150,6 +150,7 @@ struct sa_os { /* * header for all bonus and spill buffers. + * * The header has a fixed portion with a variable number * of "lengths" depending on the number of variable sized * attribues which are determined by the "layout number" @@ -158,29 +159,27 @@ struct sa_os { #define SA_MAGIC 0x2F505A /* ZFS SA */ typedef struct sa_hdr_phys { uint32_t sa_magic; - uint16_t sa_layout_info; /* Encoded with hdrsize and layout number */ + /* + * Encoded with hdrsize and layout number as follows: + * 16 10 0 + * +--------+-------+ + * | hdrsz |layout | + * +--------+-------+ + * + * Bits 0-10 are the layout number + * Bits 11-16 are the size of the header. + * The hdrsize is the number * 8 + * + * For example. + * hdrsz of 1 ==> 8 byte header + * 2 ==> 16 byte header + * + */ + uint16_t sa_layout_info; uint16_t sa_lengths[1]; /* optional sizes for variable length attrs */ /* ... Data follows the lengths. */ } sa_hdr_phys_t; -/* - * sa_hdr_phys -> sa_layout_info - * - * 16 10 0 - * +--------+-------+ - * | hdrsz |layout | - * +--------+-------+ - * - * Bits 0-10 are the layout number - * Bits 11-16 are the size of the header. - * The hdrsize is the number * 8 - * - * For example. - * hdrsz of 1 ==> 8 byte header - * 2 ==> 16 byte header - * - */ - #define SA_HDR_LAYOUT_NUM(hdr) BF32_GET(hdr->sa_layout_info, 0, 10) #define SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 6, 3, 0) #define SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \ diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h index 983103e386..66ea159475 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h @@ -238,8 +238,9 @@ struct spa { uint64_t spa_deadman_synctime; /* deadman expiration timer */ kmutex_t spa_iokstat_lock; /* protects spa_iokstat_* */ struct kstat *spa_iokstat; /* kstat of io to this pool */ + hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */ /* - * spa_refcnt & spa_config_lock must be the last elements + * spa_refcount & spa_config_lock must be the last elements * because refcount_t changes size based on compilation options. * In order for the MDB module to function correctly, the other * fields must remain in the same location. diff --git a/usr/src/uts/common/fs/zfs/sys/space_map.h b/usr/src/uts/common/fs/zfs/sys/space_map.h index 64223daf62..c0070da670 100644 --- a/usr/src/uts/common/fs/zfs/sys/space_map.h +++ b/usr/src/uts/common/fs/zfs/sys/space_map.h @@ -94,7 +94,6 @@ struct space_map_ops { * 63 62 60 59 50 49 0 * * - * * non-debug entry * * 1 47 1 15 diff --git a/usr/src/uts/common/fs/zfs/sys/unique.h b/usr/src/uts/common/fs/zfs/sys/unique.h index 2ef3093edf..d4ba32e5c6 100644 --- a/usr/src/uts/common/fs/zfs/sys/unique.h +++ b/usr/src/uts/common/fs/zfs/sys/unique.h @@ -26,8 +26,6 @@ #ifndef _SYS_UNIQUE_H #define _SYS_UNIQUE_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/zfs_context.h> #ifdef __cplusplus @@ -42,7 +40,7 @@ void unique_fini(void); /* * Return a new unique value (which will not be uniquified against until - * it is unique_insert()-ed. + * it is unique_insert()-ed). 
*/ uint64_t unique_create(void); diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h index c599c549ac..02e3e838c3 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h @@ -246,12 +246,13 @@ typedef struct vdev_label { #define VDD_METASLAB 0x01 #define VDD_DTL 0x02 +/* Offset of embedded boot loader region on each label */ +#define VDEV_BOOT_OFFSET (2 * sizeof (vdev_label_t)) /* - * Size and offset of embedded boot loader region on each label. + * Size of embedded boot loader region on each label. * The total size of the first two labels plus the boot area is 4MB. */ -#define VDEV_BOOT_OFFSET (2 * sizeof (vdev_label_t)) -#define VDEV_BOOT_SIZE (7ULL << 19) /* 3.5M */ +#define VDEV_BOOT_SIZE (7ULL << 19) /* 3.5M */ /* * Size of label regions at the start and end of each leaf device. @@ -318,8 +319,9 @@ extern uint64_t vdev_get_min_asize(vdev_t *vd); extern void vdev_set_min_asize(vdev_t *vd); /* - * zdb uses this tunable, so it must be declared here to make lint happy. + * Global variables */ +/* zdb uses this tunable, so it must be declared here to make lint happy. */ extern int zfs_vdev_cache_size; /* diff --git a/usr/src/uts/common/fs/zfs/sys/zap.h b/usr/src/uts/common/fs/zfs/sys/zap.h index 1e975e99e0..20a66edf85 100644 --- a/usr/src/uts/common/fs/zfs/sys/zap.h +++ b/usr/src/uts/common/fs/zfs/sys/zap.h @@ -86,18 +86,22 @@ extern "C" { #endif /* - * The matchtype specifies which entry will be accessed. - * MT_EXACT: only find an exact match (non-normalized) - * MT_FIRST: find the "first" normalized (case and Unicode - * form) match; the designated "first" match will not change as long - * as the set of entries with this normalization doesn't change - * MT_BEST: if there is an exact match, find that, otherwise find the - * first normalized match + * Specifies matching criteria for ZAP lookups. */ typedef enum matchtype { + /* Only find an exact match (non-normalized) */ MT_EXACT, + /* + * If there is an exact match, find that, otherwise find the + * first normalized match. + */ MT_BEST, + /* + * Find the "first" normalized (case and Unicode form) match; + * the designated "first" match will not change as long as the + * set of entries with this normalization doesn't change. + */ MT_FIRST } matchtype_t; @@ -174,16 +178,21 @@ int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx); * call will fail and return EINVAL. * * If 'integer_size' is equal to or larger than the attribute's integer - * size, the call will succeed and return 0. * When converting to a - * larger integer size, the integers will be treated as unsigned (ie. no - * sign-extension will be performed). + * size, the call will succeed and return 0. + * + * When converting to a larger integer size, the integers will be treated as + * unsigned (ie. no sign-extension will be performed). * * 'num_integers' is the length (in integers) of 'buf'. * * If the attribute is longer than the buffer, as many integers as will * fit will be transferred to 'buf'. If the entire attribute was not * transferred, the call will return EOVERFLOW. - * + */ +int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name, + uint64_t integer_size, uint64_t num_integers, void *buf); + +/* * If rn_len is nonzero, realname will be set to the name of the found * entry (which may be different from the requested name if matchtype is * not MT_EXACT). 
@@ -191,8 +200,6 @@ int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx); * If normalization_conflictp is not NULL, it will be set if there is * another name with the same case/unicode normalized form. */ -int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name, - uint64_t integer_size, uint64_t num_integers, void *buf); int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name, uint64_t integer_size, uint64_t num_integers, void *buf, matchtype_t mt, char *realname, int rn_len, diff --git a/usr/src/uts/common/fs/zfs/sys/zap_leaf.h b/usr/src/uts/common/fs/zfs/sys/zap_leaf.h index 3a33636741..f6947a72d7 100644 --- a/usr/src/uts/common/fs/zfs/sys/zap_leaf.h +++ b/usr/src/uts/common/fs/zfs/sys/zap_leaf.h @@ -101,6 +101,7 @@ typedef enum zap_chunk_type { */ typedef struct zap_leaf_phys { struct zap_leaf_header { + /* Public to ZAP */ uint64_t lh_block_type; /* ZBT_LEAF */ uint64_t lh_pad1; uint64_t lh_prefix; /* hash prefix of this leaf */ @@ -109,8 +110,7 @@ typedef struct zap_leaf_phys { uint16_t lh_nentries; /* number of entries */ uint16_t lh_prefix_len; /* num bits used to id this */ -/* above is accessable to zap, below is zap_leaf private */ - + /* Private to zap_leaf */ uint16_t lh_freelist; /* chunk head of free list */ uint8_t lh_flags; /* ZLF_* flags */ uint8_t lh_pad2[11]; @@ -161,13 +161,13 @@ typedef struct zap_leaf { typedef struct zap_entry_handle { - /* below is set by zap_leaf.c and is public to zap.c */ + /* Set by zap_leaf and public to ZAP */ uint64_t zeh_num_integers; uint64_t zeh_hash; uint32_t zeh_cd; uint8_t zeh_integer_size; - /* below is private to zap_leaf.c */ + /* Private to zap_leaf */ uint16_t zeh_fakechunk; uint16_t *zeh_chunkp; zap_leaf_t *zeh_leaf; @@ -202,7 +202,7 @@ extern int zap_entry_read_name(struct zap *zap, const zap_entry_handle_t *zeh, /* * Replace the value of an existing entry. * - * zap_entry_update may fail if it runs out of space (ENOSPC). + * May fail if it runs out of space (ENOSPC). */ extern int zap_entry_update(zap_entry_handle_t *zeh, uint8_t integer_size, uint64_t num_integers, const void *buf); @@ -221,10 +221,7 @@ extern int zap_entry_create(zap_leaf_t *l, struct zap_name *zn, uint32_t cd, uint8_t integer_size, uint64_t num_integers, const void *buf, zap_entry_handle_t *zeh); -/* - * Return true if there are additional entries with the same normalized - * form. - */ +/* Determine whether there is another entry with the same normalized form. */ extern boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh, struct zap_name *zn, const char *name, struct zap *zap); diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_acl.h b/usr/src/uts/common/fs/zfs/sys/zfs_acl.h index d1a64180d5..4eefdc563f 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_acl.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_acl.h @@ -46,7 +46,8 @@ struct znode_phys; #define ZFS_ACL_VERSION ZFS_ACL_VERSION_FUID /* - * ZFS ACLs are store in various forms. + * ZFS ACLs (Access Control Lists) are stored in various forms. + * * Files created with ACL version ZFS_ACL_VERSION_INITIAL * will all be created with fixed length ACEs of type * zfs_oldace_t. 
@@ -136,8 +137,8 @@ typedef struct acl_ops { size_t (*ace_size)(void *acep); /* how big is this ace */ size_t (*ace_abstract_size)(void); /* sizeof abstract entry */ int (*ace_mask_off)(void); /* off of access mask in ace */ + /* ptr to data if any */ int (*ace_data)(void *acep, void **datap); - /* ptr to data if any */ } acl_ops_t; /* diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h index 874d422568..9422177023 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -344,7 +344,7 @@ extern int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr); extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr); extern int zfs_busy(void); -extern void zfs_unmount_snap(const char *); +extern int zfs_unmount_snap(const char *); extern void zfs_destroy_unmount_origin(const char *); /* diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_rlock.h b/usr/src/uts/common/fs/zfs/sys/zfs_rlock.h index f302b663e2..93733ba8a2 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_rlock.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_rlock.h @@ -26,8 +26,6 @@ #ifndef _SYS_FS_ZFS_RLOCK_H #define _SYS_FS_ZFS_RLOCK_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -57,16 +55,14 @@ typedef struct rl { } rl_t; /* - * Lock a range (offset, length) as either shared (READER) - * or exclusive (WRITER or APPEND). APPEND is a special type that - * is converted to WRITER that specified to lock from the start of the - * end of file. zfs_range_lock() returns the range lock structure. + * Lock a range (offset, length) as either shared (RL_READER) + * or exclusive (RL_WRITER or RL_APPEND). RL_APPEND is a special type that + * is converted to RL_WRITER that specified to lock from the start of the + * end of file. Returns the range lock structure. */ rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type); -/* - * Unlock range and destroy range lock structure. - */ +/* Unlock range and destroy range lock structure. */ void zfs_range_unlock(rl_t *rl); /* @@ -76,7 +72,8 @@ void zfs_range_unlock(rl_t *rl); void zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len); /* - * AVL comparison function used to compare range locks + * AVL comparison function used to order range locks + * Locks are ordered on the start offset of the range. */ int zfs_range_compare(const void *arg1, const void *arg2); diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h index cf0bbee2ca..43986afda2 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h @@ -138,8 +138,9 @@ extern "C" { #define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE) -/* Path component length */ /* + * Path component length + * * The generic fs code uses MAXNAMELEN to represent * what the largest component length is. Unfortunately, * this length includes the terminating NULL. ZFS needs @@ -234,11 +235,7 @@ typedef struct znode { #define ZTOV(ZP) ((ZP)->z_vnode) #define VTOZ(VP) ((znode_t *)(VP)->v_data) -/* - * ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation. - * ZFS_EXIT() must be called before exitting the vop. - * ZFS_VERIFY_ZP() verifies the znode is valid. 
- */ +/* Called on entry to each ZFS vnode and vfs operation */ #define ZFS_ENTER(zfsvfs) \ { \ rrw_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \ @@ -248,8 +245,10 @@ typedef struct znode { } \ } +/* Must be called before exiting the vop */ #define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG) +/* Verifies the znode is valid */ #define ZFS_VERIFY_ZP(zp) \ if ((zp)->z_sa_hdl == NULL) { \ ZFS_EXIT((zp)->z_zfsvfs); \ @@ -269,15 +268,14 @@ typedef struct znode { #define ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \ mutex_exit(ZFS_OBJ_MUTEX((zfsvfs), (obj_num))) -/* - * Macros to encode/decode ZFS stored time values from/to struct timespec - */ +/* Encode ZFS stored time values from a struct timespec */ #define ZFS_TIME_ENCODE(tp, stmp) \ { \ (stmp)[0] = (uint64_t)(tp)->tv_sec; \ (stmp)[1] = (uint64_t)(tp)->tv_nsec; \ } +/* Decode ZFS stored time values to a struct timespec */ #define ZFS_TIME_DECODE(tp, stmp) \ { \ (tp)->tv_sec = (time_t)(stmp)[0]; \ diff --git a/usr/src/uts/common/fs/zfs/sys/zil.h b/usr/src/uts/common/fs/zfs/sys/zil.h index a212e4f0e1..15ef2aa8bf 100644 --- a/usr/src/uts/common/fs/zfs/sys/zil.h +++ b/usr/src/uts/common/fs/zfs/sys/zil.h @@ -242,6 +242,12 @@ typedef struct { * information needed for replaying the create. If the * file doesn't have any actual ACEs then the lr_aclcnt * would be zero. + * + * After lr_acl_flags, there are a lr_acl_bytes number of variable sized ace's. + * If create is also setting xvattr's, then acl data follows xvattr. + * If ACE FUIDs are needed then they will follow the xvattr_t. Following + * the FUIDs will be the domain table information. The FUIDs for the owner + * and group will be in lr_create. Name follows ACL data. */ typedef struct { lr_create_t lr_create; /* common create portion */ @@ -250,13 +256,6 @@ typedef struct { uint64_t lr_fuidcnt; /* number of real fuids */ uint64_t lr_acl_bytes; /* number of bytes in ACL */ uint64_t lr_acl_flags; /* ACL flags */ - /* lr_acl_bytes number of variable sized ace's follows */ - /* if create is also setting xvattr's, then acl data follows xvattr */ - /* if ACE FUIDs are needed then they will follow the xvattr_t */ - /* Following the FUIDs will be the domain table information. */ - /* The FUIDs for the owner and group will be in the lr_create */ - /* portion of the record. */ - /* name follows ACL data */ } lr_acl_create_t; typedef struct { diff --git a/usr/src/uts/common/fs/zfs/sys/zio_compress.h b/usr/src/uts/common/fs/zfs/sys/zio_compress.h index 34a82a8b81..f4cb84511a 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio_compress.h +++ b/usr/src/uts/common/fs/zfs/sys/zio_compress.h @@ -36,11 +36,10 @@ extern "C" { #endif -/* - * Common signature for all zio compress/decompress functions. - */ +/* Common signature for all zio compress functions. */ typedef size_t zio_compress_func_t(void *src, void *dst, size_t s_len, size_t d_len, int); +/* Common signature for all zio decompress functions. */ typedef int zio_decompress_func_t(void *src, void *dst, size_t s_len, size_t d_len, int); diff --git a/usr/src/uts/common/fs/zfs/txg.c b/usr/src/uts/common/fs/zfs/txg.c index 62a0c605d7..8cdb284832 100644 --- a/usr/src/uts/common/fs/zfs/txg.c +++ b/usr/src/uts/common/fs/zfs/txg.c @@ -344,6 +344,12 @@ txg_rele_to_sync(txg_handle_t *th) th->th_cpu = NULL; /* defensive */ } +/* + * Blocks until all transactions in the group are committed. + * + * On return, the transaction group has reached a stable state in which it can + * then be passed off to the syncing context. 
+ */ static void txg_quiesce(dsl_pool_t *dp, uint64_t txg) { @@ -394,6 +400,9 @@ txg_do_callbacks(list_t *cb_list) /* * Dispatch the commit callbacks registered on this txg to worker threads. + * + * If no callbacks are registered for a given TXG, nothing happens. + * This function creates a taskq for the associated pool, if needed. */ static void txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg) @@ -404,7 +413,10 @@ txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg) for (c = 0; c < max_ncpus; c++) { tx_cpu_t *tc = &tx->tx_cpu[c]; - /* No need to lock tx_cpu_t at this point */ + /* + * No need to lock tx_cpu_t at this point, since this can + * only be called once a txg has been synced. + */ int g = txg & TXG_MASK; @@ -424,7 +436,7 @@ txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg) list_create(cb_list, sizeof (dmu_tx_callback_t), offsetof(dmu_tx_callback_t, dcb_node)); - list_move_tail(&tc->tc_callbacks[g], cb_list); + list_move_tail(cb_list, &tc->tc_callbacks[g]); (void) taskq_dispatch(tx->tx_commit_cb_taskq, (task_func_t *) txg_do_callbacks, cb_list, TQ_SLEEP); @@ -558,8 +570,8 @@ txg_quiesce_thread(dsl_pool_t *dp) /* * Delay this thread by delay nanoseconds if we are still in the open - * transaction group and there is already a waiting txg quiesing or quiesced. - * Abort the delay if this txg stalls or enters the quiesing state. + * transaction group and there is already a waiting txg quiescing or quiesced. + * Abort the delay if this txg stalls or enters the quiescing state. */ void txg_delay(dsl_pool_t *dp, uint64_t txg, hrtime_t delay, hrtime_t resolution) @@ -567,7 +579,7 @@ txg_delay(dsl_pool_t *dp, uint64_t txg, hrtime_t delay, hrtime_t resolution) tx_state_t *tx = &dp->dp_tx; hrtime_t start = gethrtime(); - /* don't delay if this txg could transition to quiesing immediately */ + /* don't delay if this txg could transition to quiescing immediately */ if (tx->tx_open_txg > txg || tx->tx_syncing_txg == txg-1 || tx->tx_synced_txg == txg-1) return; diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index cc3594ad1a..7a409bd7ed 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -956,9 +956,11 @@ vdev_probe_done(zio_t *zio) } /* - * Determine whether this device is accessible by reading and writing - * to several known locations: the pad regions of each vdev label - * but the first (which we leave alone in case it contains a VTOC). + * Determine whether this device is accessible. + * + * Read and write to several known locations: the pad regions of each + * vdev label but the first, which we leave alone in case it contains + * a VTOC. */ zio_t * vdev_probe(vdev_t *vd, zio_t *zio) @@ -2179,10 +2181,12 @@ vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux) } /* - * Online the given vdev. If 'unspare' is set, it implies two things. First, - * any attached spare device should be detached when the device finishes - * resilvering. Second, the online should be treated like a 'test' online case, - * so no FMA events are generated if the device fails to open. + * Online the given vdev. + * + * If 'ZFS_ONLINE_UNSPARE' is set, it implies two things. First, any attached + * spare device should be detached when the device finishes resilvering. + * Second, the online should be treated like a 'test' online case, so no FMA + * events are generated if the device fails to open. 
*/ int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate) diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c index 5ee7c7d15b..904918c3a4 100644 --- a/usr/src/uts/common/fs/zfs/vdev_label.c +++ b/usr/src/uts/common/fs/zfs/vdev_label.c @@ -1028,6 +1028,7 @@ vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags) zio_buf_free(ubbuf, VDEV_UBERBLOCK_SIZE(vd)); } +/* Sync the uberblocks to all vdevs in svd[] */ int vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) { diff --git a/usr/src/uts/common/fs/zfs/vdev_queue.c b/usr/src/uts/common/fs/zfs/vdev_queue.c index fccbbb1d75..8de4b324a2 100644 --- a/usr/src/uts/common/fs/zfs/vdev_queue.c +++ b/usr/src/uts/common/fs/zfs/vdev_queue.c @@ -38,13 +38,14 @@ /* * These tunables are for performance analysis. */ + +/* The maximum number of I/Os concurrently pending to each device. */ +int zfs_vdev_max_pending = 10; + /* - * zfs_vdev_max_pending is the maximum number of i/os concurrently - * pending to each device. zfs_vdev_min_pending is the initial number - * of i/os pending to each device (before it starts ramping up to - * max_pending). + * The initial number of I/Os pending to each device, before it starts ramping + * up to zfs_vdev_max_pending. */ -int zfs_vdev_max_pending = 10; int zfs_vdev_min_pending = 4; /* diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c index d4b3d5b5a8..b22bcd2b2f 100644 --- a/usr/src/uts/common/fs/zfs/vdev_raidz.c +++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c @@ -64,6 +64,7 @@ * o addition (+) is represented by a bitwise XOR * o subtraction (-) is therefore identical to addition: A + B = A - B * o multiplication of A by 2 is defined by the following bitwise expression: + * * (A * 2)_7 = A_6 * (A * 2)_6 = A_5 * (A * 2)_5 = A_4 @@ -122,7 +123,7 @@ typedef struct raidz_map { uint64_t rm_missingparity; /* Count of missing parity devices */ uint64_t rm_firstdatacol; /* First data column/parity count */ uint64_t rm_nskip; /* Skipped sectors for padding */ - uint64_t rm_skipstart; /* Column index of padding start */ + uint64_t rm_skipstart; /* Column index of padding start */ void *rm_datacopy; /* rm_asize-buffer of copied data */ uintptr_t rm_reports; /* # of referencing checksum reports */ uint8_t rm_freed; /* map no longer has referencing ZIO */ @@ -164,10 +165,7 @@ typedef struct raidz_map { */ int vdev_raidz_default_to_general; -/* - * These two tables represent powers and logs of 2 in the Galois field defined - * above. These values were computed by repeatedly multiplying by 2 as above. - */ +/* Powers of 2 in the Galois field defined above. */ static const uint8_t vdev_raidz_pow2[256] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, @@ -202,6 +200,7 @@ static const uint8_t vdev_raidz_pow2[256] = { 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01 }; +/* Logs of 2 in the Galois field defined above. */ static const uint8_t vdev_raidz_log2[256] = { 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b, @@ -437,23 +436,50 @@ static const zio_vsd_ops_t vdev_raidz_vsd_ops = { vdev_raidz_cksum_report }; +/* + * Divides the IO evenly across all child vdevs; usually, dcols is + * the number of children in the target vdev. 
+ */ static raidz_map_t * vdev_raidz_map_alloc(caddr_t data, uint64_t size, uint64_t offset, uint64_t unit_shift, uint64_t dcols, uint64_t nparity) { raidz_map_t *rm; + /* The starting RAIDZ (parent) vdev sector of the block. */ uint64_t b = offset >> unit_shift; + /* The zio's size in units of the vdev's minimum sector size. */ uint64_t s = size >> unit_shift; + /* The first column for this stripe. */ uint64_t f = b % dcols; + /* The starting byte offset on each child vdev. */ uint64_t o = (b / dcols) << unit_shift; uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot; + /* + * "Quotient": The number of data sectors for this stripe on all but + * the "big column" child vdevs that also contain "remainder" data. + */ q = s / (dcols - nparity); + + /* + * "Remainder": The number of partial stripe data sectors in this I/O. + * This will add a sector to some, but not all, child vdevs. + */ r = s - q * (dcols - nparity); + + /* The number of "big columns" - those which contain remainder data. */ bc = (r == 0 ? 0 : r + nparity); + + /* + * The total number of data and parity sectors associated with + * this I/O. + */ tot = s + nparity * (q + (r == 0 ? 0 : 1)); + /* acols: The columns that will be accessed. */ + /* scols: The columns that will be accessed or skipped. */ if (q == 0) { + /* Our I/O request doesn't span all child vdevs. */ acols = bc; scols = MIN(dcols, roundup(bc, nparity + 1)); } else { @@ -1668,6 +1694,23 @@ vdev_raidz_child_done(zio_t *zio) rc->rc_skipped = 0; } +/* + * Start an IO operation on a RAIDZ VDev + * + * Outline: + * - For write operations: + * 1. Generate the parity data + * 2. Create child zio write operations to each column's vdev, for both + * data and parity. + * 3. If the column skips any sectors for padding, create optional dummy + * write zio children for those areas to improve aggregation continuity. + * - For read operations: + * 1. Create child zio read operations to each data column's vdev to read + * the range of data required for zio. + * 2. If this is a scrub or resilver operation, or if any of the data + * vdevs have had errors, then create zio read operations to the parity + * columns' VDevs as well. + */ static int vdev_raidz_io_start(zio_t *zio) { @@ -2019,6 +2062,27 @@ done: return (ret); } +/* + * Complete an IO operation on a RAIDZ VDev + * + * Outline: + * - For write operations: + * 1. Check for errors on the child IOs. + * 2. Return, setting an error code if too few child VDevs were written + * to reconstruct the data later. Note that partial writes are + * considered successful if they can be reconstructed at all. + * - For read operations: + * 1. Check for errors on the child IOs. + * 2. If data errors occurred: + * a. Try to reassemble the data from the parity available. + * b. If we haven't yet read the parity drives, read them now. + * c. If all parity drives have been read but the data still doesn't + * reassemble with a correct checksum, then try combinatorial + * reconstruction. + * d. If that doesn't work, return an error. + * 3. If there were unexpected errors or this is a resilver operation, + * rewrite the vdevs that had errors. 
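To make the vdev_raidz_map_alloc() geometry above concrete, the same formulas can be evaluated for a hypothetical 6-wide raidz2 vdev with 512-byte sectors and a 5 KB write (numbers chosen only for illustration):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        uint64_t dcols = 6, nparity = 2;        /* hypothetical raidz2 layout */
        uint64_t s = 10;                        /* 5 KB / 512-byte sectors */

        uint64_t q = s / (dcols - nparity);             /* full data rows */
        uint64_t r = s - q * (dcols - nparity);         /* leftover data sectors */
        uint64_t bc = (r == 0 ? 0 : r + nparity);       /* "big columns" */
        uint64_t tot = s + nparity * (q + (r == 0 ? 0 : 1));

        printf("q=%llu r=%llu bc=%llu tot=%llu\n",
            (unsigned long long)q, (unsigned long long)r,
            (unsigned long long)bc, (unsigned long long)tot);
        return (0);
}

Here q=2, r=2, bc=4 and tot=16: ten data sectors plus two parity sectors for each of the three rows touched.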
+ */ static void vdev_raidz_io_done(zio_t *zio) { diff --git a/usr/src/uts/common/fs/zfs/zap.c b/usr/src/uts/common/fs/zfs/zap.c index 2f4ccfb6ea..f69c1b0312 100644 --- a/usr/src/uts/common/fs/zfs/zap.c +++ b/usr/src/uts/common/fs/zfs/zap.c @@ -295,7 +295,8 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp) err = dmu_buf_hold(zap->zap_objset, zap->zap_object, (tbl->zt_nextblk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH); - dmu_buf_rele(db, FTAG); + if (err == 0) + dmu_buf_rele(db, FTAG); } return (err); } @@ -992,18 +993,21 @@ zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx) zap_attribute_t za; int err; + err = 0; for (zap_cursor_init(&zc, os, fromobj); zap_cursor_retrieve(&zc, &za) == 0; (void) zap_cursor_advance(&zc)) { - if (za.za_integer_length != 8 || za.za_num_integers != 1) - return (SET_ERROR(EINVAL)); + if (za.za_integer_length != 8 || za.za_num_integers != 1) { + err = SET_ERROR(EINVAL); + break; + } err = zap_add(os, intoobj, za.za_name, 8, 1, &za.za_first_integer, tx); if (err) - return (err); + break; } zap_cursor_fini(&zc); - return (0); + return (err); } int @@ -1014,18 +1018,21 @@ zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj, zap_attribute_t za; int err; + err = 0; for (zap_cursor_init(&zc, os, fromobj); zap_cursor_retrieve(&zc, &za) == 0; (void) zap_cursor_advance(&zc)) { - if (za.za_integer_length != 8 || za.za_num_integers != 1) - return (SET_ERROR(EINVAL)); + if (za.za_integer_length != 8 || za.za_num_integers != 1) { + err = SET_ERROR(EINVAL); + break; + } err = zap_add(os, intoobj, za.za_name, 8, 1, &value, tx); if (err) - return (err); + break; } zap_cursor_fini(&zc); - return (0); + return (err); } int @@ -1036,24 +1043,27 @@ zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj, zap_attribute_t za; int err; + err = 0; for (zap_cursor_init(&zc, os, fromobj); zap_cursor_retrieve(&zc, &za) == 0; (void) zap_cursor_advance(&zc)) { uint64_t delta = 0; - if (za.za_integer_length != 8 || za.za_num_integers != 1) - return (SET_ERROR(EINVAL)); + if (za.za_integer_length != 8 || za.za_num_integers != 1) { + err = SET_ERROR(EINVAL); + break; + } err = zap_lookup(os, intoobj, za.za_name, 8, 1, &delta); if (err != 0 && err != ENOENT) - return (err); + break; delta += za.za_first_integer; err = zap_update(os, intoobj, za.za_name, 8, 1, &delta, tx); if (err) - return (err); + break; } zap_cursor_fini(&zc); - return (0); + return (err); } int diff --git a/usr/src/uts/common/fs/zfs/zfs_acl.c b/usr/src/uts/common/fs/zfs/zfs_acl.c index 1b296b2897..2eecefd8cf 100644 --- a/usr/src/uts/common/fs/zfs/zfs_acl.c +++ b/usr/src/uts/common/fs/zfs/zfs_acl.c @@ -1362,7 +1362,8 @@ zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp) zacep = (void *)((uintptr_t)zacep + abstract_size); new_count++; new_bytes += abstract_size; - } if (masks.deny1) { + } + if (masks.deny1) { zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER); zacep = (void *)((uintptr_t)zacep + abstract_size); new_count++; @@ -1766,7 +1767,7 @@ zfs_acl_ids_overquota(zfsvfs_t *zfsvfs, zfs_acl_ids_t *acl_ids) } /* - * Retrieve a files ACL + * Retrieve a file's ACL */ int zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) @@ -1921,7 +1922,7 @@ zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type, } /* - * Set a files ACL + * Set a file's ACL */ int zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) @@ -2342,6 +2343,7 @@ slow: /* * Determine whether Access should be 
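The zap_join*() rewrites above all follow one shape: record the error, break out of the cursor loop, and let the single zap_cursor_fini() at the bottom run, instead of returning mid-iteration and leaking the cursor (the zap_table_load() hunk similarly only releases the dbuf when the hold actually succeeded). A generic user-level sketch of that shape, using stdio rather than the ZAP cursor API (sum_file() is a made-up example, not ZFS code):

#include <stdio.h>
#include <errno.h>

/* Sum one integer per line; on a bad line, stop but still fclose() once. */
static int
sum_file(const char *path, long *sump)
{
        FILE *fp = fopen(path, "r");
        char line[128];
        long sum = 0;
        int err = 0;

        if (fp == NULL)
                return (errno);

        while (fgets(line, sizeof (line), fp) != NULL) {
                long v;

                if (sscanf(line, "%ld", &v) != 1) {
                        err = EINVAL;
                        break;          /* don't return; cleanup below */
                }
                sum += v;
        }
        (void) fclose(fp);      /* single cleanup point, like zap_cursor_fini() */

        if (err == 0)
                *sump = sum;
        return (err);
}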
granted/denied. + * * The least priv subsytem is always consulted as a basic privilege * can define any form of access. */ @@ -2537,7 +2539,6 @@ zfs_delete_final_check(znode_t *zp, znode_t *dzp, * Determine whether Access should be granted/deny, without * consulting least priv subsystem. * - * * The following chart is the recommended NFSv4 enforcement for * ability to delete an object. * diff --git a/usr/src/uts/common/fs/zfs/zfs_ctldir.c b/usr/src/uts/common/fs/zfs/zfs_ctldir.c index f915d21db2..5928fe75e9 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ctldir.c +++ b/usr/src/uts/common/fs/zfs/zfs_ctldir.c @@ -505,6 +505,11 @@ static const fs_operation_def_t zfsctl_tops_root[] = { { NULL } }; +/* + * Gets the full dataset name that corresponds to the given snapshot name + * Example: + * zfsctl_snapshot_zname("snap1") -> "mypool/myfs@snap1" + */ static int zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname) { @@ -1046,6 +1051,7 @@ zfsctl_shares_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp, /* * pvp is the '.zfs' directory (zfsctl_node_t). + * * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t). * * This function is the callback to create a GFS vnode for '.zfs/snapshot' diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index 15b19b0d06..06ea64181d 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -323,9 +323,7 @@ zfs_is_bootfs(const char *name) } /* - * zfs_earlier_version - * - * Return non-zero if the spa version is less than requested version. + * Return non-zero if the spa version is less than requested version. */ static int zfs_earlier_version(const char *name, int version) @@ -343,8 +341,6 @@ zfs_earlier_version(const char *name, int version) } /* - * zpl_earlier_version - * * Return TRUE if the ZPL version is less than requested version. */ static boolean_t @@ -2986,10 +2982,10 @@ zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) /* * inputs: - * createprops list of properties requested by creator - * default_zplver zpl version to use if unspecified in createprops - * fuids_ok fuids allowed in this version of the spa? * os parent objset pointer (NULL if root fs) + * fuids_ok fuids allowed in this version of the spa? + * sa_ok SAs allowed in this version of the spa? + * createprops list of properties requested by creator * * outputs: * zplprops values for the zplprops we attach to the master node object @@ -3395,41 +3391,44 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) * * This function is best-effort. Callers must deal gracefully if it * remains mounted (or is remounted after this call). + * + * Returns 0 if the argument is not a snapshot, or it is not currently a + * filesystem, or we were able to unmount it. Returns error code otherwise. */ -void +int zfs_unmount_snap(const char *snapname) { vfs_t *vfsp; zfsvfs_t *zfsvfs; + int err; if (strchr(snapname, '@') == NULL) - return; + return (0); vfsp = zfs_get_vfs(snapname); if (vfsp == NULL) - return; + return (0); zfsvfs = vfsp->vfs_data; ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os))); - if (vn_vfswlock(vfsp->vfs_vnodecovered) != 0) { - VFS_RELE(vfsp); - return; - } + err = vn_vfswlock(vfsp->vfs_vnodecovered); VFS_RELE(vfsp); + if (err != 0) + return (SET_ERROR(err)); /* * Always force the unmount for snapshots. 
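The zfsctl_snapshot_zname() comment above relies on the '<filesystem>@<snapshot>' naming convention; recursive_unmount() further down builds the same form with snprintf(). A trivial stand-alone sketch of that composition (the dataset names are made up):

#include <stdio.h>

#define MAXNAMELEN      256

int
main(void)
{
        char fullname[MAXNAMELEN];
        const char *fsname = "mypool/myfs";     /* hypothetical dataset */
        const char *snapname = "snap1";

        (void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
        printf("%s\n", fullname);               /* mypool/myfs@snap1 */
        return (0);
}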
*/ (void) dounmount(vfsp, MS_FORCE, kcred); + return (0); } /* ARGSUSED */ static int zfs_unmount_snap_cb(const char *snapname, void *arg) { - zfs_unmount_snap(snapname); - return (0); + return (zfs_unmount_snap(snapname)); } /* @@ -3452,7 +3451,7 @@ zfs_destroy_unmount_origin(const char *fsname) char originname[MAXNAMELEN]; dsl_dataset_name(ds->ds_prev, originname); dmu_objset_rele(os, FTAG); - zfs_unmount_snap(originname); + (void) zfs_unmount_snap(originname); } else { dmu_objset_rele(os, FTAG); } @@ -3470,7 +3469,7 @@ zfs_destroy_unmount_origin(const char *fsname) static int zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { - int poollen; + int error, poollen; nvlist_t *snaps; nvpair_t *pair; boolean_t defer; @@ -3491,7 +3490,9 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) (name[poollen] != '/' && name[poollen] != '@')) return (SET_ERROR(EXDEV)); - zfs_unmount_snap(name); + error = zfs_unmount_snap(name); + if (error != 0) + return (error); } return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl)); @@ -3509,8 +3510,12 @@ static int zfs_ioc_destroy(zfs_cmd_t *zc) { int err; - if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) - zfs_unmount_snap(zc->zc_name); + + if (zc->zc_objset_type == DMU_OST_ZFS) { + err = zfs_unmount_snap(zc->zc_name); + if (err != 0) + return (err); + } if (strchr(zc->zc_name, '@')) err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy); @@ -3556,8 +3561,7 @@ recursive_unmount(const char *fsname, void *arg) char fullname[MAXNAMELEN]; (void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname); - zfs_unmount_snap(fullname); - return (0); + return (zfs_unmount_snap(fullname)); } /* @@ -5016,14 +5020,18 @@ static int zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist) { nvpair_t *pair; + int err; /* * The release may cause the snapshot to be destroyed; make sure it * is not mounted. */ for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(holds, pair)) - zfs_unmount_snap(nvpair_name(pair)); + pair = nvlist_next_nvpair(holds, pair)) { + err = zfs_unmount_snap(nvpair_name(pair)); + if (err != 0) + return (err); + } return (dsl_dataset_user_release(holds, errlist)); } diff --git a/usr/src/uts/common/fs/zfs/zfs_log.c b/usr/src/uts/common/fs/zfs/zfs_log.c index de786bf7f4..aeaba2233a 100644 --- a/usr/src/uts/common/fs/zfs/zfs_log.c +++ b/usr/src/uts/common/fs/zfs/zfs_log.c @@ -211,9 +211,8 @@ zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start) } /* - * zfs_log_create() is used to handle TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, - * TX_MKDIR_ATTR and TX_MKXATTR - * transactions. + * Handles TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, TX_MKDIR_ATTR and + * TK_MKXATTR transactions. * * TX_CREATE and TX_MKDIR are standard creates, but they may have FUID * domain information appended prior to the name. In this case the @@ -340,7 +339,7 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, } /* - * zfs_log_remove() handles both TX_REMOVE and TX_RMDIR transactions. + * Handles both TX_REMOVE and TX_RMDIR transactions. */ void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, @@ -364,7 +363,7 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, } /* - * zfs_log_link() handles TX_LINK transactions. + * Handles TX_LINK transactions. 
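The EXDEV check in zfs_ioc_destroy_snaps() above enforces that every snapshot handed in belongs to the named pool: the name must start with the pool name and the next character must be '/' or '@'. A rough user-level version of that prefix test (name_in_pool() is my own illustration, not the kernel's code):

#include <stdio.h>
#include <string.h>

static int
name_in_pool(const char *pool, const char *name)
{
        size_t poollen = strlen(pool);

        return (strncmp(pool, name, poollen) == 0 &&
            (name[poollen] == '/' || name[poollen] == '@'));
}

int
main(void)
{
        printf("%d\n", name_in_pool("tank", "tank/fs@snap1"));  /* 1 */
        printf("%d\n", name_in_pool("tank", "tankers/fs@s"));   /* 0: prefix alone is not enough */
        return (0);
}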
*/ void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, @@ -387,7 +386,7 @@ zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, } /* - * zfs_log_symlink() handles TX_SYMLINK transactions. + * Handles TX_SYMLINK transactions. */ void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, @@ -419,7 +418,7 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, } /* - * zfs_log_rename() handles TX_RENAME transactions. + * Handles TX_RENAME transactions. */ void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, @@ -445,7 +444,7 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, } /* - * zfs_log_write() handles TX_WRITE transactions. + * Handles TX_WRITE transactions. */ ssize_t zfs_immediate_write_sz = 32768; @@ -524,7 +523,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, } /* - * zfs_log_truncate() handles TX_TRUNCATE transactions. + * Handles TX_TRUNCATE transactions. */ void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, @@ -547,7 +546,7 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, } /* - * zfs_log_setattr() handles TX_SETATTR transactions. + * Handles TX_SETATTR transactions. */ void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, @@ -609,7 +608,7 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, } /* - * zfs_log_acl() handles TX_ACL transactions. + * Handles TX_ACL transactions. */ void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, diff --git a/usr/src/uts/common/fs/zfs/zfs_rlock.c b/usr/src/uts/common/fs/zfs/zfs_rlock.c index be562496b0..b40bdbea12 100644 --- a/usr/src/uts/common/fs/zfs/zfs_rlock.c +++ b/usr/src/uts/common/fs/zfs/zfs_rlock.c @@ -28,7 +28,7 @@ /* * This file contains the code to implement file range locking in - * ZFS, although there isn't much specific to ZFS (all that comes to mind + * ZFS, although there isn't much specific to ZFS (all that comes to mind is * support for growing the blocksize). * * Interface diff --git a/usr/src/uts/common/fs/zfs/zfs_sa.c b/usr/src/uts/common/fs/zfs/zfs_sa.c index d141e43d72..ed5f276475 100644 --- a/usr/src/uts/common/fs/zfs/zfs_sa.c +++ b/usr/src/uts/common/fs/zfs/zfs_sa.c @@ -187,7 +187,7 @@ zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) /* * I'm not convinced we should do any of this upgrade. * since the SA code can read both old/new znode formats - * with probably little to know performance difference. + * with probably little to no performance difference. * * All new files will be created with the new format. */ diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c index e337861cd4..c7d4444722 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c @@ -1349,13 +1349,12 @@ zfs_parse_bootfs(char *bpath, char *outpath) } /* - * zfs_check_global_label: - * Check that the hex label string is appropriate for the dataset - * being mounted into the global_zone proper. + * Check that the hex label string is appropriate for the dataset being + * mounted into the global_zone proper. * - * Return an error if the hex label string is not default or - * admin_low/admin_high. For admin_low labels, the corresponding - * dataset must be readonly. + * Return an error if the hex label string is not default or + * admin_low/admin_high. For admin_low labels, the corresponding + * dataset must be readonly. 
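zfs_immediate_write_sz above is the cutoff zfs_log_write() uses when deciding how file data reaches the intent log: roughly, small writes can be copied into the log record itself, while writes at or above the cutoff (absent a separate log device) are logged by reference to the data block; the real decision also looks at log bias and the sync flags. A deliberately simplified sketch of that kind of size-based choice (the enum and function names are made up, and this is not the actual ZFS policy code):

#include <stdio.h>
#include <stdint.h>

typedef enum { LOG_COPIED, LOG_INDIRECT } log_how_t;    /* made-up names */

static log_how_t
choose_log_method(uint64_t resid, uint64_t immediate_write_sz)
{
        /* Simplification: copy small writes, reference large ones. */
        return (resid < immediate_write_sz ? LOG_COPIED : LOG_INDIRECT);
}

int
main(void)
{
        uint64_t cutoff = 32768;        /* default zfs_immediate_write_sz */

        printf("%d %d\n",
            choose_log_method(4096, cutoff) == LOG_COPIED,
            choose_log_method(131072, cutoff) == LOG_INDIRECT);
        return (0);
}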
*/ int zfs_check_global_label(const char *dsname, const char *hexsl) @@ -1377,15 +1376,12 @@ zfs_check_global_label(const char *dsname, const char *hexsl) } /* - * zfs_mount_label_policy: - * Determine whether the mount is allowed according to MAC check. - * by comparing (where appropriate) label of the dataset against - * the label of the zone being mounted into. If the dataset has - * no label, create one. + * Determine whether the mount is allowed according to MAC check. + * by comparing (where appropriate) label of the dataset against + * the label of the zone being mounted into. If the dataset has + * no label, create one. * - * Returns: - * 0 : access allowed - * >0 : error code, such as EACCES + * Returns 0 if access allowed, error otherwise (e.g. EACCES) */ static int zfs_mount_label_policy(vfs_t *vfsp, char *osname) diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c index 161e573175..b0901bea0c 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c @@ -95,11 +95,11 @@ * The ordering of events is important to avoid deadlocks and references * to freed memory. The example below illustrates the following Big Rules: * - * (1) A check must be made in each zfs thread for a mounted file system. + * (1) A check must be made in each zfs thread for a mounted file system. * This is done avoiding races using ZFS_ENTER(zfsvfs). - * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes - * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros - * can return EIO from the calling function. + * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes + * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros + * can return EIO from the calling function. * * (2) VN_RELE() should always be the last thing except for zil_commit() * (if necessary) and ZFS_EXIT(). This is for 3 reasons: @@ -131,7 +131,7 @@ * (5) If the operation succeeded, generate the intent log entry for it * before dropping locks. This ensures that the ordering of events * in the intent log matches the order in which they actually occurred. - * During ZIL replay the zfs_log_* functions will update the sequence + * During ZIL replay the zfs_log_* functions will update the sequence * number to indicate the zil transaction has replayed. * * (6) At the end of each vnode op, the DMU tx must always commit, @@ -388,7 +388,7 @@ update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid) * else we default from the dmu buffer. * * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when - * the file is memory mapped. + * the file is memory mapped. */ static int mappedread(vnode_t *vp, int nbytes, uio_t *uio) @@ -437,8 +437,7 @@ offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ * * OUT: uio - updated offset and range, buffer filled. * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. * * Side Effects: * vp - atime updated if byte count > 0 @@ -574,14 +573,14 @@ out: * IN: vp - vnode of file to be written to. * uio - structure supplying write location, range info, * and data buffer. - * ioflag - FAPPEND flag set if in append mode. + * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is + * set if in append mode. * cr - credentials of caller. * ct - caller context (NFS/CIFS fem monitor only) * * OUT: uio - updated offset and range. * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. 
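zfs_read_chunk_size above caps how much of a read zfs_read() asks the DMU for in one go; a large request is consumed in chunks (the kernel additionally aligns chunk boundaries, which is omitted here). A stand-alone sketch of that chunking loop with illustrative sizes:

#include <stdio.h>
#include <stdint.h>

#define MIN(a, b)       ((a) < (b) ? (a) : (b))

int
main(void)
{
        uint64_t chunk = 1024 * 1024;                   /* zfs_read_chunk_size default */
        uint64_t resid = 3ULL * 1024 * 1024 + 4096;     /* bytes left in the uio */
        uint64_t off = 0;

        while (resid > 0) {
                uint64_t nbytes = MIN(resid, chunk);

                printf("read %llu bytes at offset %llu\n",
                    (unsigned long long)nbytes, (unsigned long long)off);
                off += nbytes;
                resid -= nbytes;
        }
        return (0);
}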
* * Timestamps: * vp - ctime|mtime updated if byte count > 0 @@ -1149,8 +1148,7 @@ specvp_check(vnode_t **vpp, cred_t *cr) * * OUT: vpp - vnode of located entry, NULL if not found. * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. * * Timestamps: * NA @@ -1291,8 +1289,7 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, * * OUT: vpp - vnode of created or trunc'd entry. * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. * * Timestamps: * dvp - ctime|mtime updated if new entry created @@ -1542,8 +1539,7 @@ out: * ct - caller context * flags - case flags * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. * * Timestamps: * dvp - ctime|mtime @@ -1773,12 +1769,12 @@ out: * vap - attributes of new directory. * cr - credentials of caller. * ct - caller context + * flags - case flags * vsecp - ACL to be set * * OUT: vpp - vnode of created directory. * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. * * Timestamps: * dvp - ctime|mtime updated @@ -1958,8 +1954,7 @@ top: * ct - caller context * flags - case flags * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. * * Timestamps: * dvp - ctime|mtime updated @@ -2077,7 +2072,7 @@ out: /* * Read as many directory entries as will fit into the provided * buffer from the given directory cursor position (specified in - * the uio structure. + * the uio structure). * * IN: vp - vnode of directory to read. * uio - structure supplying read location, range info, @@ -2089,8 +2084,7 @@ out: * OUT: uio - updated offset and range, buffer filled. * eofp - set to true if end-of-file detected. * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - atime updated @@ -2409,7 +2403,7 @@ zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) * * OUT: vap - attribute values. * - * RETURN: 0 (always succeeds) + * RETURN: 0 (always succeeds). */ /* ARGSUSED */ static int @@ -2611,8 +2605,7 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, * cr - credentials of caller. * ct - caller context * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - ctime updated, mtime updated if size changed. @@ -2620,7 +2613,7 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, /* ARGSUSED */ static int zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, - caller_context_t *ct) + caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; @@ -3213,6 +3206,7 @@ out: if (attrzp) VN_RELE(ZTOV(attrzp)); + if (aclp) zfs_acl_free(aclp); @@ -3347,8 +3341,7 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) * ct - caller context * flags - case flags * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. * * Timestamps: * sdvp,tdvp - ctime|mtime updated @@ -3695,13 +3688,11 @@ out: * IN: dvp - Directory to contain new symbolic link. * link - Name for new symlink entry. * vap - Attributes of new entry. - * target - Target path of new symlink. * cr - credentials of caller. * ct - caller context * flags - case flags * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. 
* * Timestamps: * dvp - ctime|mtime updated @@ -3847,14 +3838,13 @@ top: * the symbolic path referred to by vp. * * IN: vp - vnode of symbolic link. - * uoip - structure to contain the link path. + * uio - structure to contain the link path. * cr - credentials of caller. * ct - caller context * - * OUT: uio - structure to contain the link path. + * OUT: uio - structure containing the link path. * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - atime updated @@ -3893,8 +3883,7 @@ zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) * cr - credentials of caller. * ct - caller context * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. * * Timestamps: * tdvp - ctime|mtime updated @@ -4062,8 +4051,7 @@ zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, * OUT: offp - start of range pushed. * lenp - len of range pushed. * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. * * NOTE: callers must have locked the page to be pushed. On * exit, the page (and all other pages in the kluster) must be @@ -4187,8 +4175,7 @@ out: * cr - credentials of caller. * ct - caller context. * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - ctime|mtime updated @@ -4353,8 +4340,7 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) * noffp - pointer to new file offset * ct - caller context * - * RETURN: 0 if success - * EINVAL if new offset invalid + * RETURN: 0 on success, EINVAL if new offset invalid. */ /* ARGSUSED */ static int @@ -4490,8 +4476,7 @@ zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, * OUT: protp - protection mode of created pages. * pl - list of pages created. * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. * * Timestamps: * vp - atime updated @@ -4499,8 +4484,8 @@ zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, /* ARGSUSED */ static int zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, - page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, - enum seg_rw rw, cred_t *cr, caller_context_t *ct) + page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, + enum seg_rw rw, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; @@ -4575,15 +4560,11 @@ out: * Request a memory map for a section of a file. This code interacts * with common code and the VM system as follows: * - * common code calls mmap(), which ends up in smmap_common() - * - * this calls VOP_MAP(), which takes you into (say) zfs - * - * zfs_map() calls as_map(), passing segvn_create() as the callback - * - * segvn_create() creates the new segment and calls VOP_ADDMAP() - * - * zfs_addmap() updates z_mapcnt + * - common code calls mmap(), which ends up in smmap_common() + * - this calls VOP_MAP(), which takes you into (say) zfs + * - zfs_map() calls as_map(), passing segvn_create() as the callback + * - segvn_create() creates the new segment and calls VOP_ADDMAP() + * - zfs_addmap() updates z_mapcnt */ /*ARGSUSED*/ static int @@ -4700,8 +4681,7 @@ zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, * cr - credentials of caller [UNUSED]. * ct - caller context. * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure. 
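Both zcr_blksz_min and zcr_blksz_max above are documented as powers of two, and their defaults ((1 << 10) and (1 << 17)) satisfy that. The usual constant-time test for such a constraint looks like this (is_pow2() is my own helper, not from the source):

#include <stdio.h>
#include <stdint.h>

/* True iff x is a nonzero power of two. */
static int
is_pow2(uint64_t x)
{
        return (x != 0 && (x & (x - 1)) == 0);
}

int
main(void)
{
        printf("%d %d %d\n",
            is_pow2(1 << 10), is_pow2(1 << 17), is_pow2(3 << 10));      /* 1 1 0 */
        return (0);
}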
* * Timestamps: * vp - ctime|mtime updated @@ -4916,13 +4896,14 @@ zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, } /* - * Tunable, both must be a power of 2. - * - * zcr_blksz_min: the smallest read we may consider to loan out an arcbuf - * zcr_blksz_max: if set to less than the file block size, allow loaning out of - * an arcbuf for a partial block read + * The smallest read we may consider to loan out an arcbuf. + * This must be a power of 2. */ int zcr_blksz_min = (1 << 10); /* 1K */ +/* + * If set to less than the file block size, allow loaning out of an + * arcbuf for a partial block read. This must be a power of 2. + */ int zcr_blksz_max = (1 << 17); /* 128K */ /*ARGSUSED*/ @@ -5196,10 +5177,12 @@ const fs_operation_def_t zfs_sharevnodeops_template[] = { /* * Extended attribute directory vnode operations template - * This template is identical to the directory vnodes - * operation template except for restricted operations: - * VOP_MKDIR() - * VOP_SYMLINK() + * + * This template is identical to the directory vnodes + * operation template except for restricted operations: + * VOP_MKDIR() + * VOP_SYMLINK() + * * Note that there are other restrictions embedded in: * zfs_create() - restrict type to VREG * zfs_link() - no links into/out of attribute space diff --git a/usr/src/uts/common/fs/zfs/zfs_znode.c b/usr/src/uts/common/fs/zfs/zfs_znode.c index 7d4f3084ed..f9433b6b44 100644 --- a/usr/src/uts/common/fs/zfs/zfs_znode.c +++ b/usr/src/uts/common/fs/zfs/zfs_znode.c @@ -1006,9 +1006,8 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, } /* - * zfs_xvattr_set only updates the in-core attributes - * it is assumed the caller will be doing an sa_bulk_update - * to push the changes out + * Update in-core attributes. It is assumed the caller will be doing an + * sa_bulk_update to push the changes out. */ void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) @@ -1447,8 +1446,7 @@ zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, * IN: zp - znode of file to free data in. * end - new end-of-file * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure */ static int zfs_extend(znode_t *zp, uint64_t end) @@ -1525,8 +1523,7 @@ top: * off - start of section to free. * len - length of section to free. * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure */ static int zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) @@ -1564,8 +1561,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) * IN: zp - znode of file to free data in. * end - new end-of-file. * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure */ static int zfs_trunc(znode_t *zp, uint64_t end) @@ -1663,8 +1659,7 @@ top: * flag - current file open mode flags. * log - TRUE if this action should be logged * - * RETURN: 0 if success - * error code if failure + * RETURN: 0 on success, error code on failure */ int zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c index 8142e24210..da809916a4 100644 --- a/usr/src/uts/common/fs/zfs/zil.c +++ b/usr/src/uts/common/fs/zfs/zil.c @@ -66,9 +66,9 @@ */ /* - * This global ZIL switch affects all pools + * Disable intent logging replay. This global ZIL switch affects all pools. 
*/ -int zil_replay_disable = 0; /* disable intent logging replay */ +int zil_replay_disable = 0; /* * Tunable parameter for debugging or performance analysis. Setting @@ -879,6 +879,7 @@ zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb) /* * Define a limited set of intent log block sizes. + * * These must be a multiple of 4KB. Note only the amount used (again * aligned to 4KB) actually gets written. However, we can't always just * allocate SPA_MAXBLOCKSIZE as the slog space could be exhausted. diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index 425dcb3100..30738dce53 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -1193,13 +1193,16 @@ zio_interrupt(zio_t *zio) /* * Execute the I/O pipeline until one of the following occurs: - * (1) the I/O completes; (2) the pipeline stalls waiting for - * dependent child I/Os; (3) the I/O issues, so we're waiting - * for an I/O completion interrupt; (4) the I/O is delegated by - * vdev-level caching or aggregation; (5) the I/O is deferred - * due to vdev-level queueing; (6) the I/O is handed off to - * another thread. In all cases, the pipeline stops whenever - * there's no CPU work; it never burns a thread in cv_wait(). + * + * (1) the I/O completes + * (2) the pipeline stalls waiting for dependent child I/Os + * (3) the I/O issues, so we're waiting for an I/O completion interrupt + * (4) the I/O is delegated by vdev-level caching or aggregation + * (5) the I/O is deferred due to vdev-level queueing + * (6) the I/O is handed off to another thread. + * + * In all cases, the pipeline stops whenever there's no CPU work; it never + * burns a thread in cv_wait(). * * There's no locking on io_stage because there's no legitimate way * for multiple threads to be attempting to process the same I/O. diff --git a/usr/src/uts/common/io/sata/adapters/ahci/ahci.c b/usr/src/uts/common/io/sata/adapters/ahci/ahci.c index 73cef1199b..bea112d166 100644 --- a/usr/src/uts/common/io/sata/adapters/ahci/ahci.c +++ b/usr/src/uts/common/io/sata/adapters/ahci/ahci.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 
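The zil.c comment above says intent log block sizes, and the amount actually written, are aligned to 4 KB. The kernel normally uses the P2ROUNDUP() macro for that; the open-coded power-of-two round-up looks like this (roundup_p2() is my own name for it):

#include <stdio.h>
#include <stdint.h>

/* Round sz up to the next multiple of align; align must be a power of two. */
static uint64_t
roundup_p2(uint64_t sz, uint64_t align)
{
        return ((sz + align - 1) & ~(align - 1));
}

int
main(void)
{
        printf("%llu %llu\n",
            (unsigned long long)roundup_p2(5000, 4096),         /* 8192 */
            (unsigned long long)roundup_p2(8192, 4096));        /* 8192 */
        return (0);
}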
*/ /* @@ -105,13 +106,14 @@ static int ahci_check_slot_handle(ahci_port_t *, int); /* * Local function prototypes */ +static int ahci_setup_port_base_addresses(ahci_ctl_t *, ahci_port_t *); static int ahci_alloc_ports_state(ahci_ctl_t *); static void ahci_dealloc_ports_state(ahci_ctl_t *); static int ahci_alloc_port_state(ahci_ctl_t *, uint8_t); static void ahci_dealloc_port_state(ahci_ctl_t *, uint8_t); -static int ahci_alloc_rcvd_fis(ahci_ctl_t *, ahci_port_t *, uint8_t); +static int ahci_alloc_rcvd_fis(ahci_ctl_t *, ahci_port_t *); static void ahci_dealloc_rcvd_fis(ahci_port_t *); -static int ahci_alloc_cmd_list(ahci_ctl_t *, ahci_port_t *, uint8_t); +static int ahci_alloc_cmd_list(ahci_ctl_t *, ahci_port_t *); static void ahci_dealloc_cmd_list(ahci_ctl_t *, ahci_port_t *); static int ahci_alloc_cmd_tables(ahci_ctl_t *, ahci_port_t *); static void ahci_dealloc_cmd_tables(ahci_ctl_t *, ahci_port_t *); @@ -122,6 +124,7 @@ static int ahci_initialize_controller(ahci_ctl_t *); static void ahci_uninitialize_controller(ahci_ctl_t *); static int ahci_initialize_port(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *); static int ahci_config_space_init(ahci_ctl_t *); +static void ahci_staggered_spin_up(ahci_ctl_t *, uint8_t); static void ahci_drain_ports_taskq(ahci_ctl_t *); static int ahci_rdwr_pmult(ahci_ctl_t *, ahci_addr_t *, uint8_t, uint32_t *, @@ -454,6 +457,10 @@ _init(void) goto err_out; } + /* watchdog tick */ + ahci_watchdog_tick = drv_usectohz( + (clock_t)ahci_watchdog_timeout * 1000000); + ret = mod_install(&modlinkage); if (ret != 0) { sata_hba_fini(&modlinkage); @@ -464,9 +471,6 @@ _init(void) goto err_out; } - /* watchdog tick */ - ahci_watchdog_tick = drv_usectohz( - (clock_t)ahci_watchdog_timeout * 1000000); return (ret); err_out: @@ -517,6 +521,7 @@ ahci_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) int status; int attach_state; uint32_t cap_status, ahci_version; + uint32_t ghc_control; int intr_types; int i; pci_regspec_t *regs; @@ -544,6 +549,16 @@ ahci_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) ahci_ctlp = ddi_get_soft_state(ahci_statep, instance); mutex_enter(&ahci_ctlp->ahcictl_mutex); + /* + * GHC.AE must be set to 1 before any other AHCI register + * is accessed + */ + ghc_control = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_GLOBAL_GHC(ahci_ctlp)); + ghc_control |= AHCI_HBA_GHC_AE; + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_GLOBAL_GHC(ahci_ctlp), ghc_control); + /* Restart watch thread */ if (ahci_ctlp->ahcictl_timeout_id == 0) ahci_ctlp->ahcictl_timeout_id = timeout( @@ -655,6 +670,16 @@ ahci_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) attach_state |= AHCI_ATTACH_STATE_REG_MAP; + /* + * GHC.AE must be set to 1 before any other AHCI register + * is accessed + */ + ghc_control = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_GLOBAL_GHC(ahci_ctlp)); + ghc_control |= AHCI_HBA_GHC_AE; + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_GLOBAL_GHC(ahci_ctlp), ghc_control); + /* Get the AHCI version information */ ahci_version = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, (uint32_t *)AHCI_GLOBAL_VS(ahci_ctlp)); @@ -678,6 +703,18 @@ ahci_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) AHCIDBG(AHCIDBG_INIT, ahci_ctlp, "hba capabilities = 0x%x", cap_status); + /* CAP2 (HBA Capabilities Extended) is available since AHCI spec 1.2 */ + if (ahci_version >= 0x00010200) { + uint32_t cap2_status; + + /* Get the HBA capabilities extended information */ + cap2_status = 
ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_GLOBAL_CAP2(ahci_ctlp)); + + AHCIDBG(AHCIDBG_INIT, ahci_ctlp, + "hba capabilities extended = 0x%x", cap2_status); + } + #if AHCI_DEBUG /* Get the interface speed supported by the HBA */ speed = (cap_status & AHCI_HBA_CAP_ISS) >> AHCI_HBA_CAP_ISS_SHIFT; @@ -709,20 +746,12 @@ ahci_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) AHCIDBG(AHCIDBG_INIT, ahci_ctlp, "hba implementation of ports: 0x%x", ahci_ctlp->ahcictl_ports_implemented); - /* - * According to the AHCI spec, CAP.NP should indicate the maximum - * number of ports supported by the HBA silicon, but we found - * this value of ICH8 chipset only indicates the number of ports - * implemented (exposed) by it. Therefore, the driver should calculate - * the potential maximum value by checking PI register, and use - * the maximum of this value and CAP.NP. - */ - ahci_ctlp->ahcictl_num_ports = max( - (cap_status & AHCI_HBA_CAP_NP) + 1, - ddi_fls(ahci_ctlp->ahcictl_ports_implemented)); + /* Max port number implemented */ + ahci_ctlp->ahcictl_num_ports = + ddi_fls(ahci_ctlp->ahcictl_ports_implemented); AHCIDBG(AHCIDBG_INIT, ahci_ctlp, "hba number of ports: %d", - ahci_ctlp->ahcictl_num_ports); + (cap_status & AHCI_HBA_CAP_NP) + 1); /* Get the number of implemented ports by the HBA */ ahci_ctlp->ahcictl_num_implemented_ports = @@ -3537,6 +3566,7 @@ ahci_check_slot_handle(ahci_port_t *ahci_portp, int slot) } return (DDI_SUCCESS); } + /* * Allocate the ports structure, only called by ahci_attach */ @@ -3637,28 +3667,11 @@ ahci_initialize_controller(ahci_ctl_t *ahci_ctlp) { ahci_port_t *ahci_portp; ahci_addr_t addr; - uint32_t ghc_control; int port; AHCIDBG(AHCIDBG_INIT|AHCIDBG_ENTRY, ahci_ctlp, "ahci_initialize_controller enter", NULL); - mutex_enter(&ahci_ctlp->ahcictl_mutex); - - /* - * Indicate that system software is AHCI aware by setting - * GHC.AE to 1 - */ - ghc_control = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_GLOBAL_GHC(ahci_ctlp)); - - ghc_control |= AHCI_HBA_GHC_AE; - ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_GLOBAL_GHC(ahci_ctlp), - ghc_control); - - mutex_exit(&ahci_ctlp->ahcictl_mutex); - /* Initialize the implemented ports and structures */ for (port = 0; port < ahci_ctlp->ahcictl_num_ports; port++) { if (!AHCI_PORT_IMPLEMENTED(ahci_ctlp, port)) { @@ -3821,6 +3834,41 @@ ahci_dealloc_pmult(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp) } /* + * Staggered Spin-up. + * + * WARNING!!! ahciport_mutex should be acquired before the function + * is called. + */ +static void +ahci_staggered_spin_up(ahci_ctl_t *ahci_ctlp, uint8_t port) +{ + uint32_t cap_status; + uint32_t port_cmd_status; + + cap_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_GLOBAL_CAP(ahci_ctlp)); + + /* Check for staggered spin-up support */ + if (!(cap_status & AHCI_HBA_CAP_SSS)) + return; + + port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port)); + + /* If PxCMD.SUD == 1, no staggered spin-up is needed */ + if (port_cmd_status & AHCI_CMD_STATUS_SUD) + return; + + AHCIDBG(AHCIDBG_INIT, ahci_ctlp, "Spin-up at port %d", port); + + /* Set PxCMD.SUD */ + port_cmd_status |= AHCI_CMD_STATUS_SUD; + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port), + port_cmd_status); +} + +/* * The routine is to initialize a port. First put the port in NOTRunning * state, then enable port interrupt and clear Serror register. 
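The ahci_attach() change above now sizes ahcictl_num_ports from ddi_fls() of the ports-implemented bitmap; as I understand ddi_fls(), it returns the 1-based index of the highest bit set (0 if none), so a PI of 0x33 (ports 0, 1, 4 and 5 implemented) yields 6, enough slots to cover the highest implemented port. A stand-alone sketch of that calculation (my_fls() is my own routine, not the DDI one):

#include <stdio.h>
#include <stdint.h>

/* 1-based index of the highest set bit; 0 if no bits are set. */
static int
my_fls(uint32_t mask)
{
        int pos = 0;

        while (mask != 0) {
                pos++;
                mask >>= 1;
        }
        return (pos);
}

int
main(void)
{
        uint32_t pi = 0x33;     /* hypothetical: ports 0, 1, 4 and 5 implemented */

        printf("num_ports = %d\n", my_fls(pi));         /* 6 */
        return (0);
}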
And under * AHCI_ATTACH case, find device signature and then try to start the port. @@ -3874,15 +3922,9 @@ ahci_initialize_port(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, "set PxCLB, PxCLBU, PxFB and PxFBU " "during resume", port); - /* Config Port Received FIS Base Address */ - ddi_put64(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint64_t *)AHCI_PORT_PxFB(ahci_ctlp, port), - ahci_portp->ahciport_rcvd_fis_dma_cookie.dmac_laddress); - - /* Config Port Command List Base Address */ - ddi_put64(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint64_t *)AHCI_PORT_PxCLB(ahci_ctlp, port), - ahci_portp->ahciport_cmd_list_dma_cookie.dmac_laddress); + if (ahci_setup_port_base_addresses(ahci_ctlp, ahci_portp) != + AHCI_SUCCESS) + return (AHCI_FAILURE); } port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, @@ -3904,6 +3946,9 @@ ahci_initialize_port(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, ahci_portp, port); } + /* Make sure the drive is spun-up */ + ahci_staggered_spin_up(ahci_ctlp, port); + /* Disable interrupt */ ahci_disable_port_intrs(ahci_ctlp, port); @@ -3939,7 +3984,7 @@ ahci_initialize_port(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, if (ret != AHCI_SUCCESS) { AHCIDBG(AHCIDBG_INIT|AHCIDBG_ERRS, ahci_ctlp, "ahci_initialize_port:" - "port reset faild at port %d", port); + "port reset failed at port %d", port); return (AHCI_FAILURE); } @@ -3952,24 +3997,26 @@ ahci_initialize_port(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, return (AHCI_FAILURE); } } + AHCIPORT_SET_STATE(ahci_portp, addrp, SATA_STATE_READY); AHCIDBG(AHCIDBG_INIT, ahci_ctlp, "port %d is ready now.", port); /* * Try to get the device signature if the port is not empty. */ - if (!resuming && ahci_portp->ahciport_device_type != SATA_DTYPE_NONE) + if (!resuming && AHCIPORT_DEV_TYPE(ahci_portp, addrp) != + SATA_DTYPE_NONE) ahci_find_dev_signature(ahci_ctlp, ahci_portp, addrp); /* Return directly if no device connected */ - if (ahci_portp->ahciport_device_type == SATA_DTYPE_NONE) { + if (AHCIPORT_DEV_TYPE(ahci_portp, addrp) == SATA_DTYPE_NONE) { AHCIDBG(AHCIDBG_INIT, ahci_ctlp, "No device connected to port %d", port); goto out; } /* If this is a port multiplier, we need do some initialization */ - if (ahci_portp->ahciport_device_type == SATA_DTYPE_PMULT) { + if (AHCIPORT_DEV_TYPE(ahci_portp, addrp) == SATA_DTYPE_PMULT) { AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp, "Port multiplier found at port %d", port); ahci_alloc_pmult(ahci_ctlp, ahci_portp); @@ -4884,8 +4931,8 @@ ahci_probe_pmport(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, * the port must be idle and PxTFD.STS.BSY and PxTFD.STS.DRQ must be * cleared unless command list override (PxCMD.CLO) is supported. * - * WARNING!!! ahciport_mutex should be acquired and PxCMD.FRE should be - * set before the function is called. + * WARNING!!! ahciport_mutex should be acquired before the function + * is called. */ static int ahci_software_reset(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, @@ -5130,17 +5177,12 @@ out: * * When an HBA or port reset occurs, Phy communication is going to * be re-established with the device through a COMRESET followed by the - * normal out-of-band communication sequence defined in Serial ATA. AT + * normal out-of-band communication sequence defined in Serial ATA. At * the end of reset, the device, if working properly, will send a D2H * Register FIS, which contains the device signature. When the HBA receives * this FIS, it updates PxTFD.STS and PxTFD.ERR register fields, and updates * the PxSIG register with the signature. 
* - * Staggered spin-up is an optional feature in SATA II, and it enables an HBA - * to individually spin-up attached devices. Please refer to chapter 10.9 of - * AHCI 1.0 spec. - */ -/* * WARNING!!! ahciport_mutex should be acquired, and PxCMD.ST should be also * cleared before the function is called. */ @@ -5149,7 +5191,7 @@ ahci_port_reset(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, ahci_addr_t *addrp) { ahci_addr_t pmult_addr; - uint32_t cap_status, port_cmd_status; + uint32_t port_cmd_status; uint32_t port_scontrol, port_sstatus, port_serror; uint32_t port_intr_status, port_task_file; uint32_t port_state; @@ -5169,117 +5211,45 @@ ahci_port_reset(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, "Port %d port resetting...", port); ahci_portp->ahciport_port_state = 0; - cap_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_GLOBAL_CAP(ahci_ctlp)); - port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port)); - if (cap_status & AHCI_HBA_CAP_SSS) { - /* - * HBA support staggered spin-up, if the port has - * not spin up yet, then force it to do spin-up - */ - if (!(port_cmd_status & AHCI_CMD_STATUS_SUD)) { - if (!(ahci_portp->ahciport_flags - & AHCI_PORT_FLAG_SPINUP)) { - AHCIDBG(AHCIDBG_INIT, ahci_ctlp, - "Port %d PxCMD.SUD is zero, force " - "it to do spin-up", port); - ahci_portp->ahciport_flags |= - AHCI_PORT_FLAG_SPINUP; - } - } - } else { - /* - * HBA doesn't support stagger spin-up, force it - * to do normal COMRESET - */ - if (ahci_portp->ahciport_flags & - AHCI_PORT_FLAG_SPINUP) { - AHCIDBG(AHCIDBG_INIT, ahci_ctlp, - "HBA does not support staggered spin-up " - "force it to do normal COMRESET", NULL); - ahci_portp->ahciport_flags &= - ~AHCI_PORT_FLAG_SPINUP; - } - } - - if (!(ahci_portp->ahciport_flags & AHCI_PORT_FLAG_SPINUP)) { - /* Do normal COMRESET */ - AHCIDBG(AHCIDBG_INFO, ahci_ctlp, - "ahci_port_reset: do normal COMRESET", port); - - /* - * According to the spec, SUD bit should be set here, - * but JMicron JMB363 doesn't follow it, so remove - * the assertion, and just print a debug message. 
- */ -#if AHCI_DEBUG - if (!(port_cmd_status & AHCI_CMD_STATUS_SUD)) - AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, - "port %d SUD bit not set", port) -#endif - - port_scontrol = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_PORT_PxSCTL(ahci_ctlp, port)); - SCONTROL_SET_DET(port_scontrol, SCONTROL_DET_COMRESET); - - ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_PORT_PxSCTL(ahci_ctlp, port), - port_scontrol); - - /* Enable PxCMD.FRE to read device */ - ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port), - port_cmd_status|AHCI_CMD_STATUS_FRE); - - /* - * Give time for COMRESET to percolate, according to the AHCI - * spec, software shall wait at least 1 millisecond before - * clearing PxSCTL.DET - */ - drv_usecwait(AHCI_1MS_USECS*2); - - /* Fetch the SCONTROL again and rewrite the DET part with 0 */ - port_scontrol = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_PORT_PxSCTL(ahci_ctlp, port)); - SCONTROL_SET_DET(port_scontrol, SCONTROL_DET_NOACTION); - ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_PORT_PxSCTL(ahci_ctlp, port), - port_scontrol); - } else { - /* Do staggered spin-up */ - port_scontrol = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_PORT_PxSCTL(ahci_ctlp, port)); - SCONTROL_SET_DET(port_scontrol, SCONTROL_DET_NOACTION); + /* + * According to the spec, SUD bit should be set here, + * but JMicron JMB363 doesn't follow it, so print + * a debug message. + */ + if (!(port_cmd_status & AHCI_CMD_STATUS_SUD)) + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "ahci_port_reset: port %d SUD bit not set", port); - /* PxSCTL.DET must be 0 */ - ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_PORT_PxSCTL(ahci_ctlp, port), - port_scontrol); + port_scontrol = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxSCTL(ahci_ctlp, port)); + SCONTROL_SET_DET(port_scontrol, SCONTROL_DET_COMRESET); - port_cmd_status &= ~AHCI_CMD_STATUS_SUD; - ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port), - port_cmd_status); + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxSCTL(ahci_ctlp, port), + port_scontrol); - /* 0 -> 1 edge */ - drv_usecwait(AHCI_1MS_USECS*2); + /* Enable PxCMD.FRE to read device */ + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port), + port_cmd_status|AHCI_CMD_STATUS_FRE); - /* Set PxCMD.SUD to 1 */ - port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port)); - port_cmd_status |= AHCI_CMD_STATUS_SUD; - ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port), - port_cmd_status); + /* + * Give time for COMRESET to percolate, according to the AHCI + * spec, software shall wait at least 1 millisecond before + * clearing PxSCTL.DET + */ + drv_usecwait(AHCI_1MS_USECS * 2); - /* Enable PxCMD.FRE to read device */ - ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port), - port_cmd_status|AHCI_CMD_STATUS_FRE); - } + /* Fetch the SCONTROL again and rewrite the DET part with 0 */ + port_scontrol = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxSCTL(ahci_ctlp, port)); + SCONTROL_SET_DET(port_scontrol, SCONTROL_DET_NOACTION); + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxSCTL(ahci_ctlp, port), + port_scontrol); /* * The port enters P:StartComm state, and HBA tells link 
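The unified COMRESET path above rewrites only the DET field of PxSCTL: set it to 1 to request the reset, wait at least a millisecond (the code uses drv_usecwait(AHCI_1MS_USECS * 2)), then write it back to 0. Per the SATA spec as I recall, DET occupies bits 3:0 of SControl, with 1 meaning COMRESET and 0 meaning no action. A small sketch of that read-modify-write (macro and function names here are placeholders, not the driver's SCONTROL_SET_DET()):

#include <stdio.h>
#include <stdint.h>

#define DET_MASK        0xfu    /* PxSCTL.DET, bits 3:0 */
#define DET_NOACTION    0x0u
#define DET_COMRESET    0x1u

static uint32_t
set_det(uint32_t sctl, uint32_t det)
{
        return ((sctl & ~DET_MASK) | (det & DET_MASK));
}

int
main(void)
{
        uint32_t sctl = 0x00000300;     /* hypothetical starting PxSCTL value */

        sctl = set_det(sctl, DET_COMRESET);     /* start COMRESET */
        printf("0x%08x\n", sctl);               /* 0x00000301 */
        /* ... wait >= 1 ms before releasing the reset ... */
        sctl = set_det(sctl, DET_NOACTION);
        printf("0x%08x\n", sctl);               /* 0x00000300 */
        return (0);
}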
layer to @@ -5645,7 +5615,7 @@ err: /* R/W PMULT error */ * * When an HBA reset occurs, Phy communication will be re-established with * the device through a COMRESET followed by the normal out-of-band - * communication sequence defined in Serial ATA. AT the end of reset, the + * communication sequence defined in Serial ATA. At the end of reset, the * device, if working properly, will send a D2H Register FIS, which contains * the device signature. When the HBA receives this FIS, it updates PxTFD.STS * and PxTFD.ERR register fields, and updates the PxSIG register with the @@ -5657,7 +5627,6 @@ static int ahci_hba_reset(ahci_ctl_t *ahci_ctlp) { ahci_port_t *ahci_portp; - ahci_addr_t addr; uint32_t ghc_control; uint8_t port; int loop_count; @@ -5728,7 +5697,8 @@ ahci_hba_reset(ahci_ctl_t *ahci_ctlp) ahci_portp = ahci_ctlp->ahcictl_ports[port]; mutex_enter(&ahci_portp->ahciport_mutex); - AHCI_ADDR_SET_PORT(&addr, port); + /* Make sure the drive is spun-up */ + ahci_staggered_spin_up(ahci_ctlp, port); if (ahci_restart_port_wait_till_ready(ahci_ctlp, ahci_portp, port, AHCI_PORT_RESET|AHCI_RESET_NO_EVENTS_UP, NULL) != @@ -5854,7 +5824,6 @@ ahci_find_dev_signature(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, signature = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, (uint32_t *)AHCI_PORT_PxSIG(ahci_ctlp, port)); -#ifdef AHCI_DEBUG if (AHCI_ADDR_IS_PMPORT(addrp)) { AHCIDBG(AHCIDBG_INIT|AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp, "ahci_find_dev_signature: signature = 0x%x at port %d:%d", @@ -5864,7 +5833,6 @@ ahci_find_dev_signature(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, "ahci_find_dev_signature: signature = 0x%x at port %d", signature, port); } -#endif /* NOTE: Only support ATAPI device at controller port. */ if (signature == AHCI_SIGNATURE_ATAPI && !AHCI_ADDR_IS_PORT(addrp)) @@ -5995,6 +5963,87 @@ ahci_start_port(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, uint8_t port) } /* + * Setup PxCLB, PxCLBU, PxFB, and PxFBU for particular port. First, we need + * to make sure PxCMD.ST, PxCMD.CR, PxCMD.FRE, and PxCMD.FR are all cleared. + * Then set PxCLB, PxCLBU, PxFB, and PxFBU. + * + * WARNING!!! ahciport_mutex should be acquired before the function is called. + */ +static int +ahci_setup_port_base_addresses(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp) +{ + uint8_t port = ahci_portp->ahciport_port_num; + uint32_t port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port)); + + /* Step 1: Make sure both PxCMD.ST and PxCMD.CR are cleared. */ + if (port_cmd_status & (AHCI_CMD_STATUS_ST | AHCI_CMD_STATUS_CR)) { + if (ahci_put_port_into_notrunning_state(ahci_ctlp, ahci_portp, + port) != AHCI_SUCCESS) + return (AHCI_FAILURE); + + port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port)); + } + + /* Step 2: Make sure both PxCMD.FRE and PxCMD.FR are cleared. 
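ahci_find_dev_signature() above classifies the device from the PxSIG value, which is the D2H signature FIS packed into one 32-bit word. The conventional SATA signature values are 0x00000101 for an ATA disk, 0xEB140101 for ATAPI, and 0x96690101 for a port multiplier (values from the SATA spec as I recall; the driver's AHCI_SIGNATURE_* macros are the authoritative definitions). A tiny decoding sketch (sig_name() is my own helper):

#include <stdio.h>
#include <stdint.h>

static const char *
sig_name(uint32_t sig)
{
        switch (sig) {
        case 0x00000101:        /* ATA disk */
                return ("disk");
        case 0xeb140101:        /* ATAPI device */
                return ("atapi");
        case 0x96690101:        /* port multiplier */
                return ("pmult");
        default:
                return ("unknown");
        }
}

int
main(void)
{
        printf("%s\n", sig_name(0xeb140101));   /* atapi */
        return (0);
}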
*/ + if (port_cmd_status & (AHCI_CMD_STATUS_FRE | AHCI_CMD_STATUS_FR)) { + int loop_count = 0; + + /* Clear PxCMD.FRE */ + port_cmd_status &= ~AHCI_CMD_STATUS_FRE; + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port), + port_cmd_status); + + /* Wait until PxCMD.FR is cleared */ + for (;;) { + port_cmd_status = + ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port)); + + if (!(port_cmd_status & AHCI_CMD_STATUS_FR)) + break; + + if (loop_count++ >= AHCI_POLLRATE_PORT_IDLE_FR) { + AHCIDBG(AHCIDBG_INIT | AHCIDBG_ERRS, ahci_ctlp, + "ahci_setup_port_base_addresses: cannot " + "clear PxCMD.FR for port %d.", port); + + /* + * We are effectively timing out after 0.5 sec. + * This value is specified in AHCI spec. + */ + return (AHCI_FAILURE); + } + + /* Wait for 1 millisec */ + drv_usecwait(AHCI_1MS_USECS); + } + } + + /* Step 3: Config Port Command List Base Address */ + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCLB(ahci_ctlp, port), + ahci_portp->ahciport_cmd_list_dma_cookie.dmac_address); + + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCLBU(ahci_ctlp, port), + ahci_portp->ahciport_cmd_list_dma_cookie.dmac_notused); + + /* Step 4: Config Port Received FIS Base Address */ + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxFB(ahci_ctlp, port), + ahci_portp->ahciport_rcvd_fis_dma_cookie.dmac_address); + + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxFBU(ahci_ctlp, port), + ahci_portp->ahciport_rcvd_fis_dma_cookie.dmac_notused); + + return (AHCI_SUCCESS); +} + +/* * Allocate the ahci_port_t including Received FIS and Command List. * The argument - port is the physical port number, and not logical * port number seen by the SATA framework. @@ -6028,14 +6077,20 @@ ahci_alloc_port_state(ahci_ctl_t *ahci_ctlp, uint8_t port) * Allocate memory for received FIS structure and * command list for this port */ - if (ahci_alloc_rcvd_fis(ahci_ctlp, ahci_portp, port) != AHCI_SUCCESS) { + if (ahci_alloc_rcvd_fis(ahci_ctlp, ahci_portp) != AHCI_SUCCESS) { goto err_case1; } - if (ahci_alloc_cmd_list(ahci_ctlp, ahci_portp, port) != AHCI_SUCCESS) { + if (ahci_alloc_cmd_list(ahci_ctlp, ahci_portp) != AHCI_SUCCESS) { goto err_case2; } + /* Setup PxCMD.CLB, PxCMD.CLBU, PxCMD.FB, and PxCMD.FBU */ + if (ahci_setup_port_base_addresses(ahci_ctlp, ahci_portp) != + AHCI_SUCCESS) { + goto err_case3; + } + (void) snprintf(taskq_name + strlen(taskq_name), sizeof (taskq_name) - strlen(taskq_name), "_port%d", port); @@ -6087,7 +6142,7 @@ err_case1: } /* - * Reverse of ahci_dealloc_port_state(). + * Reverse of ahci_alloc_port_state(). * * WARNING!!! ahcictl_mutex should be acquired before the function * is called. @@ -6126,8 +6181,7 @@ ahci_dealloc_port_state(ahci_ctl_t *ahci_ctlp, uint8_t port) * is called. 
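Steps 3 and 4 above write the command-list and received-FIS base addresses as two 32-bit halves: the cookie's dmac_address (low half) goes to PxCLB/PxFB and dmac_notused (high half) to PxCLBU/PxFBU, replacing the earlier single ddi_put64(). A stand-alone sketch of the split itself (the 64-bit value is made up; the register names are the real AHCI ones):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        uint64_t laddress = 0x0000000123456000ULL;      /* hypothetical DMA cookie address */
        uint32_t lo = (uint32_t)(laddress & 0xffffffffULL);     /* -> PxCLB / PxFB */
        uint32_t hi = (uint32_t)(laddress >> 32);               /* -> PxCLBU / PxFBU */

        printf("low = 0x%08x, high = 0x%08x\n", lo, hi);
        return (0);
}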
*/ static int -ahci_alloc_rcvd_fis(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, - uint8_t port) +ahci_alloc_rcvd_fis(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp) { size_t rcvd_fis_size; size_t ret_len; @@ -6185,11 +6239,6 @@ ahci_alloc_rcvd_fis(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, bzero((void *)ahci_portp->ahciport_rcvd_fis, rcvd_fis_size); - /* Config Port Received FIS Base Address */ - ddi_put64(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint64_t *)AHCI_PORT_PxFB(ahci_ctlp, port), - ahci_portp->ahciport_rcvd_fis_dma_cookie.dmac_laddress); - AHCIDBG(AHCIDBG_INIT, ahci_ctlp, "64-bit, dma address: 0x%llx", ahci_portp->ahciport_rcvd_fis_dma_cookie.dmac_laddress); AHCIDBG(AHCIDBG_INIT, ahci_ctlp, "32-bit, dma address: 0x%x", @@ -6226,8 +6275,7 @@ ahci_dealloc_rcvd_fis(ahci_port_t *ahci_portp) * is called. */ static int -ahci_alloc_cmd_list(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, - uint8_t port) +ahci_alloc_cmd_list(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp) { size_t cmd_list_size; size_t ret_len; @@ -6285,11 +6333,6 @@ ahci_alloc_cmd_list(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, bzero((void *)ahci_portp->ahciport_cmd_list, cmd_list_size); - /* Config Port Command List Base Address */ - ddi_put64(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint64_t *)AHCI_PORT_PxCLB(ahci_ctlp, port), - ahci_portp->ahciport_cmd_list_dma_cookie.dmac_laddress); - AHCIDBG(AHCIDBG_INIT, ahci_ctlp, "64-bit, dma address: 0x%llx", ahci_portp->ahciport_cmd_list_dma_cookie.dmac_laddress); diff --git a/usr/src/uts/common/sys/fm/fs/zfs.h b/usr/src/uts/common/sys/fm/fs/zfs.h index c752edc99b..029af540b3 100644 --- a/usr/src/uts/common/sys/fm/fs/zfs.h +++ b/usr/src/uts/common/sys/fm/fs/zfs.h @@ -46,6 +46,7 @@ extern "C" { #define FM_EREPORT_ZFS_IO_FAILURE "io_failure" #define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure" #define FM_EREPORT_ZFS_LOG_REPLAY "log_replay" +#define FM_EREPORT_ZFS_CONFIG_CACHE_WRITE "config_cache_write" #define FM_EREPORT_PAYLOAD_ZFS_POOL "pool" #define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode" diff --git a/usr/src/uts/common/sys/sata/adapters/ahci/ahcireg.h b/usr/src/uts/common/sys/sata/adapters/ahci/ahcireg.h index 5614c929d5..e738783dfe 100644 --- a/usr/src/uts/common/sys/sata/adapters/ahci/ahcireg.h +++ b/usr/src/uts/common/sys/sata/adapters/ahci/ahcireg.h @@ -23,7 +23,9 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ - +/* + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + */ #ifndef _AHCIREG_H #define _AHCIREG_H @@ -134,6 +136,10 @@ extern "C" { #define AHCI_GLOBAL_EM_LOC(ahci_ctlp) (AHCI_GLOBAL_OFFSET(ahci_ctlp) + 0x1c) /* Enclosure Management Control */ #define AHCI_GLOBAL_EM_CTL(ahci_ctlp) (AHCI_GLOBAL_OFFSET(ahci_ctlp) + 0x20) + /* HBA Capabilities Extended (AHCI spec 1.2) */ +#define AHCI_GLOBAL_CAP2(ahci_ctlp) (AHCI_GLOBAL_OFFSET(ahci_ctlp) + 0x24) + /* BIOS/OS Handoff Control and Status (AHCI spec 1.2) */ +#define AHCI_GLOBAL_BOHC(ahci_ctlp) (AHCI_GLOBAL_OFFSET(ahci_ctlp) + 0x28) #define AHCI_PORT_IMPLEMENTED(ahci_ctlp, port) \ ((0x1 << port) & ahci_ctlp->ahcictl_ports_implemented) diff --git a/usr/src/uts/common/sys/sata/adapters/ahci/ahcivar.h b/usr/src/uts/common/sys/sata/adapters/ahci/ahcivar.h index bf8671425c..b28d0b6464 100644 --- a/usr/src/uts/common/sys/sata/adapters/ahci/ahcivar.h +++ b/usr/src/uts/common/sys/sata/adapters/ahci/ahcivar.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright 2013 Nexenta Systems, Inc. All rights reserved. */ @@ -133,9 +134,6 @@ typedef struct ahci_pmult_info ahci_pmult_info_t; /* * flags for ahciport_flags * - * AHCI_PORT_FLAG_SPINUP: this flag will be set when a HBA which supports - * staggered spin-up needs to do a spin-up. - * * AHCI_PORT_FLAG_MOPPING: this flag will be set when the HBA is stopped, * and all the outstanding commands need to be aborted and sent to upper * layers. @@ -173,7 +171,6 @@ typedef struct ahci_pmult_info ahci_pmult_info_t; * will be printed. Note that, for INDENTIFY DEVICE command sent to ATAPI * device or ATAPI PACKET command, this flag won't be set. */ -#define AHCI_PORT_FLAG_SPINUP 0x01 #define AHCI_PORT_FLAG_MOPPING 0x02 #define AHCI_PORT_FLAG_POLLING 0x04 #define AHCI_PORT_FLAG_RQSENSE 0x08 @@ -199,7 +196,6 @@ typedef struct ahci_port { ahci_pmult_info_t *ahciport_pmult_info; /* - * AHCI_PORT_FLAG_SPINUP * AHCI_PORT_FLAG_MOPPING * AHCI_PORT_FLAG_POLLING * AHCI_PORT_FLAG_RQSENSE @@ -552,6 +548,7 @@ _NOTE(MUTEX_PROTECTS_DATA(ahci_ctl_t::ahcictl_mutex, #define AHCI_POLLRATE_PORT_IDLE 50 #define AHCI_POLLRATE_PORT_SOFTRESET 100 #define AHCI_POLLRATE_GET_SPKT 100 +#define AHCI_POLLRATE_PORT_IDLE_FR 500 /* Clearing & setting the n'th bit in a given tag */ |
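The ahcivar.h comment the page ends on introduces the driver's macros for clearing and setting the n'th bit of a tag word (the definitions themselves are cut off here). The generic forms of those two operations look like this (illustrative macros, not the driver's own definitions):

#include <stdio.h>
#include <stdint.h>

#define SET_BIT(tags, n)        ((tags) |= (1U << (n)))
#define CLEAR_BIT(tags, n)      ((tags) &= ~(1U << (n)))

int
main(void)
{
        uint32_t tags = 0;

        SET_BIT(tags, 3);
        SET_BIT(tags, 0);
        CLEAR_BIT(tags, 3);
        printf("0x%08x\n", tags);       /* 0x00000001 */
        return (0);
}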