Diffstat (limited to 'usr/src/uts/common')
56 files changed, 3633 insertions, 339 deletions
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index d91379be96..fa9a3a4bf4 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -21,8 +21,8 @@ # # Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright 2011 Nexenta Systems, Inc. All rights reserved. # Copyright (c) 2012 Joyent, Inc. All rights reserved. +# Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved. # Copyright (c) 2012 by Delphix. All rights reserved. # @@ -1953,6 +1953,16 @@ NXGE_HCALL_OBJS = \ nxge_hcall.o # +# Virtio modules +# + +# Virtio core +VIRTIO_OBJS = virtio.o + +# Virtio block driver +VIOBLK_OBJS = vioblk.o + +# # kiconv modules # KICONV_EMEA_OBJS += kiconv_emea.o diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules index b420a7d8e1..27478a210d 100644 --- a/usr/src/uts/common/Makefile.rules +++ b/usr/src/uts/common/Makefile.rules @@ -24,8 +24,8 @@ # # -# Copyright 2011 Nexenta Systems, Inc. All rights reserved. # Copyright (c) 2012 Joyent, Inc. All rights reserved. +# Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved. # # @@ -1420,6 +1420,14 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/yge/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/virtio/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vioblk/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + # # krtld must refer to its own bzero/bcopy until the kernel is fully linked # @@ -2671,6 +2679,12 @@ $(LINTS_DIR)/%.ln: $(COMMONBASE)/iscsi/%.c $(LINTS_DIR)/%.ln: $(UTSBASE)/common/inet/kifconf/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/virtio/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/vioblk/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + ZMODLINTFLAGS = -erroff=E_CONSTANT_CONDITION $(LINTS_DIR)/%.ln: $(UTSBASE)/common/zmod/%.c diff --git a/usr/src/uts/common/cpr/cpr_main.c b/usr/src/uts/common/cpr/cpr_main.c index 68a4040186..15e8c6c8d8 100644 --- a/usr/src/uts/common/cpr/cpr_main.c +++ b/usr/src/uts/common/cpr/cpr_main.c @@ -166,7 +166,7 @@ cpr_main(int sleeptype) */ rc = i_cpr_power_down(sleeptype); if (rc == 0) { - PMD(PMD_SX, ("back from succssful suspend\n")) + PMD(PMD_SX, ("back from successful suspend\n")) } /* * We do care about the return value from cpr_resume diff --git a/usr/src/uts/common/exec/elf/elf_notes.c b/usr/src/uts/common/exec/elf/elf_notes.c index 8649e64d48..719d215dd5 100644 --- a/usr/src/uts/common/exec/elf/elf_notes.c +++ b/usr/src/uts/common/exec/elf/elf_notes.c @@ -24,7 +24,9 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. 
+ */ #include <sys/types.h> #include <sys/param.h> @@ -34,8 +36,11 @@ #include <sys/cred.h> #include <sys/priv.h> #include <sys/user.h> +#include <sys/file.h> #include <sys/errno.h> #include <sys/vnode.h> +#include <sys/mode.h> +#include <sys/vfs.h> #include <sys/mman.h> #include <sys/kmem.h> #include <sys/proc.h> @@ -57,6 +62,7 @@ #include <sys/modctl.h> #include <sys/systeminfo.h> #include <sys/machelf.h> +#include <sys/sunddi.h> #include "elf_impl.h" #if defined(__i386) || defined(__i386_COMPAT) #include <sys/sysi86.h> @@ -67,12 +73,27 @@ setup_note_header(Phdr *v, proc_t *p) { int nlwp = p->p_lwpcnt; int nzomb = p->p_zombcnt; + int nfd; size_t size; prcred_t *pcrp; + uf_info_t *fip; + uf_entry_t *ufp; + int fd; + + fip = P_FINFO(p); + nfd = 0; + mutex_enter(&fip->fi_lock); + for (fd = 0; fd < fip->fi_nfiles; fd++) { + UF_ENTER(ufp, fip, fd); + if ((ufp->uf_file != NULL) && (ufp->uf_file->f_count > 0)) + nfd++; + UF_EXIT(ufp); + } + mutex_exit(&fip->fi_lock); v[0].p_type = PT_NOTE; v[0].p_flags = PF_R; - v[0].p_filesz = (sizeof (Note) * (9 + 2 * nlwp + nzomb)) + v[0].p_filesz = (sizeof (Note) * (9 + 2 * nlwp + nzomb + nfd)) + roundup(sizeof (psinfo_t), sizeof (Word)) + roundup(sizeof (pstatus_t), sizeof (Word)) + roundup(prgetprivsize(), sizeof (Word)) @@ -83,7 +104,8 @@ setup_note_header(Phdr *v, proc_t *p) + roundup(sizeof (utsname), sizeof (Word)) + roundup(sizeof (core_content_t), sizeof (Word)) + (nlwp + nzomb) * roundup(sizeof (lwpsinfo_t), sizeof (Word)) - + nlwp * roundup(sizeof (lwpstatus_t), sizeof (Word)); + + nlwp * roundup(sizeof (lwpstatus_t), sizeof (Word)) + + nfd * roundup(sizeof (prfdinfo_t), sizeof (Word)); size = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1); pcrp = kmem_alloc(size, KM_SLEEP); @@ -97,6 +119,7 @@ setup_note_header(Phdr *v, proc_t *p) } kmem_free(pcrp, size); + #if defined(__i386) || defined(__i386_COMPAT) mutex_enter(&p->p_ldtlock); size = prnldt(p) * sizeof (struct ssd); @@ -159,7 +182,7 @@ write_elfnotes(proc_t *p, int sig, vnode_t *vp, offset_t offset, size_t crsize = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1); size_t psize = prgetprivsize(); size_t bigsize = MAX(psize, MAX(sizeof (*bigwad), - MAX(xregsize, crsize))); + MAX(xregsize, crsize))); priv_impl_info_t *prii; @@ -173,6 +196,10 @@ write_elfnotes(proc_t *p, int sig, vnode_t *vp, offset_t offset, int nzomb; int error; uchar_t oldsig; + uf_info_t *fip; + int fd; + vnode_t *vroot; + #if defined(__i386) || defined(__i386_COMPAT) struct ssd *ssd; size_t ssdsize; @@ -293,6 +320,89 @@ write_elfnotes(proc_t *p, int sig, vnode_t *vp, offset_t offset, if (error) goto done; + + /* open file table */ + vroot = PTOU(p)->u_rdir; + if (vroot == NULL) + vroot = rootdir; + + VN_HOLD(vroot); + + fip = P_FINFO(p); + + for (fd = 0; fd < fip->fi_nfiles; fd++) { + uf_entry_t *ufp; + vnode_t *fvp; + struct file *fp; + vattr_t vattr; + prfdinfo_t fdinfo; + + bzero(&fdinfo, sizeof (fdinfo)); + + mutex_enter(&fip->fi_lock); + UF_ENTER(ufp, fip, fd); + if (((fp = ufp->uf_file) == NULL) || (fp->f_count < 1)) { + UF_EXIT(ufp); + mutex_exit(&fip->fi_lock); + continue; + } + + fdinfo.pr_fd = fd; + fdinfo.pr_fdflags = ufp->uf_flag; + fdinfo.pr_fileflags = fp->f_flag2; + fdinfo.pr_fileflags <<= 16; + fdinfo.pr_fileflags |= fp->f_flag; + if ((fdinfo.pr_fileflags & (FSEARCH | FEXEC)) == 0) + fdinfo.pr_fileflags += FOPEN; + fdinfo.pr_offset = fp->f_offset; + + + fvp = fp->f_vnode; + VN_HOLD(fvp); + UF_EXIT(ufp); + mutex_exit(&fip->fi_lock); + + /* + * There are some vnodes that have no corresponding + * 
path. It's reasonable for this to fail, in which + * case the path will remain an empty string. + */ + (void) vnodetopath(vroot, fvp, fdinfo.pr_path, + sizeof (fdinfo.pr_path), credp); + + error = VOP_GETATTR(fvp, &vattr, 0, credp, NULL); + if (error != 0) { + VN_RELE(fvp); + VN_RELE(vroot); + goto done; + } + + if (fvp->v_type == VSOCK) + fdinfo.pr_fileflags |= sock_getfasync(fvp); + + VN_RELE(fvp); + + /* + * This logic mirrors fstat(), which we cannot use + * directly, as it calls copyout(). + */ + fdinfo.pr_major = getmajor(vattr.va_fsid); + fdinfo.pr_minor = getminor(vattr.va_fsid); + fdinfo.pr_ino = (ino64_t)vattr.va_nodeid; + fdinfo.pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode; + fdinfo.pr_uid = vattr.va_uid; + fdinfo.pr_gid = vattr.va_gid; + fdinfo.pr_rmajor = getmajor(vattr.va_rdev); + fdinfo.pr_rminor = getminor(vattr.va_rdev); + fdinfo.pr_size = (off64_t)vattr.va_size; + + error = elfnote(vp, &offset, NT_FDINFO, + sizeof (fdinfo), &fdinfo, rlimit, credp); + if (error) { + goto done; + } + } + #if defined(__i386) || defined(__i386_COMPAT) mutex_enter(&p->p_ldtlock); ssdsize = prnldt(p) * sizeof (struct ssd); diff --git a/usr/src/uts/common/fs/nfs/nfs3_srv.c b/usr/src/uts/common/fs/nfs/nfs3_srv.c index c72f823cd3..4acbe92ad9 100644 --- a/usr/src/uts/common/fs/nfs/nfs3_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs3_srv.c @@ -433,16 +433,25 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi, goto out1; } + exi_hold(exi); + /* * If the public filehandle is used then allow * a multi-component lookup */ if (PUBLIC_FH3(&args->what.dir)) { + struct exportinfo *new; + publicfh_flag = TRUE; + error = rfs_publicfh_mclookup(name, dvp, cr, &vp, - &exi, &sec); - if (error && exi != NULL) - exi_rele(exi); /* See comment below Re: publicfh_flag */ + &new, &sec); + + if (error == 0) { + exi_rele(exi); + exi = new; + } + /* * Since WebNFS may bypass MOUNT, we need to ensure this * request didn't come from an unlabeled admin_low client. @@ -464,8 +473,6 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi, if (tp == NULL || tp->tpc_tp.tp_doi != l_admin_low->tsl_doi || tp->tpc_tp.host_type != SUN_CIPSO) { - if (exi != NULL) - exi_rele(exi); VN_RELE(vp); resp->status = NFS3ERR_ACCES; error = 1; @@ -491,8 +498,6 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi, if (!blequal(&l_admin_low->tsl_label, clabel)) { if (!do_rfs_label_check(clabel, dvp, DOMINANCE_CHECK, exi)) { - if (publicfh_flag && exi != NULL) - exi_rele(exi); VN_RELE(vp); resp->status = NFS3ERR_ACCES; error = 1; @@ -519,18 +524,10 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi, goto out; } - /* - * If publicfh_flag is true then we have called rfs_publicfh_mclookup - * and have obtained a new exportinfo in exi which needs to be - * released. Note the the original exportinfo pointed to by exi - * will be released by the caller, common_dispatch. - */ - if (publicfh_flag) - exi_rele(exi); - va.va_mask = AT_ALL; vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? 
NULL : &va; + exi_rele(exi); VN_RELE(vp); resp->status = NFS3_OK; @@ -552,6 +549,12 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi, return; out: + /* + * The passed argument exportinfo is released by the + * caller, common_dispatch + */ + exi_rele(exi); + if (curthread->t_flag & T_WOULDBLOCK) { curthread->t_flag &= ~T_WOULDBLOCK; resp->status = NFS3ERR_JUKEBOX; diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv.c b/usr/src/uts/common/fs/nfs/nfs4_srv.c index 29a9d67497..f2a9734541 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv.c @@ -21,6 +21,9 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. + */ /* * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. @@ -1131,6 +1134,7 @@ rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, char *nm; struct sockaddr *ca; char *name = NULL; + nfsstat4 status = NFS4_OK; DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs, SECINFO4args *, args); @@ -1154,11 +1158,12 @@ rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, * do not error out if the component name is a "..". * SECINFO will return its parents secinfo data for SECINFO "..". */ - if (!utf8_dir_verify(utfnm)) { + status = utf8_dir_verify(utfnm); + if (status != NFS4_OK) { if (utfnm->utf8string_len != 2 || utfnm->utf8string_val[0] != '.' || utfnm->utf8string_val[1] != '.') { - *cs->statusp = resp->status = NFS4ERR_INVAL; + *cs->statusp = resp->status = status; goto out; } } @@ -1336,7 +1341,8 @@ rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, blequal(clabel, slabel))) resp->access |= (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND)); - resp->supported |= (ACCESS4_MODIFY | ACCESS4_EXTEND); + resp->supported |= + resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND); } if (checkwriteperm && @@ -1570,8 +1576,9 @@ rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, *cs->statusp = resp->status = NFS4ERR_NOTDIR; goto out; } - if (!utf8_dir_verify(&args->objname)) { - *cs->statusp = resp->status = NFS4ERR_INVAL; + status = utf8_dir_verify(&args->objname); + if (status != NFS4_OK) { + *cs->statusp = resp->status = status; goto out; } @@ -2446,6 +2453,7 @@ rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, uint_t len; struct sockaddr *ca; char *name = NULL; + nfsstat4 status; DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs, LINK4args *, args); @@ -2495,8 +2503,9 @@ rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, goto out; } - if (!utf8_dir_verify(&args->newname)) { - *cs->statusp = resp->status = NFS4ERR_INVAL; + status = utf8_dir_verify(&args->newname); + if (status != NFS4_OK) { + *cs->statusp = resp->status = status; goto out; } @@ -2886,6 +2895,7 @@ rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, uint_t len; struct sockaddr *ca; char *name = NULL; + nfsstat4 status; DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs, LOOKUP4args *, args); @@ -2905,8 +2915,9 @@ rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, goto out; } - if (!utf8_dir_verify(&args->objname)) { - *cs->statusp = resp->status = NFS4ERR_INVAL; + status = utf8_dir_verify(&args->objname); + if (status != NFS4_OK) { + *cs->statusp = resp->status = status; goto out; } @@ -3655,30 +3666,6 @@ out: } /* - * A directory entry is a valid nfsv4 entry if - * - it 
has a non-zero ino - * - it is not a dot or dotdot name - * - it is visible in a pseudo export or in a real export that can - * only have a limited view. - */ -static bool_t -valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp, - int *expseudo, int check_visible) -{ - if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) { - *expseudo = 0; - return (FALSE); - } - - if (! check_visible) { - *expseudo = 0; - return (TRUE); - } - - return (nfs_visible_inode(exi, dp->d_ino, expseudo)); -} - -/* * set_rdattr_params sets up the variables used to manage what information * to get for each directory entry. */ @@ -4101,6 +4088,7 @@ rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, bslabel_t *clabel; struct sockaddr *ca; char *name = NULL; + nfsstat4 status; DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs, REMOVE4args *, args); @@ -4131,8 +4119,9 @@ rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, goto out; } - if (!utf8_dir_verify(&args->target)) { - *cs->statusp = resp->status = NFS4ERR_INVAL; + status = utf8_dir_verify(&args->target); + if (status != NFS4_OK) { + *cs->statusp = resp->status = status; goto out; } @@ -4398,6 +4387,7 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, struct sockaddr *ca; char *converted_onm = NULL; char *converted_nnm = NULL; + nfsstat4 status; DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs, RENAME4args *, args); @@ -4454,13 +4444,15 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, goto out; } - if (!utf8_dir_verify(&args->oldname)) { - *cs->statusp = resp->status = NFS4ERR_INVAL; + status = utf8_dir_verify(&args->oldname); + if (status != NFS4_OK) { + *cs->statusp = resp->status = status; goto out; } - if (!utf8_dir_verify(&args->newname)) { - *cs->statusp = resp->status = NFS4ERR_INVAL; + status = utf8_dir_verify(&args->newname); + if (status != NFS4_OK) { + *cs->statusp = resp->status = status; goto out; } @@ -5789,6 +5781,8 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi, cs.statusp = &resp->status; cs.req = req; + resp->array = NULL; + resp->array_len = 0; /* * XXX for now, minorversion should be zero @@ -5796,14 +5790,17 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi, if (args->minorversion != NFS4_MINORVERSION) { DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs, COMPOUND4args *, args); - resp->array_len = 0; - resp->array = NULL; resp->status = NFS4ERR_MINOR_VERS_MISMATCH; DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs, COMPOUND4res *, resp); return; } + if (args->array_len == 0) { + resp->status = NFS4_OK; + return; + } + ASSERT(exi == NULL); ASSERT(cr == NULL); @@ -6079,8 +6076,9 @@ rfs4_lookup(component4 *component, struct svc_req *req, return (NFS4ERR_NOTDIR); } - if (!utf8_dir_verify(component)) - return (NFS4ERR_INVAL); + status = utf8_dir_verify(component); + if (status != NFS4_OK) + return (status); nm = utf8_to_fn(component, &len, NULL); if (nm == NULL) { @@ -6372,8 +6370,9 @@ rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs, * the including directory on success. 
*/ component = &args->open_claim4_u.file; - if (!utf8_dir_verify(component)) - return (NFS4ERR_INVAL); + status = utf8_dir_verify(component); + if (status != NFS4_OK) + return (status); nm = utf8_to_fn(component, &buflen, NULL); @@ -7594,6 +7593,12 @@ rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop, goto out; } + if (cs->vp->v_type != VREG) { + *cs->statusp = resp->status = + cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL; + return; + } + status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID); if (status != NFS4_OK) { *cs->statusp = resp->status = status; @@ -7709,6 +7714,11 @@ rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop, goto out; } + if (cs->vp->v_type != VREG) { + *cs->statusp = resp->status = NFS4ERR_INVAL; + return; + } + status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID); if (status != NFS4_OK) { *cs->statusp = resp->status = status; diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c b/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c index dbd3263608..855cd8cd92 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c @@ -22,6 +22,9 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. + */ #include <sys/systm.h> #include <sys/cmn_err.h> @@ -1585,7 +1588,8 @@ rfs4_fattr4_fs_locations(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sarg, case NFS4ATTR_GETIT: fsl = fetch_referral(sarg->cs->vp, sarg->cs->cr); if (fsl == NULL) - error = EINVAL; + (void) memset(&(na->fs_locations), 0, + sizeof (fs_locations4)); else { na->fs_locations = *fsl; kmem_free(fsl, sizeof (fs_locations4)); diff --git a/usr/src/uts/common/fs/nfs/nfs4_subr.c b/usr/src/uts/common/fs/nfs/nfs4_subr.c index c14117c009..cfac742707 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_subr.c +++ b/usr/src/uts/common/fs/nfs/nfs4_subr.c @@ -22,6 +22,9 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. + */ /* * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. @@ -713,33 +716,33 @@ utf8_compare(const utf8string *a, const utf8string *b) /* * utf8_dir_verify - checks that the utf8 string is valid */ -int +nfsstat4 utf8_dir_verify(utf8string *str) { char *nm; int len; if (str == NULL) - return (0); + return (NFS4ERR_INVAL); nm = str->utf8string_val; len = str->utf8string_len; if (nm == NULL || len == 0) { - return (0); + return (NFS4ERR_INVAL); } if (len == 1 && nm[0] == '.') - return (0); + return (NFS4ERR_BADNAME); if (len == 2 && nm[0] == '.' && nm[1] == '.') - return (0); + return (NFS4ERR_BADNAME); if (utf8_strchr(str, '/') != NULL) - return (0); + return (NFS4ERR_BADNAME); if (utf8_strchr(str, '\0') != NULL) - return (0); + return (NFS4ERR_BADNAME); - return (1); + return (NFS4_OK); } /* diff --git a/usr/src/uts/common/fs/nfs/nfs_server.c b/usr/src/uts/common/fs/nfs/nfs_server.c index bb625bb175..22d1ad4d68 100644 --- a/usr/src/uts/common/fs/nfs/nfs_server.c +++ b/usr/src/uts/common/fs/nfs/nfs_server.c @@ -22,6 +22,7 @@ * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Bayard G. Bell. All rights reserved. * Copyright (c) 2012 Joyent, Inc. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 
*/ /* @@ -2804,8 +2805,8 @@ rfs_publicfh_mclookup(char *p, vnode_t *dvp, cred_t *cr, vnode_t **vpp, */ /* Release the reference on the old exi value */ - ASSERT(*exi != NULL); exi_rele(*exi); + *exi = NULL; if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) { VN_RELE(*vpp); @@ -2818,6 +2819,9 @@ publicfh_done: if (mc_dvp) VN_RELE(mc_dvp); + if (error && *exi != NULL) + exi_rele(*exi); + return (error); } @@ -2963,16 +2967,19 @@ URLparse(char *str) /* * Get the export information for the lookup vnode, and verify it's * usable. + * + * Set @exip only on success */ int nfs_check_vpexi(vnode_t *mc_dvp, vnode_t *vp, cred_t *cr, - struct exportinfo **exi) + struct exportinfo **exip) { int walk; int error = 0; + struct exportinfo *exi; - *exi = nfs_vptoexi(mc_dvp, vp, cr, &walk, NULL, FALSE); - if (*exi == NULL) + exi = nfs_vptoexi(mc_dvp, vp, cr, &walk, NULL, FALSE); + if (exi == NULL) error = EACCES; else { /* @@ -2981,10 +2988,13 @@ nfs_check_vpexi(vnode_t *mc_dvp, vnode_t *vp, cred_t *cr, * must not terminate below the * exported directory. */ - if ((*exi)->exi_export.ex_flags & EX_NOSUB && walk > 0) + if (exi->exi_export.ex_flags & EX_NOSUB && walk > 0) { error = EACCES; + exi_rele(exi); + } } - + if (error == 0) + *exip = exi; return (error); } diff --git a/usr/src/uts/common/fs/nfs/nfs_srv.c b/usr/src/uts/common/fs/nfs/nfs_srv.c index 8ca8ee5d1d..f0cd9633aa 100644 --- a/usr/src/uts/common/fs/nfs/nfs_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs_srv.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ /* @@ -399,6 +400,8 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, return; } + exi_hold(exi); + /* * If the public filehandle is used then allow * a multi-component lookup, i.e. evaluate @@ -409,9 +412,16 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, * which is OK as long as the filesystem is exported. */ if (PUBLIC_FH2(fhp)) { + struct exportinfo *new; + publicfh_flag = TRUE; - error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi, + error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &new, &sec); + + if (error == 0) { + exi_rele(exi); + exi = new; + } } else { /* * Do a normal single component lookup. @@ -452,13 +462,10 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, VN_RELE(dvp); /* - * If publicfh_flag is true then we have called rfs_publicfh_mclookup - * and have obtained a new exportinfo in exi which needs to be - * released. Note the the original exportinfo pointed to by exi - * will be released by the caller, comon_dispatch. + * The passed argument exportinfo is released by the + * caller, common_dispatch */ - if (publicfh_flag && exi != NULL) - exi_rele(exi); + exi_rele(exi); /* * If it's public fh, no 0x81, and client's flavor is diff --git a/usr/src/uts/common/fs/smbsrv/smb_delete.c b/usr/src/uts/common/fs/smbsrv/smb_delete.c index 43f6d733bd..8a27b7408e 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_delete.c +++ b/usr/src/uts/common/fs/smbsrv/smb_delete.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ #include <smbsrv/smb_kproto.h> @@ -553,7 +554,7 @@ smb_delete_check_path(smb_request_t *sr) /* fname component is, or resolves to, '.' 
(dot) */ if ((strcmp(pn->pn_fname, ".") == 0) || (SMB_SEARCH_DIRECTORY(fqi->fq_sattr) && - (smb_match(pn->pn_fname, ".")))) { + (smb_match(pn->pn_fname, ".", B_FALSE)))) { smbsr_error(sr, NT_STATUS_OBJECT_NAME_INVALID, ERRDOS, ERROR_INVALID_NAME); return (-1); diff --git a/usr/src/uts/common/fs/smbsrv/smb_kutil.c b/usr/src/uts/common/fs/smbsrv/smb_kutil.c index 5d45081e2e..aed58277be 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_kutil.c +++ b/usr/src/uts/common/fs/smbsrv/smb_kutil.c @@ -18,8 +18,10 @@ * * CDDL HEADER END */ + /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ #include <sys/param.h> @@ -98,116 +100,34 @@ smb_ascii_or_unicode_null_len(struct smb_request *sr) } /* - * Return B_TRUE if pattern contains wildcards - */ -boolean_t -smb_contains_wildcards(const char *pattern) -{ - static const char *wildcards = "*?"; - - return (strpbrk(pattern, wildcards) != NULL); -} - -/* - * When converting wildcards a '.' in a name is treated as a base and - * extension separator even if the name is longer than 8.3. - * - * The '*' character matches an entire part of the name. For example, - * "*.abc" matches any name with an extension of "abc". * - * The '?' character matches a single character. - * If the base contains all ? (8 or more) then it is treated as *. - * If the extension contains all ? (3 or more) then it is treated as *. - * - * Clients convert ASCII wildcards to Unicode wildcards as follows: + * Convert old-style (DOS, LanMan) wildcard strings to NT style. + * This should ONLY happen to patterns that come from old clients, + * meaning dialect LANMAN2_1 etc. (dialect < NT_LM_0_12). * * ? is converted to > - * . is converted to " if it is followed by ? or * * * is converted to < if it is followed by . + * . is converted to " if it is followed by ? or * or end of pattern * - * Note that clients convert "*." to '< and drop the '.' but "*.txt" - * is sent as "<.TXT", i.e. - * - * dir *. -> dir < - * dir *.txt -> dir <.TXT - * - * Since " and < are illegal in Windows file names, we always convert - * these Unicode wildcards without checking the following character. + * Note: modifies pattern in place. */ void smb_convert_wildcards(char *pattern) { - static char *match_all[] = { - "*.", - "*.*" - }; - char *extension; char *p; - int len; - int i; - /* - * Special case "<" for "dir *.", and fast-track for "*". - */ - if ((*pattern == '<') || (*pattern == '*')) { - if (*(pattern + 1) == '\0') { - *pattern = '*'; - return; - } - } - - for (p = pattern; *p != '\0'; ++p) { + for (p = pattern; *p != '\0'; p++) { switch (*p) { - case '<': - *p = '*'; - break; - case '>': - *p = '?'; + case '?': + *p = '>'; break; - case '\"': - *p = '.'; + case '*': + if (p[1] == '.') + *p = '<'; break; - default: - break; - } - } - - /* - * Replace "????????.ext" with "*.ext". - */ - p = pattern; - p += strspn(p, "?"); - if (*p == '.') { - *p = '\0'; - len = strlen(pattern); - *p = '.'; - if (len >= SMB_NAME83_BASELEN) { - *pattern = '*'; - (void) strlcpy(pattern + 1, p, MAXPATHLEN - 1); - } - } - - /* - * Replace "base.???" with 'base.*'. - */ - if ((extension = strrchr(pattern, '.')) != NULL) { - p = ++extension; - p += strspn(p, "?"); - if (*p == '\0') { - len = strlen(extension); - if (len >= SMB_NAME83_EXTLEN) { - *extension = '\0'; - (void) strlcat(pattern, "*", MAXPATHLEN); - } - } - } - - /* - * Replace anything that matches an entry in match_all with "*". 
- */ - for (i = 0; i < sizeof (match_all) / sizeof (match_all[0]); ++i) { - if (strcmp(pattern, match_all[i]) == 0) { - (void) strlcpy(pattern, "*", MAXPATHLEN); + case '.': + if (p[1] == '?' || p[1] == '*' || p[1] == '\0') + *p = '\"'; break; } } diff --git a/usr/src/uts/common/fs/smbsrv/smb_odir.c b/usr/src/uts/common/fs/smbsrv/smb_odir.c index ea9b505f0d..610126753b 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_odir.c +++ b/usr/src/uts/common/fs/smbsrv/smb_odir.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ /* @@ -286,7 +287,8 @@ smb_odir_open(smb_request_t *sr, char *path, uint16_t sattr, uint32_t flags) tree = sr->tid_tree; - smb_convert_wildcards(path); + if (sr->session->dialect < NT_LM_0_12) + smb_convert_wildcards(path); rc = smb_pathname_reduce(sr, sr->user_cr, path, tree->t_snode, tree->t_snode, &dnode, pattern); @@ -1278,22 +1280,23 @@ smb_odir_lookup_link(smb_request_t *sr, smb_odir_t *od, * - If shortnames are supported, generate the shortname from * odirent->od_name and check if it matches od->d_pattern. */ -boolean_t +static boolean_t smb_odir_match_name(smb_odir_t *od, smb_odirent_t *odirent) { char *name = odirent->od_name; char shortname[SMB_SHORTNAMELEN]; ino64_t ino = odirent->od_ino; + boolean_t ci = (od->d_flags & SMB_ODIR_FLAG_IGNORE_CASE) != 0; if (smb_is_reserved_dos_name(name)) return (B_FALSE); - if (smb_match_ci(od->d_pattern, name)) + if (smb_match(od->d_pattern, name, ci)) return (B_TRUE); if (od->d_flags & SMB_ODIR_FLAG_SHORTNAMES) { smb_mangle(name, ino, shortname, SMB_SHORTNAMELEN); - if (smb_match_ci(od->d_pattern, shortname)) + if (smb_match(od->d_pattern, shortname, ci)) return (B_TRUE); } diff --git a/usr/src/uts/common/fs/smbsrv/smb_pathname.c b/usr/src/uts/common/fs/smbsrv/smb_pathname.c index e3ae3ffba2..db9883667e 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_pathname.c +++ b/usr/src/uts/common/fs/smbsrv/smb_pathname.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ #include <smbsrv/smb_kproto.h> @@ -732,8 +733,8 @@ smb_pathname_preprocess(smb_request_t *sr, smb_pathname_t *pn) return; } - /* perform unicode wildcard conversion */ - smb_convert_wildcards(pn->pn_path); + if (sr->session->dialect < NT_LM_0_12) + smb_convert_wildcards(pn->pn_path); /* treat '/' as '\\' */ (void) strsubst(pn->pn_path, '/', '\\'); diff --git a/usr/src/uts/common/fs/vfs.c b/usr/src/uts/common/fs/vfs.c index abe3a23e75..8d5c741428 100644 --- a/usr/src/uts/common/fs/vfs.c +++ b/usr/src/uts/common/fs/vfs.c @@ -936,29 +936,33 @@ vfs_mountroot(void) } #endif /* __sparc */ - /* - * Look up the root device via devfs so that a dv_node is - * created for it. The vnode is never VN_RELE()ed. - * We allocate more than MAXPATHLEN so that the - * buffer passed to i_ddi_prompath_to_devfspath() is - * exactly MAXPATHLEN (the function expects a buffer - * of that length). - */ - plen = strlen("/devices"); - path = kmem_alloc(plen + MAXPATHLEN, KM_SLEEP); - (void) strcpy(path, "/devices"); + if (strcmp(rootfs.bo_fstype, "zfs") != 0) { + /* + * Look up the root device via devfs so that a dv_node is + * created for it. The vnode is never VN_RELE()ed. + * We allocate more than MAXPATHLEN so that the + * buffer passed to i_ddi_prompath_to_devfspath() is + * exactly MAXPATHLEN (the function expects a buffer + * of that length). 
+ */ + plen = strlen("/devices"); + path = kmem_alloc(plen + MAXPATHLEN, KM_SLEEP); + (void) strcpy(path, "/devices"); - if (i_ddi_prompath_to_devfspath(rootfs.bo_name, path + plen) - != DDI_SUCCESS || - lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &rvp)) { + if (i_ddi_prompath_to_devfspath(rootfs.bo_name, path + plen) + != DDI_SUCCESS || + lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &rvp)) { - /* NUL terminate in case "path" has garbage */ - path[plen + MAXPATHLEN - 1] = '\0'; + /* NUL terminate in case "path" has garbage */ + path[plen + MAXPATHLEN - 1] = '\0'; #ifdef DEBUG - cmn_err(CE_WARN, "!Cannot lookup root device: %s", path); + cmn_err(CE_WARN, "!Cannot lookup root device: %s", + path); #endif + } + kmem_free(path, plen + MAXPATHLEN); } - kmem_free(path, plen + MAXPATHLEN); + vfs_mnttabvp_setup(); } diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c index 5caabf8260..d8e9f26bdb 100644 --- a/usr/src/uts/common/fs/zfs/arc.c +++ b/usr/src/uts/common/fs/zfs/arc.c @@ -190,6 +190,7 @@ uint64_t zfs_arc_meta_limit = 0; int zfs_arc_grow_retry = 0; int zfs_arc_shrink_shift = 0; int zfs_arc_p_min_shift = 0; +int zfs_disable_dup_eviction = 0; /* * Note that buffers can be in one of 6 states: @@ -292,6 +293,9 @@ typedef struct arc_stats { kstat_named_t arcstat_l2_size; kstat_named_t arcstat_l2_hdr_size; kstat_named_t arcstat_memory_throttle_count; + kstat_named_t arcstat_duplicate_buffers; + kstat_named_t arcstat_duplicate_buffers_size; + kstat_named_t arcstat_duplicate_reads; } arc_stats_t; static arc_stats_t arc_stats = { @@ -347,7 +351,10 @@ static arc_stats_t arc_stats = { { "l2_io_error", KSTAT_DATA_UINT64 }, { "l2_size", KSTAT_DATA_UINT64 }, { "l2_hdr_size", KSTAT_DATA_UINT64 }, - { "memory_throttle_count", KSTAT_DATA_UINT64 } + { "memory_throttle_count", KSTAT_DATA_UINT64 }, + { "duplicate_buffers", KSTAT_DATA_UINT64 }, + { "duplicate_buffers_size", KSTAT_DATA_UINT64 }, + { "duplicate_reads", KSTAT_DATA_UINT64 } }; #define ARCSTAT(stat) (arc_stats.stat.value.ui64) @@ -1362,6 +1369,17 @@ arc_buf_clone(arc_buf_t *from) hdr->b_buf = buf; arc_get_data_buf(buf); bcopy(from->b_data, buf->b_data, size); + + /* + * This buffer already exists in the arc so create a duplicate + * copy for the caller. If the buffer is associated with user data + * then track the size and number of duplicates. These stats will be + * updated as duplicate buffers are created and destroyed. + */ + if (hdr->b_type == ARC_BUFC_DATA) { + ARCSTAT_BUMP(arcstat_duplicate_buffers); + ARCSTAT_INCR(arcstat_duplicate_buffers_size, size); + } hdr->b_datacnt += 1; return (buf); } @@ -1460,6 +1478,16 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all) ASSERT3U(state->arcs_size, >=, size); atomic_add_64(&state->arcs_size, -size); buf->b_data = NULL; + + /* + * If we're destroying a duplicate buffer make sure + * that the appropriate statistics are updated. + */ + if (buf->b_hdr->b_datacnt > 1 && + buf->b_hdr->b_type == ARC_BUFC_DATA) { + ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers); + ARCSTAT_INCR(arcstat_duplicate_buffers_size, -size); + } ASSERT(buf->b_hdr->b_datacnt > 0); buf->b_hdr->b_datacnt -= 1; } @@ -1644,6 +1672,48 @@ arc_buf_size(arc_buf_t *buf) } /* + * Called from the DMU to determine if the current buffer should be + * evicted. In order to ensure proper locking, the eviction must be initiated + * from the DMU. Return true if the buffer is associated with user data and + * duplicate buffers still exist. 
+ */ +boolean_t +arc_buf_eviction_needed(arc_buf_t *buf) +{ + arc_buf_hdr_t *hdr; + boolean_t evict_needed = B_FALSE; + + if (zfs_disable_dup_eviction) + return (B_FALSE); + + mutex_enter(&buf->b_evict_lock); + hdr = buf->b_hdr; + if (hdr == NULL) { + /* + * We are in arc_do_user_evicts(); let that function + * perform the eviction. + */ + ASSERT(buf->b_data == NULL); + mutex_exit(&buf->b_evict_lock); + return (B_FALSE); + } else if (buf->b_data == NULL) { + /* + * We have already been added to the arc eviction list; + * recommend eviction. + */ + ASSERT3P(hdr, ==, &arc_eviction_hdr); + mutex_exit(&buf->b_evict_lock); + return (B_TRUE); + } + + if (hdr->b_datacnt > 1 && hdr->b_type == ARC_BUFC_DATA) + evict_needed = B_TRUE; + + mutex_exit(&buf->b_evict_lock); + return (evict_needed); +} + +/* * Evict buffers from list until we've removed the specified number of * bytes. Move the removed buffers to the appropriate evict state. * If the recycle flag is set, then attempt to "recycle" a buffer: @@ -2638,8 +2708,10 @@ arc_read_done(zio_t *zio) abuf = buf; for (acb = callback_list; acb; acb = acb->acb_next) { if (acb->acb_done) { - if (abuf == NULL) + if (abuf == NULL) { + ARCSTAT_BUMP(arcstat_duplicate_reads); abuf = arc_buf_clone(buf); + } acb->acb_buf = abuf; abuf = NULL; } @@ -3186,6 +3258,16 @@ arc_release(arc_buf_t *buf, void *tag) ASSERT3U(*size, >=, hdr->b_size); atomic_add_64(size, -hdr->b_size); } + + /* + * We're releasing a duplicate user data buffer, update + * our statistics accordingly. + */ + if (hdr->b_type == ARC_BUFC_DATA) { + ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers); + ARCSTAT_INCR(arcstat_duplicate_buffers_size, + -hdr->b_size); + } hdr->b_datacnt -= 1; arc_cksum_verify(buf); arc_buf_unwatch(buf); diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c index 437e0ac85c..e8bf55c321 100644 --- a/usr/src/uts/common/fs/zfs/dbuf.c +++ b/usr/src/uts/common/fs/zfs/dbuf.c @@ -2089,7 +2089,24 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) dbuf_evict(db); } else { VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0); - if (!DBUF_IS_CACHEABLE(db)) + + /* + * A dbuf will be eligible for eviction if either the + * 'primarycache' property is set or a duplicate + * copy of this buffer is already cached in the arc. + * + * In the case of the 'primarycache' a buffer + * is considered for eviction if it matches the + * criteria set in the property. + * + * To decide if our buffer is considered a + * duplicate, we must call into the arc to determine + * if multiple buffers are referencing the same + * block on-disk. If so, then we simply evict + * ourselves. 
+ */ + if (!DBUF_IS_CACHEABLE(db) || + arc_buf_eviction_needed(db->db_buf)) dbuf_clear(db); else mutex_exit(&db->db_mtx); } diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c index 190b26e5bf..a9308b0c08 100644 --- a/usr/src/uts/common/fs/zfs/dmu_tx.c +++ b/usr/src/uts/common/fs/zfs/dmu_tx.c @@ -574,7 +574,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) (dn->dn_indblkshift - SPA_BLKPTRSHIFT); while (level++ < maxlevel) { - txh->txh_memory_tohold += MIN(blkcnt, (nl1blks >> epbs)) + txh->txh_memory_tohold += MAX(MIN(blkcnt, nl1blks), 1) << dn->dn_indblkshift; blkcnt = 1 + (blkcnt >> epbs); } diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index d9cd70f1c8..968fbd80d6 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -5983,6 +5983,10 @@ spa_sync(spa_t *spa, uint64_t txg) tx = dmu_tx_create_assigned(dp, txg); + spa->spa_sync_starttime = gethrtime(); + VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, + spa->spa_sync_starttime + spa->spa_deadman_synctime)); + /* * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, * set spa_deflate if we have no raid-z vdevs. @@ -6111,6 +6115,8 @@ spa_sync(spa_t *spa, uint64_t txg) } dmu_tx_commit(tx); + VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, CY_INFINITY)); + /* * Clear the dirty config list. */ diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c index 30681b6464..a254c8d656 100644 --- a/usr/src/uts/common/fs/zfs/spa_misc.c +++ b/usr/src/uts/common/fs/zfs/spa_misc.c @@ -26,6 +26,7 @@ #include <sys/zfs_context.h> #include <sys/spa_impl.h> +#include <sys/spa_boot.h> #include <sys/zio.h> #include <sys/zio_checksum.h> #include <sys/zio_compress.h> @@ -249,6 +250,26 @@ int zfs_flags = 0; */ int zfs_recover = 0; +extern int zfs_txg_synctime_ms; + +/* + * Expiration time in units of zfs_txg_synctime_ms. This value has two + * meanings. First it is used to determine when the spa_deadman logic + * should fire. By default the spa_deadman will fire if spa_sync has + * not completed in 1000 * zfs_txg_synctime_ms (i.e. 1000 seconds). + * Secondly, the value determines if an I/O is considered "hung". + * Any I/O that has not completed in zfs_deadman_synctime is considered + * "hung" resulting in a system panic. + * 1000 zfs_txg_synctime_ms (i.e. 1000 seconds). + */ +uint64_t zfs_deadman_synctime = 1000ULL; + +/* + * Override the zfs deadman behavior via /etc/system. By default the + * deadman is enabled except on VMware and sparc deployments. + */ +int zfs_deadman_enabled = -1; + /* * ========================================================================== @@ -418,6 +439,23 @@ spa_lookup(const char *name) } /* + * Fires when spa_sync has not completed within zfs_deadman_synctime. + * If the zfs_deadman_enabled flag is set then it inspects all vdev queues + * looking for potentially hung I/Os. + */ +void +spa_deadman(void *arg) +{ + spa_t *spa = arg; + + zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu", + (gethrtime() - spa->spa_sync_starttime) / NANOSEC, + ++spa->spa_deadman_calls); + if (zfs_deadman_enabled) + vdev_deadman(spa->spa_root_vdev); +} + +/* * Create an uninitialized spa_t with the given name. Requires * spa_namespace_lock. The caller must ensure that the spa_t doesn't already * exist by calling spa_lookup() first. 
@@ -427,6 +465,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) { spa_t *spa; spa_config_dirent_t *dp; + cyc_handler_t hdlr; + cyc_time_t when; ASSERT(MUTEX_HELD(&spa_namespace_lock)); @@ -458,6 +498,25 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) spa->spa_proc = &p0; spa->spa_proc_state = SPA_PROC_NONE; + hdlr.cyh_func = spa_deadman; + hdlr.cyh_arg = spa; + hdlr.cyh_level = CY_LOW_LEVEL; + + spa->spa_deadman_synctime = zfs_deadman_synctime * + zfs_txg_synctime_ms * MICROSEC; + + /* + * This determines how often we need to check for hung I/Os after + * the cyclic has already fired. Since checking for hung I/Os is + * an expensive operation we don't want to check too frequently. + * Instead wait for 5 synctimes before checking again. + */ + when.cyt_interval = 5ULL * zfs_txg_synctime_ms * MICROSEC; + when.cyt_when = CY_INFINITY; + mutex_enter(&cpu_lock); + spa->spa_deadman_cycid = cyclic_add(&hdlr, &when); + mutex_exit(&cpu_lock); + refcount_create(&spa->spa_refcount); spa_config_lock_init(spa); @@ -540,6 +599,12 @@ spa_remove(spa_t *spa) nvlist_free(spa->spa_load_info); spa_config_set(spa, NULL); + mutex_enter(&cpu_lock); + if (spa->spa_deadman_cycid != CYCLIC_NONE) + cyclic_remove(spa->spa_deadman_cycid); + mutex_exit(&cpu_lock); + spa->spa_deadman_cycid = CYCLIC_NONE; + refcount_destroy(&spa->spa_refcount); spa_config_lock_destroy(spa); @@ -1507,6 +1572,12 @@ spa_prev_software_version(spa_t *spa) } uint64_t +spa_deadman_synctime(spa_t *spa) +{ + return (spa->spa_deadman_synctime); +} + +uint64_t dva_get_dsize_sync(spa_t *spa, const dva_t *dva) { uint64_t asize = DVA_GET_ASIZE(dva); @@ -1600,7 +1671,9 @@ spa_init(int mode) spa_mode_global = mode; -#ifndef _KERNEL +#ifdef _KERNEL + spa_arch_init(); +#else if (spa_mode_global != FREAD && dprintf_find_string("watch")) { arc_procfd = open("/proc/self/ctl", O_WRONLY); if (arc_procfd == -1) { diff --git a/usr/src/uts/common/fs/zfs/sys/arc.h b/usr/src/uts/common/fs/zfs/sys/arc.h index 28dbc57275..b109dcafbc 100644 --- a/usr/src/uts/common/fs/zfs/sys/arc.h +++ b/usr/src/uts/common/fs/zfs/sys/arc.h @@ -99,6 +99,7 @@ int arc_released(arc_buf_t *buf); int arc_has_callback(arc_buf_t *buf); void arc_buf_freeze(arc_buf_t *buf); void arc_buf_thaw(arc_buf_t *buf); +boolean_t arc_buf_eviction_needed(arc_buf_t *buf); #ifdef ZFS_DEBUG int arc_referenced(arc_buf_t *buf); #endif diff --git a/usr/src/uts/common/fs/zfs/sys/sa_impl.h b/usr/src/uts/common/fs/zfs/sys/sa_impl.h index 6661e47cfc..8ae05ce364 100644 --- a/usr/src/uts/common/fs/zfs/sys/sa_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/sa_impl.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #ifndef _SYS_SA_IMPL_H @@ -181,7 +182,7 @@ typedef struct sa_hdr_phys { */ #define SA_HDR_LAYOUT_NUM(hdr) BF32_GET(hdr->sa_layout_info, 0, 10) -#define SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 16, 3, 0) +#define SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 6, 3, 0) #define SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \ { \ BF32_SET_SB(x, 10, 6, 3, 0, size); \ } diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h index 1043f4038a..172a9f141e 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa.h +++ b/usr/src/uts/common/fs/zfs/sys/spa.h @@ -604,6 +604,7 @@ extern boolean_t spa_suspended(spa_t *spa); extern uint64_t spa_bootfs(spa_t *spa); extern uint64_t spa_delegation(spa_t *spa); extern objset_t *spa_meta_objset(spa_t *spa); +extern uint64_t spa_deadman_synctime(spa_t *spa); /* Miscellaneous support routines */ extern void spa_activate_mos_feature(spa_t *spa, const char *feature); diff --git a/usr/src/uts/common/fs/zfs/sys/spa_boot.h b/usr/src/uts/common/fs/zfs/sys/spa_boot.h index 1d3622f5a1..8df5072a55 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa_boot.h +++ b/usr/src/uts/common/fs/zfs/sys/spa_boot.h @@ -23,6 +23,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + #ifndef _SYS_SPA_BOOT_H #define _SYS_SPA_BOOT_H @@ -35,6 +39,8 @@ extern "C" { extern char *spa_get_bootprop(char *prop); extern void spa_free_bootprop(char *prop); +extern void spa_arch_init(void); + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h index 027832e858..42ce5556d3 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h @@ -227,6 +227,10 @@ struct spa { uint64_t spa_feat_for_write_obj; /* required to write to pool */ uint64_t spa_feat_for_read_obj; /* required to read from pool */ uint64_t spa_feat_desc_obj; /* Feature descriptions */ + cyclic_id_t spa_deadman_cycid; /* cyclic id */ + uint64_t spa_deadman_calls; /* number of deadman calls */ + uint64_t spa_sync_starttime; /* starting time of spa_sync */ + uint64_t spa_deadman_synctime; /* deadman expiration timer */ /* * spa_refcnt & spa_config_lock must be the last elements * because refcount_t changes size based on compilation options. diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h index 7e34889b61..5a7836612b 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev.h @@ -79,6 +79,7 @@ extern void vdev_metaslab_fini(vdev_t *vd); extern void vdev_metaslab_set_size(vdev_t *); extern void vdev_expand(vdev_t *vd, uint64_t txg); extern void vdev_split(vdev_t *vd); +extern void vdev_deadman(vdev_t *vd); extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs); diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h index c772d954bb..e4c02bde1d 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h @@ -105,6 +105,8 @@ struct vdev_queue { avl_tree_t vq_write_tree; avl_tree_t vq_pending_tree; zoneid_t vq_last_zone_id; + uint64_t vq_io_complete_ts; + uint64_t vq_io_delta_ts; kmutex_t vq_lock; }; @@ -321,6 +323,14 @@ extern void vdev_set_min_asize(vdev_t *vd); */ extern int zfs_vdev_cache_size; +/* + * The vdev_buf_t is used to translate between zio_t and buf_t, and back again. 
+ */ +typedef struct vdev_buf { + buf_t vb_buf; /* buffer that describes the io */ + zio_t *vb_io; /* pointer back to the original zio_t */ +} vdev_buf_t; + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_context.h b/usr/src/uts/common/fs/zfs/sys/zfs_context.h index fdd0412fee..0dc8d8859c 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_context.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_context.h @@ -22,8 +22,10 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ + /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_ZFS_CONTEXT_H @@ -67,6 +69,7 @@ extern "C" { #include <sys/sysevent/dev.h> #include <sys/fm/util.h> #include <sys/sunddi.h> +#include <sys/cyclic.h> #define CPU_SEQID (CPU->cpu_seqid) diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h index 4d781ad2a4..86e901be0d 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -240,12 +240,24 @@ typedef struct zinject_record { uint32_t zi_iotype; int32_t zi_duration; uint64_t zi_timer; + uint32_t zi_cmd; + uint32_t zi_pad; } zinject_record_t; #define ZINJECT_NULL 0x1 #define ZINJECT_FLUSH_ARC 0x2 #define ZINJECT_UNLOAD_SPA 0x4 +typedef enum zinject_type { + ZINJECT_UNINITIALIZED, + ZINJECT_DATA_FAULT, + ZINJECT_DEVICE_FAULT, + ZINJECT_LABEL_FAULT, + ZINJECT_IGNORED_WRITES, + ZINJECT_PANIC, + ZINJECT_DELAY_IO, +} zinject_type_t; + typedef struct zfs_share { uint64_t z_exportdata; uint64_t z_sharedata; diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h index ce3a983d9f..9c718f691a 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio.h +++ b/usr/src/uts/common/fs/zfs/sys/zio.h @@ -21,8 +21,6 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - */ -/* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Joyent, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. @@ -406,6 +404,7 @@ struct zio { uint64_t io_offset; uint64_t io_deadline; + uint64_t io_timestamp; avl_node_t io_offset_node; avl_node_t io_deadline_node; avl_tree_t *io_vdev_tree; @@ -554,6 +553,7 @@ extern int zio_handle_fault_injection(zio_t *zio, int error); extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error); extern int zio_handle_label_injection(zio_t *zio, int error); extern void zio_handle_ignored_writes(zio_t *zio); +extern uint64_t zio_handle_io_delay(zio_t *zio); /* * Checksum ereport functions diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index fa0a579e66..18180ecad3 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -3153,3 +3153,41 @@ vdev_split(vdev_t *vd) } vdev_propagate_state(cvd); } + +void +vdev_deadman(vdev_t *vd) +{ + for (int c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + + vdev_deadman(cvd); + } + + if (vd->vdev_ops->vdev_op_leaf) { + vdev_queue_t *vq = &vd->vdev_queue; + + mutex_enter(&vq->vq_lock); + if (avl_numnodes(&vq->vq_pending_tree) > 0) { + spa_t *spa = vd->vdev_spa; + zio_t *fio; + uint64_t delta; + + /* + * Look at the head of all the pending queues, + * if any I/O has been outstanding for longer than + * the spa_deadman_synctime we panic the system. 
+ */ + fio = avl_first(&vq->vq_pending_tree); + delta = ddi_get_lbolt64() - fio->io_timestamp; + if (delta > NSEC_TO_TICK(spa_deadman_synctime(spa))) { + zfs_dbgmsg("SLOW IO: zio timestamp %llu, " + "delta %llu, last io %llu", + fio->io_timestamp, delta, + vq->vq_io_complete_ts); + fm_panic("I/O to pool '%s' appears to be " + "hung.", spa_name(spa)); + } + } + mutex_exit(&vq->vq_lock); + } +} diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c index 1ba343226f..dfadeca9d4 100644 --- a/usr/src/uts/common/fs/zfs/vdev_disk.c +++ b/usr/src/uts/common/fs/zfs/vdev_disk.c @@ -42,11 +42,6 @@ extern ldi_ident_t zfs_li; -typedef struct vdev_disk_buf { - buf_t vdb_buf; - zio_t *vdb_io; -} vdev_disk_buf_t; - static void vdev_disk_hold(vdev_t *vd) { @@ -170,7 +165,7 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, /* * When opening a disk device, we want to preserve the user's original * intent. We always want to open the device by the path the user gave - * us, even if it is one of multiple paths to the save device. But we + * us, even if it is one of multiple paths to the same device. But we * also want to be able to survive disks being removed/recabled. * Therefore the sequence of opening devices is: * @@ -416,8 +411,8 @@ vdev_disk_ldi_physio(ldi_handle_t vd_lh, caddr_t data, static void vdev_disk_io_intr(buf_t *bp) { - vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp; - zio_t *zio = vdb->vdb_io; + vdev_buf_t *vb = (vdev_buf_t *)bp; + zio_t *zio = vb->vb_io; /* * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO. @@ -429,7 +424,7 @@ vdev_disk_io_intr(buf_t *bp) if (zio->io_error == 0 && bp->b_resid != 0) zio->io_error = EIO; - kmem_free(vdb, sizeof (vdev_disk_buf_t)); + kmem_free(vb, sizeof (vdev_buf_t)); zio_interrupt(zio); } @@ -460,7 +455,7 @@ vdev_disk_io_start(zio_t *zio) { vdev_t *vd = zio->io_vd; vdev_disk_t *dvd = vd->vdev_tsd; - vdev_disk_buf_t *vdb; + vdev_buf_t *vb; struct dk_callback *dkc; buf_t *bp; int error; @@ -524,10 +519,10 @@ vdev_disk_io_start(zio_t *zio) return (ZIO_PIPELINE_CONTINUE); } - vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP); + vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP); - vdb->vdb_io = zio; - bp = &vdb->vdb_buf; + vb->vb_io = zio; + bp = &vb->vb_buf; bioinit(bp); bp->b_flags = B_BUSY | B_NOCACHE | diff --git a/usr/src/uts/common/fs/zfs/vdev_file.c b/usr/src/uts/common/fs/zfs/vdev_file.c index 043fa51294..1fbce5e542 100644 --- a/usr/src/uts/common/fs/zfs/vdev_file.c +++ b/usr/src/uts/common/fs/zfs/vdev_file.c @@ -25,6 +25,7 @@ #include <sys/zfs_context.h> #include <sys/spa.h> +#include <sys/spa_impl.h> #include <sys/vdev_file.h> #include <sys/vdev_impl.h> #include <sys/zio.h> @@ -140,12 +141,55 @@ vdev_file_close(vdev_t *vd) vd->vdev_tsd = NULL; } +/* + * Implements the interrupt side for file vdev types. This routine will be + * called when the I/O completes allowing us to transfer the I/O to the + * interrupt taskqs. For consistency, the code structure mimics disk vdev + * types. + */ +static void +vdev_file_io_intr(buf_t *bp) +{ + vdev_buf_t *vb = (vdev_buf_t *)bp; + zio_t *zio = vb->vb_io; + + zio->io_error = (geterror(bp) != 0 ? EIO : 0); + if (zio->io_error == 0 && bp->b_resid != 0) + zio->io_error = ENOSPC; + + kmem_free(vb, sizeof (vdev_buf_t)); + zio_interrupt(zio); +} + +static void +vdev_file_io_strategy(void *arg) +{ + buf_t *bp = arg; + vnode_t *vp = bp->b_private; + ssize_t resid; + int error; + + error = vn_rdwr((bp->b_flags & B_READ) ? 
UIO_READ : UIO_WRITE, + vp, bp->b_un.b_addr, bp->b_bcount, ldbtob(bp->b_lblkno), + UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); + + if (error == 0) { + bp->b_resid = resid; + biodone(bp); + } else { + bioerror(bp, error); + biodone(bp); + } +} + static int vdev_file_io_start(zio_t *zio) { + spa_t *spa = zio->io_spa; vdev_t *vd = zio->io_vd; vdev_file_t *vf = vd->vdev_tsd; - ssize_t resid; + vdev_buf_t *vb; + buf_t *bp; if (zio->io_type == ZIO_TYPE_IOCTL) { /* XXPOLICY */ @@ -166,15 +210,22 @@ vdev_file_io_start(zio_t *zio) return (ZIO_PIPELINE_CONTINUE); } - zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ? - UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data, - zio->io_size, zio->io_offset, UIO_SYSSPACE, - 0, RLIM64_INFINITY, kcred, &resid); + vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP); - if (resid != 0 && zio->io_error == 0) - zio->io_error = ENOSPC; + vb->vb_io = zio; + bp = &vb->vb_buf; - zio_interrupt(zio); + bioinit(bp); + bp->b_flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE); + bp->b_bcount = zio->io_size; + bp->b_un.b_addr = zio->io_data; + bp->b_lblkno = lbtodb(zio->io_offset); + bp->b_bufsize = zio->io_size; + bp->b_private = vf->vf_vnode; + bp->b_iodone = (int (*)())vdev_file_io_intr; + + taskq_dispatch_ent(spa->spa_zio_taskq[ZIO_TYPE_FREE][ZIO_TASKQ_ISSUE], + vdev_file_io_strategy, bp, 0, &zio->io_tqent); return (ZIO_PIPELINE_STOP); } diff --git a/usr/src/uts/common/fs/zfs/vdev_queue.c b/usr/src/uts/common/fs/zfs/vdev_queue.c index 4ea958a9f6..8dec283fee 100644 --- a/usr/src/uts/common/fs/zfs/vdev_queue.c +++ b/usr/src/uts/common/fs/zfs/vdev_queue.c @@ -24,6 +24,10 @@ * Copyright (c) 2011, Joyent, Inc. All rights reserved. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + #include <sys/zfs_context.h> #include <sys/vdev_impl.h> #include <sys/zio.h> @@ -298,6 +302,7 @@ again: zio_buf_alloc(size), size, fio->io_type, ZIO_PRIORITY_AGG, flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE, vdev_queue_agg_io_done, NULL); + aio->io_timestamp = fio->io_timestamp; nio = fio; do { @@ -369,7 +374,8 @@ vdev_queue_io(zio_t *zio) mutex_enter(&vq->vq_lock); - zio->io_deadline = (ddi_get_lbolt64() >> zfs_vdev_time_shift) + + zio->io_timestamp = ddi_get_lbolt64(); + zio->io_deadline = (zio->io_timestamp >> zfs_vdev_time_shift) + zio->io_priority; vdev_queue_io_add(vq, zio); @@ -394,10 +400,16 @@ vdev_queue_io_done(zio_t *zio) { vdev_queue_t *vq = &zio->io_vd->vdev_queue; + if (zio_injection_enabled) + delay(SEC_TO_TICK(zio_handle_io_delay(zio))); + mutex_enter(&vq->vq_lock); avl_remove(&vq->vq_pending_tree, zio); + vq->vq_io_complete_ts = ddi_get_lbolt64(); + vq->vq_io_delta_ts = vq->vq_io_complete_ts - zio->io_timestamp; + for (int i = 0; i < zfs_vdev_ramp_rate; i++) { zio_t *nio = vdev_queue_io_to_issue(vq, zfs_vdev_max_pending); if (nio == NULL) diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c index 2292f658b3..c7bfbbaec4 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c @@ -50,6 +50,7 @@ #include <sys/spa.h> #include <sys/zap.h> #include <sys/sa.h> +#include <sys/sa_impl.h> #include <sys/varargs.h> #include <sys/policy.h> #include <sys/atomic.h> @@ -64,7 +65,6 @@ #include <sys/dnlc.h> #include <sys/dmu_objset.h> #include <sys/spa_boot.h> -#include <sys/sa.h> #include "zfs_comutil.h" int zfsfstype; @@ -578,7 +578,6 @@ static int zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, uint64_t *userp, uint64_t *groupp) { - znode_phys_t *znp = data; int error 
= 0; /* @@ -597,20 +596,18 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, return (EEXIST); if (bonustype == DMU_OT_ZNODE) { + znode_phys_t *znp = data; *userp = znp->zp_uid; *groupp = znp->zp_gid; } else { int hdrsize; + sa_hdr_phys_t *sap = data; + sa_hdr_phys_t sa = *sap; + boolean_t swap = B_FALSE; ASSERT(bonustype == DMU_OT_SA); - hdrsize = sa_hdrsize(data); - if (hdrsize != 0) { - *userp = *((uint64_t *)((uintptr_t)data + hdrsize + - SA_UID_OFFSET)); - *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + - SA_GID_OFFSET)); - } else { + if (sa.sa_magic == 0) { /* * This should only happen for newly created * files that haven't had the znode data filled @@ -618,6 +615,25 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, */ *userp = 0; *groupp = 0; + return (0); + } + if (sa.sa_magic == BSWAP_32(SA_MAGIC)) { + sa.sa_magic = SA_MAGIC; + sa.sa_layout_info = BSWAP_16(sa.sa_layout_info); + swap = B_TRUE; + } else { + VERIFY3U(sa.sa_magic, ==, SA_MAGIC); + } + + hdrsize = sa_hdrsize(&sa); + VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t)); + *userp = *((uint64_t *)((uintptr_t)data + hdrsize + + SA_UID_OFFSET)); + *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + + SA_GID_OFFSET)); + if (swap) { + *userp = BSWAP_64(*userp); + *groupp = BSWAP_64(*groupp); } } return (error); diff --git a/usr/src/uts/common/fs/zfs/zfs_znode.c b/usr/src/uts/common/fs/zfs/zfs_znode.c index 0c86cac427..92dc05f4a0 100644 --- a/usr/src/uts/common/fs/zfs/zfs_znode.c +++ b/usr/src/uts/common/fs/zfs/zfs_znode.c @@ -1947,13 +1947,16 @@ zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) * or not the object is an extended attribute directory. */ static int -zfs_obj_to_pobj(sa_handle_t *hdl, sa_attr_type_t *sa_table, uint64_t *pobjp, - int *is_xattrdir) +zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, + uint64_t *pobjp, int *is_xattrdir) { uint64_t parent; uint64_t pflags; uint64_t mode; + uint64_t parent_mode; sa_bulk_attr_t bulk[3]; + sa_handle_t *sa_hdl; + dmu_buf_t *sa_db; int count = 0; int error; @@ -1967,9 +1970,32 @@ zfs_obj_to_pobj(sa_handle_t *hdl, sa_attr_type_t *sa_table, uint64_t *pobjp, if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) return (error); - *pobjp = parent; + /* + * When a link is removed its parent pointer is not changed and will + * be invalid. There are two cases where a link is removed but the + * file stays around, when it goes to the delete queue and when there + * are additional links. + */ + error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); + if (error != 0) + return (error); + + error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); + zfs_release_sa_handle(sa_hdl, sa_db, FTAG); + if (error != 0) + return (error); + *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); + /* + * Extended attributes can be applied to files, directories, etc. + * Otherwise the parent must be a directory. 
+ */ + if (!*is_xattrdir && !S_ISDIR(parent_mode)) + return (EINVAL); + + *pobjp = parent; + return (0); } @@ -2018,7 +2044,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, if (prevdb) zfs_release_sa_handle(prevhdl, prevdb, FTAG); - if ((error = zfs_obj_to_pobj(sa_hdl, sa_table, &pobj, + if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, &is_xattrdir)) != 0) break; diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index e2e98b7896..00964aa83f 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -2928,7 +2928,7 @@ zio_done(zio_t *zio) * Hand it off to the otherwise-unused claim taskq. */ ASSERT(zio->io_tqent.tqent_next == NULL); - (void) taskq_dispatch_ent( + taskq_dispatch_ent( spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], (task_func_t *)zio_reexecute, zio, 0, &zio->io_tqent); diff --git a/usr/src/uts/common/fs/zfs/zio_inject.c b/usr/src/uts/common/fs/zfs/zio_inject.c index 9ae7d1f697..a9d4ab4070 100644 --- a/usr/src/uts/common/fs/zfs/zio_inject.c +++ b/usr/src/uts/common/fs/zfs/zio_inject.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* @@ -147,14 +148,8 @@ zio_handle_fault_injection(zio_t *zio, int error) for (handler = list_head(&inject_handlers); handler != NULL; handler = list_next(&inject_handlers, handler)) { - /* Ignore errors not destined for this pool */ - if (zio->io_spa != handler->zi_spa) - continue; - - /* Ignore device errors and panic injection */ - if (handler->zi_record.zi_guid != 0 || - handler->zi_record.zi_func[0] != '\0' || - handler->zi_record.zi_duration != 0) + if (zio->io_spa != handler->zi_spa || + handler->zi_record.zi_cmd != ZINJECT_DATA_FAULT) continue; /* If this handler matches, return EIO */ @@ -197,10 +192,7 @@ zio_handle_label_injection(zio_t *zio, int error) uint64_t start = handler->zi_record.zi_start; uint64_t end = handler->zi_record.zi_end; - /* Ignore device only faults or panic injection */ - if (handler->zi_record.zi_start == 0 || - handler->zi_record.zi_func[0] != '\0' || - handler->zi_record.zi_duration != 0) + if (handler->zi_record.zi_cmd != ZINJECT_LABEL_FAULT) continue; /* @@ -246,13 +238,7 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) for (handler = list_head(&inject_handlers); handler != NULL; handler = list_next(&inject_handlers, handler)) { - /* - * Ignore label specific faults, panic injection - * or fake writes - */ - if (handler->zi_record.zi_start != 0 || - handler->zi_record.zi_func[0] != '\0' || - handler->zi_record.zi_duration != 0) + if (handler->zi_record.zi_cmd != ZINJECT_DEVICE_FAULT) continue; if (vd->vdev_guid == handler->zi_record.zi_guid) { @@ -316,10 +302,8 @@ zio_handle_ignored_writes(zio_t *zio) handler = list_next(&inject_handlers, handler)) { /* Ignore errors not destined for this pool */ - if (zio->io_spa != handler->zi_spa) - continue; - - if (handler->zi_record.zi_duration == 0) + if (zio->io_spa != handler->zi_spa || + handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES) continue; /* @@ -355,11 +339,8 @@ spa_handle_ignored_writes(spa_t *spa) for (handler = list_head(&inject_handlers); handler != NULL; handler = list_next(&inject_handlers, handler)) { - /* Ignore errors not destined for this pool */ - if (spa != handler->zi_spa) - continue; - - if (handler->zi_record.zi_duration == 0) + if (spa != handler->zi_spa || + handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES) continue; 
if (handler->zi_record.zi_duration > 0) { @@ -379,6 +360,34 @@ spa_handle_ignored_writes(spa_t *spa) rw_exit(&inject_lock); } +uint64_t +zio_handle_io_delay(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + inject_handler_t *handler; + uint64_t seconds = 0; + + if (zio_injection_enabled == 0) + return (0); + + rw_enter(&inject_lock, RW_READER); + + for (handler = list_head(&inject_handlers); handler != NULL; + handler = list_next(&inject_handlers, handler)) { + + if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO) + continue; + + if (vd->vdev_guid == handler->zi_record.zi_guid) { + seconds = handler->zi_record.zi_timer; + break; + } + + } + rw_exit(&inject_lock); + return (seconds); +} + /* * Create a new handler for the given record. We add it to the list, adding * a reference to the spa_t in the process. We increment zio_injection_enabled, diff --git a/usr/src/uts/common/io/1394/adapters/hci1394_extern.c b/usr/src/uts/common/io/1394/adapters/hci1394_extern.c index 1da7580b6b..2faf274fb3 100644 --- a/usr/src/uts/common/io/1394/adapters/hci1394_extern.c +++ b/usr/src/uts/common/io/1394/adapters/hci1394_extern.c @@ -24,8 +24,6 @@ * All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * hci1394_extern.c * Central location for externs. There are two exceptions to this, @@ -52,7 +50,7 @@ uint32_t hci1394_split_timeout = 800; /* - * 1394 address map for OpenHCI adpaters. + * 1394 address map for OpenHCI adapters. * * This is what is reported to the services layer. The hci1394 driver does not * modify the HW to reflect this. This should reflect what the OpenHCI 1.0 HW diff --git a/usr/src/uts/common/io/blkdev/blkdev.c b/usr/src/uts/common/io/blkdev/blkdev.c index 3410fad1ec..20e3a5737e 100644 --- a/usr/src/uts/common/io/blkdev/blkdev.c +++ b/usr/src/uts/common/io/blkdev/blkdev.c @@ -20,8 +20,9 @@ */ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011, 2012 Nexenta Systems, Inc. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved. + * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved. */ #include <sys/types.h> @@ -503,7 +504,7 @@ bd_xfer_ctor(void *buf, void *arg, int kmflag) bd_t *bd = arg; int (*dcb)(caddr_t); - if (kmflag == KM_SLEEP) { + if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) { dcb = DDI_DMA_SLEEP; } else { dcb = DDI_DMA_DONTWAIT; diff --git a/usr/src/uts/common/io/e1000g/e1000g_tx.c b/usr/src/uts/common/io/e1000g/e1000g_tx.c index a696aec5a5..1f8a51d291 100644 --- a/usr/src/uts/common/io/e1000g/e1000g_tx.c +++ b/usr/src/uts/common/io/e1000g/e1000g_tx.c @@ -668,10 +668,12 @@ e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list, * drivers do not have this issue because they (harmlessly) set the * POPTS field on every data descriptor to be the intended options for * the entire packet. To circumvent this QEMU bug, we engage in this - * same behavior iff our type matches that which is emulated by QEMU - * (the 82540). + * same behavior iff the subsystem vendor and device IDs indicate that + * this is an emulated QEMU device (1af4,1100). 
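+ * (0x1af4 is the Red Hat/Qumranet PCI vendor ID; QEMU stamps its + * emulated devices with that subsystem vendor ID and subsystem device + * ID 0x1100, so real 82540 hardware no longer takes this path.)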
*/ - if (hw->mac.type == e1000_82540 && cur_context->cksum_flags) { + if (hw->subsystem_vendor_id == 0x1af4 && + hw->subsystem_device_id == 0x1100 && + cur_context->cksum_flags) { if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM) zeroed.upper.fields.popts |= E1000_TXD_POPTS_IXSM; diff --git a/usr/src/uts/common/io/igb/igb_regs.h b/usr/src/uts/common/io/igb/igb_regs.h index a2b2450b67..b554ef6d23 100644 --- a/usr/src/uts/common/io/igb/igb_regs.h +++ b/usr/src/uts/common/io/igb/igb_regs.h @@ -216,7 +216,7 @@ extern "C" { /* Packet Buffer DWORD (_n) */ #define E1000_PBSLAD(_n) (0x03110 + (0x4 * (_n))) #define E1000_TXPBS 0x03404 /* Tx Packet Buffer Size - RW */ -/* Same as TXPBS, renamed for newer adpaters - RW */ +/* Same as TXPBS, renamed for newer adapters - RW */ #define E1000_ITPBS 0x03404 #define E1000_TDFH 0x03410 /* Tx Data FIFO Head - RW */ #define E1000_TDFT 0x03418 /* Tx Data FIFO Tail - RW */ diff --git a/usr/src/uts/common/io/ipw/ipw2100.c b/usr/src/uts/common/io/ipw/ipw2100.c index 2559c64762..8afe91725e 100644 --- a/usr/src/uts/common/io/ipw/ipw2100.c +++ b/usr/src/uts/common/io/ipw/ipw2100.c @@ -1273,7 +1273,7 @@ ipw2100_chip_reset(struct ipw2100_softc *sc) ipw2100_master_stop(sc); /* - * move adatper to DO state + * move adapter to DO state */ tmp = ipw2100_csr_get32(sc, IPW2100_CSR_CTL); ipw2100_csr_put32(sc, IPW2100_CSR_CTL, tmp | IPW2100_CTL_INIT); diff --git a/usr/src/uts/common/io/iwi/ipw2200.h b/usr/src/uts/common/io/iwi/ipw2200.h index b7676ffffa..58c3701ba2 100644 --- a/usr/src/uts/common/io/iwi/ipw2200.h +++ b/usr/src/uts/common/io/iwi/ipw2200.h @@ -38,7 +38,7 @@ extern "C" { #endif /* - * Intel Wireless PRO/2200 mini-pci adpater drier + * Intel Wireless PRO/2200 mini-pci adapter driver * ipw2200.h: common definitions and interface to user land application */ #include <sys/types.h> diff --git a/usr/src/uts/common/io/vioblk/vioblk.c b/usr/src/uts/common/io/vioblk/vioblk.c new file mode 100644 index 0000000000..4d63b7b7ea --- /dev/null +++ b/usr/src/uts/common/io/vioblk/vioblk.c @@ -0,0 +1,1072 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2012, Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012, Alexey Zaytsev <alexey.zaytsev@gmail.com> + */ + + +#include <sys/modctl.h> +#include <sys/blkdev.h> +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/param.h> +#include <sys/stropts.h> +#include <sys/stream.h> +#include <sys/strsubr.h> +#include <sys/kmem.h> +#include <sys/conf.h> +#include <sys/devops.h> +#include <sys/ksynch.h> +#include <sys/stat.h> +#include <sys/modctl.h> +#include <sys/debug.h> +#include <sys/pci.h> +#include <sys/sysmacros.h> +#include "virtiovar.h" +#include "virtioreg.h" + +/* Feature bits */ +#define VIRTIO_BLK_F_BARRIER (1<<0) +#define VIRTIO_BLK_F_SIZE_MAX (1<<1) +#define VIRTIO_BLK_F_SEG_MAX (1<<2) +#define VIRTIO_BLK_F_GEOMETRY (1<<4) +#define VIRTIO_BLK_F_RO (1<<5) +#define VIRTIO_BLK_F_BLK_SIZE (1<<6) +#define VIRTIO_BLK_F_SCSI (1<<7) +#define VIRTIO_BLK_F_FLUSH (1<<9) +#define VIRTIO_BLK_F_TOPOLOGY (1<<10) + +/* Configuration registers */ +#define VIRTIO_BLK_CONFIG_CAPACITY 0 /* 64bit */ +#define VIRTIO_BLK_CONFIG_SIZE_MAX 8 /* 32bit */ +#define VIRTIO_BLK_CONFIG_SEG_MAX 12 /* 32bit */ +#define VIRTIO_BLK_CONFIG_GEOMETRY_C 16 /* 16bit */ +#define VIRTIO_BLK_CONFIG_GEOMETRY_H 18 /* 8bit */ +#define VIRTIO_BLK_CONFIG_GEOMETRY_S 19 /* 8bit */ +#define VIRTIO_BLK_CONFIG_BLK_SIZE 20 /* 32bit */ +#define VIRTIO_BLK_CONFIG_TOPOLOGY 24 /* 32bit */ + +/* Command */ +#define VIRTIO_BLK_T_IN 0 +#define VIRTIO_BLK_T_OUT 1 +#define VIRTIO_BLK_T_SCSI_CMD 2 +#define VIRTIO_BLK_T_SCSI_CMD_OUT 3 +#define VIRTIO_BLK_T_FLUSH 4 +#define VIRTIO_BLK_T_FLUSH_OUT 5 +#define VIRTIO_BLK_T_GET_ID 8 +#define VIRTIO_BLK_T_BARRIER 0x80000000 + +#define VIRTIO_BLK_ID_BYTES 20 /* devid */ + +/* Statuses */ +#define VIRTIO_BLK_S_OK 0 +#define VIRTIO_BLK_S_IOERR 1 +#define VIRTIO_BLK_S_UNSUPP 2 + +#define DEF_MAXINDIRECT (128) +#define DEF_MAXSECTOR (4096) + +#define VIOBLK_POISON 0xdead0001dead0001 + +/* + * Static Variables. 
+ */ +static char vioblk_ident[] = "VirtIO block driver"; + +/* Request header structure */ +struct vioblk_req_hdr { + uint32_t type; /* VIRTIO_BLK_T_* */ + uint32_t ioprio; + uint64_t sector; +}; + +struct vioblk_req { + struct vioblk_req_hdr hdr; + uint8_t status; + uint8_t unused[3]; + unsigned int ndmac; + ddi_dma_handle_t dmah; + ddi_dma_handle_t bd_dmah; + ddi_dma_cookie_t dmac; + bd_xfer_t *xfer; +}; + +struct vioblk_stats { + struct kstat_named sts_rw_outofmemory; + struct kstat_named sts_rw_badoffset; + struct kstat_named sts_rw_queuemax; + struct kstat_named sts_rw_cookiesmax; + struct kstat_named sts_rw_cacheflush; + struct kstat_named sts_intr_queuemax; + struct kstat_named sts_intr_total; + struct kstat_named sts_io_errors; + struct kstat_named sts_unsupp_errors; + struct kstat_named sts_nxio_errors; +}; + +struct vioblk_lstats { + uint64_t rw_cacheflush; + uint64_t intr_total; + unsigned int rw_cookiesmax; + unsigned int intr_queuemax; + unsigned int io_errors; + unsigned int unsupp_errors; + unsigned int nxio_errors; +}; + +struct vioblk_softc { + dev_info_t *sc_dev; /* mirrors virtio_softc->sc_dev */ + struct virtio_softc sc_virtio; + struct virtqueue *sc_vq; + bd_handle_t bd_h; + struct vioblk_req *sc_reqs; + struct vioblk_stats *ks_data; + kstat_t *sc_intrstat; + uint64_t sc_capacity; + uint64_t sc_nblks; + struct vioblk_lstats sc_stats; + short sc_blkflags; + boolean_t sc_in_poll_mode; + boolean_t sc_readonly; + int sc_blk_size; + int sc_seg_max; + int sc_seg_size_max; + kmutex_t lock_devid; + kcondvar_t cv_devid; + char devid[VIRTIO_BLK_ID_BYTES + 1]; +}; + +static int vioblk_read(void *arg, bd_xfer_t *xfer); +static int vioblk_write(void *arg, bd_xfer_t *xfer); +static int vioblk_flush(void *arg, bd_xfer_t *xfer); +static void vioblk_driveinfo(void *arg, bd_drive_t *drive); +static int vioblk_mediainfo(void *arg, bd_media_t *media); +static int vioblk_devid_init(void *, dev_info_t *, ddi_devid_t *); +uint_t vioblk_int_handler(caddr_t arg1, caddr_t arg2); + +static bd_ops_t vioblk_ops = { + BD_OPS_VERSION_0, + vioblk_driveinfo, + vioblk_mediainfo, + vioblk_devid_init, + vioblk_flush, + vioblk_read, + vioblk_write, +}; + +static int vioblk_quiesce(dev_info_t *); +static int vioblk_attach(dev_info_t *, ddi_attach_cmd_t); +static int vioblk_detach(dev_info_t *, ddi_detach_cmd_t); + +static struct dev_ops vioblk_dev_ops = { + DEVO_REV, + 0, + ddi_no_info, + nulldev, /* identify */ + nulldev, /* probe */ + vioblk_attach, /* attach */ + vioblk_detach, /* detach */ + nodev, /* reset */ + NULL, /* cb_ops */ + NULL, /* bus_ops */ + NULL, /* power */ + vioblk_quiesce /* quiesce */ +}; + + + +/* Standard module linkage initialization for a block driver */ +extern struct mod_ops mod_driverops; + +static struct modldrv modldrv = { + &mod_driverops, /* Type of module. This one is a driver */ + vioblk_ident, /* short description */ + &vioblk_dev_ops /* driver specific ops */ +}; + +static struct modlinkage modlinkage = { + MODREV_1, + { + (void *)&modldrv, + NULL, + }, +}; + +ddi_device_acc_attr_t vioblk_attr = { + DDI_DEVICE_ATTR_V0, + DDI_NEVERSWAP_ACC, /* virtio is always native byte order */ + DDI_STORECACHING_OK_ACC, + DDI_DEFAULT_ACC +}; + +/* DMA attr for the header/status blocks.
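+ * The header and the one-byte status that follows it are bound with + * a single cookie (dma_attr_sgllen == 1), which is what allows + * vioblk_rw() to address the status byte at dmac_laddress + + * sizeof (struct vioblk_req_hdr).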
*/ +static ddi_dma_attr_t vioblk_req_dma_attr = { + DMA_ATTR_V0, /* dma_attr version */ + 0, /* dma_attr_addr_lo */ + 0xFFFFFFFFFFFFFFFFull, /* dma_attr_addr_hi */ + 0x00000000FFFFFFFFull, /* dma_attr_count_max */ + 1, /* dma_attr_align */ + 1, /* dma_attr_burstsizes */ + 1, /* dma_attr_minxfer */ + 0xFFFFFFFFull, /* dma_attr_maxxfer */ + 0xFFFFFFFFFFFFFFFFull, /* dma_attr_seg */ + 1, /* dma_attr_sgllen */ + 1, /* dma_attr_granular */ + 0, /* dma_attr_flags */ +}; + +/* DMA attr for the data blocks. */ +static ddi_dma_attr_t vioblk_bd_dma_attr = { + DMA_ATTR_V0, /* dma_attr version */ + 0, /* dma_attr_addr_lo */ + 0xFFFFFFFFFFFFFFFFull, /* dma_attr_addr_hi */ + 0x00000000FFFFFFFFull, /* dma_attr_count_max */ + 1, /* dma_attr_align */ + 1, /* dma_attr_burstsizes */ + 1, /* dma_attr_minxfer */ + 0, /* dma_attr_maxxfer, set in attach */ + 0xFFFFFFFFFFFFFFFFull, /* dma_attr_seg */ + 0, /* dma_attr_sgllen, set in attach */ + 1, /* dma_attr_granular */ + 0, /* dma_attr_flags */ +}; + +static int +vioblk_rw(struct vioblk_softc *sc, bd_xfer_t *xfer, int type, + uint32_t len) +{ + struct vioblk_req *req; + struct vq_entry *ve_hdr; + int total_cookies, write; + + write = (type == VIRTIO_BLK_T_OUT || + type == VIRTIO_BLK_T_FLUSH_OUT) ? 1 : 0; + total_cookies = 2; + + if ((xfer->x_blkno + xfer->x_nblks) > sc->sc_nblks) { + sc->ks_data->sts_rw_badoffset.value.ui64++; + return (EINVAL); + } + + /* allocate top entry */ + ve_hdr = vq_alloc_entry(sc->sc_vq); + if (!ve_hdr) { + sc->ks_data->sts_rw_outofmemory.value.ui64++; + return (ENOMEM); + } + + /* getting request */ + req = &sc->sc_reqs[ve_hdr->qe_index]; + req->hdr.type = type; + req->hdr.ioprio = 0; + req->hdr.sector = xfer->x_blkno; + req->xfer = xfer; + + /* Header */ + virtio_ve_add_indirect_buf(ve_hdr, req->dmac.dmac_laddress, + sizeof (struct vioblk_req_hdr), B_TRUE); + + /* Payload */ + if (len > 0) { + virtio_ve_add_cookie(ve_hdr, xfer->x_dmah, xfer->x_dmac, + xfer->x_ndmac, write ? B_TRUE : B_FALSE); + total_cookies += xfer->x_ndmac; + } + + /* Status */ + virtio_ve_add_indirect_buf(ve_hdr, + req->dmac.dmac_laddress + sizeof (struct vioblk_req_hdr), + sizeof (uint8_t), B_FALSE); + + /* sending the whole chain to the device */ + virtio_push_chain(ve_hdr, B_TRUE); + + if (sc->sc_stats.rw_cookiesmax < total_cookies) + sc->sc_stats.rw_cookiesmax = total_cookies; + + return (DDI_SUCCESS); +} + +/* + * Now in polling mode. Interrupts are off, so we + * 1) poll for the already queued requests to complete. + * 2) push our request. + * 3) wait for our request to complete. + */ +static int +vioblk_rw_poll(struct vioblk_softc *sc, bd_xfer_t *xfer, + int type, uint32_t len) +{ + clock_t tmout; + int ret; + + ASSERT(xfer->x_flags & BD_XFER_POLL); + + /* Prevent a hard hang. */ + tmout = drv_usectohz(30000000); + + /* Poll for an empty queue */ + while (vq_num_used(sc->sc_vq)) { + /* Check if any pending requests completed. */ + ret = vioblk_int_handler((caddr_t)&sc->sc_virtio, NULL); + if (ret != DDI_INTR_CLAIMED) { + drv_usecwait(10); + tmout -= 10; + return (ETIMEDOUT); + } + } + + ret = vioblk_rw(sc, xfer, type, len); + if (ret) + return (ret); + + tmout = drv_usectohz(30000000); + /* Poll for an empty queue again. */ + while (vq_num_used(sc->sc_vq)) { + /* Check if any pending requests completed. 
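+ * vioblk_int_handler() reaps every completed descriptor off the + * used ring, including our own request, so an empty queue here + * means our I/O has finished.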
*/ + ret = vioblk_int_handler((caddr_t)&sc->sc_virtio, NULL); + if (ret != DDI_INTR_CLAIMED) { + drv_usecwait(10); + tmout -= 10; + return (ETIMEDOUT); + } + } + + return (DDI_SUCCESS); +} + +static int +vioblk_read(void *arg, bd_xfer_t *xfer) +{ + int ret; + struct vioblk_softc *sc = (void *)arg; + + if (xfer->x_flags & BD_XFER_POLL) { + if (!sc->sc_in_poll_mode) { + virtio_stop_vq_intr(sc->sc_vq); + sc->sc_in_poll_mode = 1; + } + + ret = vioblk_rw_poll(sc, xfer, VIRTIO_BLK_T_IN, + xfer->x_nblks * DEV_BSIZE); + } else { + if (sc->sc_in_poll_mode) { + virtio_start_vq_intr(sc->sc_vq); + sc->sc_in_poll_mode = 0; + } + + ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_IN, + xfer->x_nblks * DEV_BSIZE); + } + + return (ret); +} + +static int +vioblk_write(void *arg, bd_xfer_t *xfer) +{ + int ret; + struct vioblk_softc *sc = (void *)arg; + + if (xfer->x_flags & BD_XFER_POLL) { + if (!sc->sc_in_poll_mode) { + virtio_stop_vq_intr(sc->sc_vq); + sc->sc_in_poll_mode = 1; + } + + ret = vioblk_rw_poll(sc, xfer, VIRTIO_BLK_T_OUT, + xfer->x_nblks * DEV_BSIZE); + } else { + if (sc->sc_in_poll_mode) { + virtio_start_vq_intr(sc->sc_vq); + sc->sc_in_poll_mode = 0; + } + + ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_OUT, + xfer->x_nblks * DEV_BSIZE); + } + return (ret); +} + +static int +vioblk_flush(void *arg, bd_xfer_t *xfer) +{ + int ret; + struct vioblk_softc *sc = (void *)arg; + + ASSERT((xfer->x_flags & BD_XFER_POLL) == 0); + + ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_FLUSH_OUT, + xfer->x_nblks * DEV_BSIZE); + + if (!ret) + sc->sc_stats.rw_cacheflush++; + + return (ret); +} + + +static void +vioblk_driveinfo(void *arg, bd_drive_t *drive) +{ + struct vioblk_softc *sc = (void *)arg; + + drive->d_qsize = sc->sc_vq->vq_num; + drive->d_removable = B_FALSE; + drive->d_hotpluggable = B_TRUE; + drive->d_target = 0; + drive->d_lun = 0; +} + +static int +vioblk_mediainfo(void *arg, bd_media_t *media) +{ + struct vioblk_softc *sc = (void *)arg; + + media->m_nblks = sc->sc_nblks; + media->m_blksize = DEV_BSIZE; + media->m_readonly = sc->sc_readonly; + return (0); +} + +static int +vioblk_devid_init(void *arg, dev_info_t *devinfo, ddi_devid_t *devid) +{ + struct vioblk_softc *sc = (void *)arg; + clock_t deadline; + int ret; + bd_xfer_t xfer; + + deadline = ddi_get_lbolt() + (clock_t)drv_usectohz(3 * 1000000); + (void) memset(&xfer, 0, sizeof (bd_xfer_t)); + xfer.x_nblks = 1; + + ret = ddi_dma_alloc_handle(sc->sc_dev, &vioblk_bd_dma_attr, + DDI_DMA_SLEEP, NULL, &xfer.x_dmah); + if (ret != DDI_SUCCESS) + goto out_alloc; + + ret = ddi_dma_addr_bind_handle(xfer.x_dmah, NULL, (caddr_t)&sc->devid, + VIRTIO_BLK_ID_BYTES, DDI_DMA_READ | DDI_DMA_CONSISTENT, + DDI_DMA_SLEEP, NULL, &xfer.x_dmac, &xfer.x_ndmac); + if (ret != DDI_DMA_MAPPED) { + ret = DDI_FAILURE; + goto out_map; + } + + mutex_enter(&sc->lock_devid); + + ret = vioblk_rw(sc, &xfer, VIRTIO_BLK_T_GET_ID, + VIRTIO_BLK_ID_BYTES); + if (ret) { + mutex_exit(&sc->lock_devid); + goto out_rw; + } + + /* wait for reply */ + ret = cv_timedwait(&sc->cv_devid, &sc->lock_devid, deadline); + mutex_exit(&sc->lock_devid); + + (void) ddi_dma_unbind_handle(xfer.x_dmah); + ddi_dma_free_handle(&xfer.x_dmah); + + /* timeout */ + if (ret < 0) { + dev_err(devinfo, CE_WARN, "Cannot get devid from the device"); + return (DDI_FAILURE); + } + + ret = ddi_devid_init(devinfo, DEVID_ATA_SERIAL, + VIRTIO_BLK_ID_BYTES, sc->devid, devid); + if (ret != DDI_SUCCESS) { + dev_err(devinfo, CE_WARN, "Cannot build devid from the device"); + return (ret); + } + + dev_debug(sc->sc_dev, CE_NOTE, + "devid 
%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x", + sc->devid[0], sc->devid[1], sc->devid[2], sc->devid[3], + sc->devid[4], sc->devid[5], sc->devid[6], sc->devid[7], + sc->devid[8], sc->devid[9], sc->devid[10], sc->devid[11], + sc->devid[12], sc->devid[13], sc->devid[14], sc->devid[15], + sc->devid[16], sc->devid[17], sc->devid[18], sc->devid[19]); + + return (0); + +out_rw: + (void) ddi_dma_unbind_handle(xfer.x_dmah); +out_map: + ddi_dma_free_handle(&xfer.x_dmah); +out_alloc: + return (ret); +} + +static void +vioblk_show_features(struct vioblk_softc *sc, const char *prefix, + uint32_t features) +{ + char buf[512]; + char *bufp = buf; + char *bufend = buf + sizeof (buf); + + /* LINTED E_PTRDIFF_OVERFLOW */ + bufp += snprintf(bufp, bufend - bufp, prefix); + + /* LINTED E_PTRDIFF_OVERFLOW */ + bufp += virtio_show_features(features, bufp, bufend - bufp); + + + /* LINTED E_PTRDIFF_OVERFLOW */ + bufp += snprintf(bufp, bufend - bufp, "Vioblk ( "); + + if (features & VIRTIO_BLK_F_BARRIER) + /* LINTED E_PTRDIFF_OVERFLOW */ + bufp += snprintf(bufp, bufend - bufp, "BARRIER "); + if (features & VIRTIO_BLK_F_SIZE_MAX) + /* LINTED E_PTRDIFF_OVERFLOW */ + bufp += snprintf(bufp, bufend - bufp, "SIZE_MAX "); + if (features & VIRTIO_BLK_F_SEG_MAX) + /* LINTED E_PTRDIFF_OVERFLOW */ + bufp += snprintf(bufp, bufend - bufp, "SEG_MAX "); + if (features & VIRTIO_BLK_F_GEOMETRY) + /* LINTED E_PTRDIFF_OVERFLOW */ + bufp += snprintf(bufp, bufend - bufp, "GEOMETRY "); + if (features & VIRTIO_BLK_F_RO) + /* LINTED E_PTRDIFF_OVERFLOW */ + bufp += snprintf(bufp, bufend - bufp, "RO "); + if (features & VIRTIO_BLK_F_BLK_SIZE) + /* LINTED E_PTRDIFF_OVERFLOW */ + bufp += snprintf(bufp, bufend - bufp, "BLK_SIZE "); + if (features & VIRTIO_BLK_F_SCSI) + /* LINTED E_PTRDIFF_OVERFLOW */ + bufp += snprintf(bufp, bufend - bufp, "SCSI "); + if (features & VIRTIO_BLK_F_FLUSH) + /* LINTED E_PTRDIFF_OVERFLOW */ + bufp += snprintf(bufp, bufend - bufp, "FLUSH "); + if (features & VIRTIO_BLK_F_TOPOLOGY) + /* LINTED E_PTRDIFF_OVERFLOW */ + bufp += snprintf(bufp, bufend - bufp, "TOPOLOGY "); + + /* LINTED E_PTRDIFF_OVERFLOW */ + bufp += snprintf(bufp, bufend - bufp, ")"); + *bufp = '\0'; + + dev_debug(sc->sc_dev, CE_NOTE, "%s", buf); +} + +static int +vioblk_dev_features(struct vioblk_softc *sc) +{ + uint32_t host_features; + + host_features = virtio_negotiate_features(&sc->sc_virtio, + VIRTIO_BLK_F_RO | + VIRTIO_BLK_F_GEOMETRY | + VIRTIO_BLK_F_BLK_SIZE | + VIRTIO_BLK_F_FLUSH | + VIRTIO_BLK_F_SEG_MAX | + VIRTIO_BLK_F_SIZE_MAX | + VIRTIO_F_RING_INDIRECT_DESC); + + vioblk_show_features(sc, "Host features: ", host_features); + vioblk_show_features(sc, "Negotiated features: ", + sc->sc_virtio.sc_features); + + if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) { + dev_err(sc->sc_dev, CE_NOTE, + "Host does not support RING_INDIRECT_DESC, bye."); + return (DDI_FAILURE); + } + + return (DDI_SUCCESS); +} + +/* ARGSUSED */ +uint_t +vioblk_int_handler(caddr_t arg1, caddr_t arg2) +{ + struct virtio_softc *vsc = (void *)arg1; + struct vioblk_softc *sc = container_of(vsc, + struct vioblk_softc, sc_virtio); + struct vq_entry *ve; + uint32_t len; + int i = 0, error; + + while ((ve = virtio_pull_chain(sc->sc_vq, &len))) { + struct vioblk_req *req = &sc->sc_reqs[ve->qe_index]; + bd_xfer_t *xfer = req->xfer; + uint8_t status = req->status; + uint32_t type = req->hdr.type; + + if (req->xfer == (void *)VIOBLK_POISON) { + dev_err(sc->sc_dev, CE_WARN, "Poisoned descriptor!"); + virtio_free_chain(ve); + return (DDI_INTR_CLAIMED); + } + + req->xfer = 
(void *) VIOBLK_POISON; + + /* Note: blkdev tears down the payload mapping for us. */ + virtio_free_chain(ve); + + /* returning payload back to blkdev */ + switch (status) { + case VIRTIO_BLK_S_OK: + error = 0; + break; + case VIRTIO_BLK_S_IOERR: + error = EIO; + sc->sc_stats.io_errors++; + break; + case VIRTIO_BLK_S_UNSUPP: + sc->sc_stats.unsupp_errors++; + error = ENOTTY; + break; + default: + sc->sc_stats.nxio_errors++; + error = ENXIO; + break; + } + + if (type == VIRTIO_BLK_T_GET_ID) { + /* notify devid_init */ + mutex_enter(&sc->lock_devid); + cv_broadcast(&sc->cv_devid); + mutex_exit(&sc->lock_devid); + } else + bd_xfer_done(xfer, error); + + i++; + } + + /* update stats */ + if (sc->sc_stats.intr_queuemax < i) + sc->sc_stats.intr_queuemax = i; + sc->sc_stats.intr_total++; + + return (DDI_INTR_CLAIMED); +} + +/* ARGSUSED */ +uint_t +vioblk_config_handler(caddr_t arg1, caddr_t arg2) +{ + return (DDI_INTR_CLAIMED); +} + +static int +vioblk_register_ints(struct vioblk_softc *sc) +{ + int ret; + + struct virtio_int_handler vioblk_conf_h = { + vioblk_config_handler + }; + + struct virtio_int_handler vioblk_vq_h[] = { + { vioblk_int_handler }, + { NULL }, + }; + + ret = virtio_register_ints(&sc->sc_virtio, + &vioblk_conf_h, vioblk_vq_h); + + return (ret); +} + +static void +vioblk_free_reqs(struct vioblk_softc *sc) +{ + int i, qsize; + + qsize = sc->sc_vq->vq_num; + + for (i = 0; i < qsize; i++) { + struct vioblk_req *req = &sc->sc_reqs[i]; + + if (req->ndmac) + (void) ddi_dma_unbind_handle(req->dmah); + + if (req->dmah) + ddi_dma_free_handle(&req->dmah); + } + + kmem_free(sc->sc_reqs, sizeof (struct vioblk_req) * qsize); +} + +static int +vioblk_alloc_reqs(struct vioblk_softc *sc) +{ + int i, qsize; + int ret; + + qsize = sc->sc_vq->vq_num; + + sc->sc_reqs = kmem_zalloc(sizeof (struct vioblk_req) * qsize, KM_SLEEP); + + for (i = 0; i < qsize; i++) { + struct vioblk_req *req = &sc->sc_reqs[i]; + + ret = ddi_dma_alloc_handle(sc->sc_dev, &vioblk_req_dma_attr, + DDI_DMA_SLEEP, NULL, &req->dmah); + if (ret != DDI_SUCCESS) { + + dev_err(sc->sc_dev, CE_WARN, + "Can't allocate dma handle for req " + "buffer %d", i); + goto exit; + } + + ret = ddi_dma_addr_bind_handle(req->dmah, NULL, + (caddr_t)&req->hdr, + sizeof (struct vioblk_req_hdr) + sizeof (uint8_t), + DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, + NULL, &req->dmac, &req->ndmac); + if (ret != DDI_DMA_MAPPED) { + dev_err(sc->sc_dev, CE_WARN, + "Can't bind req buffer %d", i); + goto exit; + } + } + + return (0); + +exit: + vioblk_free_reqs(sc); + return (ENOMEM); +} + + +static int +vioblk_ksupdate(kstat_t *ksp, int rw) +{ + struct vioblk_softc *sc = ksp->ks_private; + + if (rw == KSTAT_WRITE) + return (EACCES); + + sc->ks_data->sts_rw_cookiesmax.value.ui32 = sc->sc_stats.rw_cookiesmax; + sc->ks_data->sts_intr_queuemax.value.ui32 = sc->sc_stats.intr_queuemax; + sc->ks_data->sts_unsupp_errors.value.ui32 = sc->sc_stats.unsupp_errors; + sc->ks_data->sts_nxio_errors.value.ui32 = sc->sc_stats.nxio_errors; + sc->ks_data->sts_io_errors.value.ui32 = sc->sc_stats.io_errors; + sc->ks_data->sts_rw_cacheflush.value.ui64 = sc->sc_stats.rw_cacheflush; + sc->ks_data->sts_intr_total.value.ui64 = sc->sc_stats.intr_total; + + + return (0); +} + +static int +vioblk_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) +{ + int ret = DDI_SUCCESS; + int instance; + struct vioblk_softc *sc; + struct virtio_softc *vsc; + struct vioblk_stats *ks_data; + + instance = ddi_get_instance(devinfo); + + switch (cmd) { + case DDI_ATTACH: + break; + + case DDI_RESUME: + 
case DDI_PM_RESUME: + dev_err(devinfo, CE_WARN, "resume not supported yet"); + ret = DDI_FAILURE; + goto exit; + + default: + dev_err(devinfo, CE_WARN, "cmd 0x%x not recognized", cmd); + ret = DDI_FAILURE; + goto exit; + } + + sc = kmem_zalloc(sizeof (struct vioblk_softc), KM_SLEEP); + ddi_set_driver_private(devinfo, sc); + + vsc = &sc->sc_virtio; + + /* Duplicate for faster access / less typing */ + sc->sc_dev = devinfo; + vsc->sc_dev = devinfo; + + cv_init(&sc->cv_devid, NULL, CV_DRIVER, NULL); + mutex_init(&sc->lock_devid, NULL, MUTEX_DRIVER, NULL); + + /* + * Initialize interrupt kstat. This should not normally fail, since + * we don't use a persistent stat. We do it this way to avoid having + * to test for it at run time on the hot path. + */ + sc->sc_intrstat = kstat_create("vioblk", instance, + "intrs", "controller", KSTAT_TYPE_NAMED, + sizeof (struct vioblk_stats) / sizeof (kstat_named_t), + KSTAT_FLAG_PERSISTENT); + if (sc->sc_intrstat == NULL) { + dev_err(devinfo, CE_WARN, "kstat_create failed"); + goto exit_intrstat; + } + ks_data = (struct vioblk_stats *)sc->sc_intrstat->ks_data; + kstat_named_init(&ks_data->sts_rw_outofmemory, + "total_rw_outofmemory", KSTAT_DATA_UINT64); + kstat_named_init(&ks_data->sts_rw_badoffset, + "total_rw_badoffset", KSTAT_DATA_UINT64); + kstat_named_init(&ks_data->sts_intr_total, + "total_intr", KSTAT_DATA_UINT64); + kstat_named_init(&ks_data->sts_io_errors, + "total_io_errors", KSTAT_DATA_UINT32); + kstat_named_init(&ks_data->sts_unsupp_errors, + "total_unsupp_errors", KSTAT_DATA_UINT32); + kstat_named_init(&ks_data->sts_nxio_errors, + "total_nxio_errors", KSTAT_DATA_UINT32); + kstat_named_init(&ks_data->sts_rw_cacheflush, + "total_rw_cacheflush", KSTAT_DATA_UINT64); + kstat_named_init(&ks_data->sts_rw_cookiesmax, + "max_rw_cookies", KSTAT_DATA_UINT32); + kstat_named_init(&ks_data->sts_intr_queuemax, + "max_intr_queue", KSTAT_DATA_UINT32); + sc->ks_data = ks_data; + sc->sc_intrstat->ks_private = sc; + sc->sc_intrstat->ks_update = vioblk_ksupdate; + kstat_install(sc->sc_intrstat); + + /* map BAR0 */ + ret = ddi_regs_map_setup(devinfo, 1, + (caddr_t *)&sc->sc_virtio.sc_io_addr, + 0, 0, &vioblk_attr, &sc->sc_virtio.sc_ioh); + if (ret != DDI_SUCCESS) { + dev_err(devinfo, CE_WARN, "unable to map bar0: [%d]", ret); + goto exit_map; + } + + virtio_device_reset(&sc->sc_virtio); + virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK); + virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER); + + if (vioblk_register_ints(sc)) { + dev_err(devinfo, CE_WARN, "Unable to add interrupt"); + goto exit_int; + } + + ret = vioblk_dev_features(sc); + if (ret) + goto exit_features; + + if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_RO) + sc->sc_readonly = B_TRUE; + else + sc->sc_readonly = B_FALSE; + + sc->sc_capacity = virtio_read_device_config_8(&sc->sc_virtio, + VIRTIO_BLK_CONFIG_CAPACITY); + sc->sc_nblks = sc->sc_capacity; + + /* + * BLK_SIZE is just a hint for the optimal logical block + * granularity. Ignored for now. + */ + sc->sc_blk_size = DEV_BSIZE; + if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_BLK_SIZE) { + sc->sc_blk_size = virtio_read_device_config_4(&sc->sc_virtio, + VIRTIO_BLK_CONFIG_BLK_SIZE); + } + + /* Flushing is not supported. 
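+ * With o_sync_cache cleared, blkdev knows the device has no write + * cache to flush and can reject cache-flush requests itself instead + * of passing them down to us.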
*/ + if (!(sc->sc_virtio.sc_features & VIRTIO_BLK_F_FLUSH)) { + vioblk_ops.o_sync_cache = NULL; + } + + sc->sc_seg_max = DEF_MAXINDIRECT; + /* The max number of segments (cookies) in a request */ + if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_SEG_MAX) { + sc->sc_seg_max = virtio_read_device_config_4(&sc->sc_virtio, + VIRTIO_BLK_CONFIG_SEG_MAX); + + /* That's what Linux does. */ + if (!sc->sc_seg_max) + sc->sc_seg_max = 1; + + /* + * SEG_MAX corresponds to the number of _data_ + * blocks in a request + */ + sc->sc_seg_max += 2; + } + /* 2 descriptors taken for header/status */ + vioblk_bd_dma_attr.dma_attr_sgllen = sc->sc_seg_max - 2; + + + /* The maximum size for a cookie in a request. */ + sc->sc_seg_size_max = DEF_MAXSECTOR; + if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_SIZE_MAX) { + sc->sc_seg_size_max = virtio_read_device_config_4( + &sc->sc_virtio, VIRTIO_BLK_CONFIG_SIZE_MAX); + } + + /* The maximum request size */ + vioblk_bd_dma_attr.dma_attr_maxxfer = + vioblk_bd_dma_attr.dma_attr_sgllen * sc->sc_seg_size_max; + + dev_debug(devinfo, CE_NOTE, + "nblks=%" PRIu64 " blksize=%d num_seg=%d, " + "seg_size=%d, maxxfer=%" PRIu64, + sc->sc_nblks, sc->sc_blk_size, + vioblk_bd_dma_attr.dma_attr_sgllen, + sc->sc_seg_size_max, + vioblk_bd_dma_attr.dma_attr_maxxfer); + + + sc->sc_vq = virtio_alloc_vq(&sc->sc_virtio, 0, 0, + sc->sc_seg_max, "I/O request"); + if (sc->sc_vq == NULL) { + goto exit_alloc1; + } + + ret = vioblk_alloc_reqs(sc); + if (ret) { + goto exit_alloc2; + } + + sc->bd_h = bd_alloc_handle(sc, &vioblk_ops, &vioblk_bd_dma_attr, + KM_SLEEP); + + + virtio_set_status(&sc->sc_virtio, + VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK); + virtio_start_vq_intr(sc->sc_vq); + + ret = virtio_enable_ints(&sc->sc_virtio); + if (ret) + goto exit_enable_ints; + + ret = bd_attach_handle(devinfo, sc->bd_h); + if (ret != DDI_SUCCESS) { + dev_err(devinfo, CE_WARN, "Failed to attach blkdev"); + goto exit_attach_bd; + } + + return (DDI_SUCCESS); + +exit_attach_bd: + /* + * There is no virtio_disable_ints(), it's done in virtio_release_ints. + * If they ever get split, don't forget to add a call here. 
+ */ +exit_enable_ints: + virtio_stop_vq_intr(sc->sc_vq); + bd_free_handle(sc->bd_h); + vioblk_free_reqs(sc); +exit_alloc2: + virtio_free_vq(sc->sc_vq); +exit_alloc1: +exit_features: + virtio_release_ints(&sc->sc_virtio); +exit_int: + virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED); + ddi_regs_map_free(&sc->sc_virtio.sc_ioh); +exit_map: + kstat_delete(sc->sc_intrstat); +exit_intrstat: + mutex_destroy(&sc->lock_devid); + cv_destroy(&sc->cv_devid); + kmem_free(sc, sizeof (struct vioblk_softc)); +exit: + return (ret); +} + +static int +vioblk_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) +{ + struct vioblk_softc *sc = ddi_get_driver_private(devinfo); + + switch (cmd) { + case DDI_DETACH: + break; + + case DDI_PM_SUSPEND: + cmn_err(CE_WARN, "suspend not supported yet"); + return (DDI_FAILURE); + + default: + cmn_err(CE_WARN, "cmd 0x%x unrecognized", cmd); + return (DDI_FAILURE); + } + + (void) bd_detach_handle(sc->bd_h); + virtio_stop_vq_intr(sc->sc_vq); + virtio_release_ints(&sc->sc_virtio); + vioblk_free_reqs(sc); + virtio_free_vq(sc->sc_vq); + virtio_device_reset(&sc->sc_virtio); + ddi_regs_map_free(&sc->sc_virtio.sc_ioh); + kstat_delete(sc->sc_intrstat); + kmem_free(sc, sizeof (struct vioblk_softc)); + + return (DDI_SUCCESS); +} + +static int +vioblk_quiesce(dev_info_t *devinfo) +{ + struct vioblk_softc *sc = ddi_get_driver_private(devinfo); + + virtio_stop_vq_intr(sc->sc_vq); + virtio_device_reset(&sc->sc_virtio); + + return (DDI_SUCCESS); +} + +int +_init(void) +{ + int rv; + + bd_mod_init(&vioblk_dev_ops); + + if ((rv = mod_install(&modlinkage)) != 0) { + bd_mod_fini(&vioblk_dev_ops); + } + + return (rv); +} + +int +_fini(void) +{ + int rv; + + if ((rv = mod_remove(&modlinkage)) == 0) { + bd_mod_fini(&vioblk_dev_ops); + } + + return (rv); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} diff --git a/usr/src/uts/common/io/virtio/virtio.c b/usr/src/uts/common/io/virtio/virtio.c new file mode 100644 index 0000000000..320dc0666a --- /dev/null +++ b/usr/src/uts/common/io/virtio/virtio.c @@ -0,0 +1,1348 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2012 Nexenta Systems, Inc. + * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> + */ + +/* Based on the NetBSD virtio driver by Minoura Makoto. */ +/* + * Copyright (c) 2010 Minoura Makoto. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <sys/conf.h> +#include <sys/kmem.h> +#include <sys/debug.h> +#include <sys/modctl.h> +#include <sys/autoconf.h> +#include <sys/ddi_impldefs.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sunndi.h> +#include <sys/avintr.h> +#include <sys/spl.h> +#include <sys/promif.h> +#include <sys/list.h> +#include <sys/bootconf.h> +#include <sys/bootsvcs.h> +#include <sys/sysmacros.h> +#include <sys/pci.h> + +#include "virtiovar.h" +#include "virtioreg.h" +#define NDEVNAMES (sizeof (virtio_device_name) / sizeof (char *)) +#define MINSEG_INDIRECT 2 /* use indirect if nsegs >= this value */ +#define VIRTQUEUE_ALIGN(n) (((n)+(VIRTIO_PAGE_SIZE-1)) & \ + ~(VIRTIO_PAGE_SIZE-1)) + +void +virtio_set_status(struct virtio_softc *sc, unsigned int status) +{ + int old = 0; + + if (status != 0) + old = ddi_get8(sc->sc_ioh, + (uint8_t *)(sc->sc_io_addr + + VIRTIO_CONFIG_DEVICE_STATUS)); + + ddi_put8(sc->sc_ioh, + (uint8_t *)(sc->sc_io_addr + VIRTIO_CONFIG_DEVICE_STATUS), + status | old); +} + +/* + * Negotiate features, save the result in sc->sc_features + */ +uint32_t +virtio_negotiate_features(struct virtio_softc *sc, uint32_t guest_features) +{ + uint32_t host_features; + uint32_t features; + + host_features = ddi_get32(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint32_t *)(sc->sc_io_addr + VIRTIO_CONFIG_DEVICE_FEATURES)); + + dev_debug(sc->sc_dev, CE_NOTE, + "host features: %x, guest features: %x", + host_features, guest_features); + + features = host_features & guest_features; + ddi_put32(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint32_t *)(sc->sc_io_addr + VIRTIO_CONFIG_GUEST_FEATURES), + features); + + sc->sc_features = features; + + return (host_features); +} + +size_t +virtio_show_features(uint32_t features, + char *buf, size_t len) +{ + char *orig_buf = buf; + char *bufend = buf + len; + + /* LINTED E_PTRDIFF_OVERFLOW */ + buf += snprintf(buf, bufend - buf, "Generic ( "); + if (features & VIRTIO_F_RING_INDIRECT_DESC) + /* LINTED E_PTRDIFF_OVERFLOW */ + buf += snprintf(buf, bufend - buf, "INDIRECT_DESC "); + + /* LINTED E_PTRDIFF_OVERFLOW */ + buf += snprintf(buf, bufend - buf, ") "); + + /* LINTED E_PTRDIFF_OVERFLOW */ + return (buf - orig_buf); +} + +boolean_t +virtio_has_feature(struct virtio_softc *sc, uint32_t feature) +{ + return (sc->sc_features & feature); +} + +/* + * Device configuration registers. 
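+ * These helpers index off sc_config_offset, which differs between the + * MSI and non-MSI register layouts, so they must not be used before + * virtio_register_ints() has picked one. 64-bit fields are accessed + * as two 32-bit halves, as virtio_read_device_config_8() below shows.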
+ */ +uint8_t +virtio_read_device_config_1(struct virtio_softc *sc, unsigned int index) +{ + ASSERT(sc->sc_config_offset); + return ddi_get8(sc->sc_ioh, + (uint8_t *)(sc->sc_io_addr + sc->sc_config_offset + index)); +} + +uint16_t +virtio_read_device_config_2(struct virtio_softc *sc, unsigned int index) +{ + ASSERT(sc->sc_config_offset); + return ddi_get16(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint16_t *)(sc->sc_io_addr + sc->sc_config_offset + index)); +} + +uint32_t +virtio_read_device_config_4(struct virtio_softc *sc, unsigned int index) +{ + ASSERT(sc->sc_config_offset); + return ddi_get32(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint32_t *)(sc->sc_io_addr + sc->sc_config_offset + index)); +} + +uint64_t +virtio_read_device_config_8(struct virtio_softc *sc, unsigned int index) +{ + uint64_t r; + + ASSERT(sc->sc_config_offset); + r = ddi_get32(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint32_t *)(sc->sc_io_addr + sc->sc_config_offset + + index + sizeof (uint32_t))); + + r <<= 32; + + r += ddi_get32(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint32_t *)(sc->sc_io_addr + sc->sc_config_offset + index)); + return (r); +} + +void +virtio_write_device_config_1(struct virtio_softc *sc, + unsigned int index, uint8_t value) +{ + ASSERT(sc->sc_config_offset); + ddi_put8(sc->sc_ioh, + (uint8_t *)(sc->sc_io_addr + sc->sc_config_offset + index), value); +} + +void +virtio_write_device_config_2(struct virtio_softc *sc, + unsigned int index, uint16_t value) +{ + ASSERT(sc->sc_config_offset); + ddi_put16(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint16_t *)(sc->sc_io_addr + sc->sc_config_offset + index), value); +} + +void +virtio_write_device_config_4(struct virtio_softc *sc, + unsigned int index, uint32_t value) +{ + ASSERT(sc->sc_config_offset); + ddi_put32(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint32_t *)(sc->sc_io_addr + sc->sc_config_offset + index), value); +} + +void +virtio_write_device_config_8(struct virtio_softc *sc, + unsigned int index, uint64_t value) +{ + ASSERT(sc->sc_config_offset); + ddi_put32(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint32_t *)(sc->sc_io_addr + sc->sc_config_offset + index), + value & 0xFFFFFFFF); + ddi_put32(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint32_t *)(sc->sc_io_addr + sc->sc_config_offset + + index + sizeof (uint32_t)), value >> 32); +} + +/* + * Start/stop vq interrupt. No guarantee. + */ +void +virtio_stop_vq_intr(struct virtqueue *vq) +{ + vq->vq_avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; +} + +void +virtio_start_vq_intr(struct virtqueue *vq) +{ + vq->vq_avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; +} + +static ddi_dma_attr_t virtio_vq_dma_attr = { + DMA_ATTR_V0, /* Version number */ + 0, /* low address */ + /* + * high address. 
Has to fit into 32 bits + * after page-shifting + */ + 0x00000FFFFFFFFFFF, + 0xFFFFFFFF, /* counter register max */ + VIRTIO_PAGE_SIZE, /* page alignment required */ + 0x3F, /* burst sizes: 1 - 32 */ + 0x1, /* minimum transfer size */ + 0xFFFFFFFF, /* max transfer size */ + 0xFFFFFFFF, /* address register max */ + 1, /* no scatter-gather */ + 1, /* device operates on bytes */ + 0, /* attr flag: set to 0 */ +}; + +static ddi_dma_attr_t virtio_vq_indirect_dma_attr = { + DMA_ATTR_V0, /* Version number */ + 0, /* low address */ + 0xFFFFFFFFFFFFFFFF, /* high address */ + 0xFFFFFFFF, /* counter register max */ + 1, /* No specific alignment */ + 0x3F, /* burst sizes: 1 - 32 */ + 0x1, /* minimum transfer size */ + 0xFFFFFFFF, /* max transfer size */ + 0xFFFFFFFF, /* address register max */ + 1, /* no scatter-gather */ + 1, /* device operates on bytes */ + 0, /* attr flag: set to 0 */ +}; + +/* Same for direct and indirect descriptors. */ +static ddi_device_acc_attr_t virtio_vq_devattr = { + DDI_DEVICE_ATTR_V0, + DDI_NEVERSWAP_ACC, + DDI_STORECACHING_OK_ACC, + DDI_DEFAULT_ACC +}; + +static void +virtio_free_indirect(struct vq_entry *entry) +{ + + (void) ddi_dma_unbind_handle(entry->qe_indirect_dma_handle); + ddi_dma_mem_free(&entry->qe_indirect_dma_acch); + ddi_dma_free_handle(&entry->qe_indirect_dma_handle); + + entry->qe_indirect_descs = NULL; +} + + +static int +virtio_alloc_indirect(struct virtio_softc *sc, struct vq_entry *entry) +{ + int allocsize, num; + size_t len; + unsigned int ncookies; + int ret; + + num = entry->qe_queue->vq_indirect_num; + ASSERT(num > 1); + + allocsize = sizeof (struct vring_desc) * num; + + ret = ddi_dma_alloc_handle(sc->sc_dev, &virtio_vq_indirect_dma_attr, + DDI_DMA_SLEEP, NULL, &entry->qe_indirect_dma_handle); + if (ret != DDI_SUCCESS) { + dev_err(sc->sc_dev, CE_WARN, + "Failed to allocate dma handle for indirect descriptors," + " entry %d, vq %d", entry->qe_index, + entry->qe_queue->vq_index); + goto out_alloc_handle; + } + + ret = ddi_dma_mem_alloc(entry->qe_indirect_dma_handle, + allocsize, &virtio_vq_devattr, + DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, + (caddr_t *)&entry->qe_indirect_descs, &len, + &entry->qe_indirect_dma_acch); + if (ret != DDI_SUCCESS) { + dev_err(sc->sc_dev, CE_WARN, + "Failed to allocate dma memory for indirect descriptors," + " entry %d, vq %d,", entry->qe_index, + entry->qe_queue->vq_index); + goto out_alloc; + } + + (void) memset(entry->qe_indirect_descs, 0xff, allocsize); + + ret = ddi_dma_addr_bind_handle(entry->qe_indirect_dma_handle, NULL, + (caddr_t)entry->qe_indirect_descs, len, + DDI_DMA_RDWR | DDI_DMA_CONSISTENT, + DDI_DMA_SLEEP, NULL, &entry->qe_indirect_dma_cookie, &ncookies); + if (ret != DDI_DMA_MAPPED) { + dev_err(sc->sc_dev, CE_WARN, + "Failed to bind dma memory for indirect descriptors," + " entry %d, vq %d", entry->qe_index, + entry->qe_queue->vq_index); + goto out_bind; + } + + /* We asked for a single segment */ + ASSERT(ncookies == 1); + + return (0); + +out_bind: + ddi_dma_mem_free(&entry->qe_indirect_dma_acch); +out_alloc: + ddi_dma_free_handle(&entry->qe_indirect_dma_handle); +out_alloc_handle: + + return (ret); +} + +/* + * Initialize the vq structure.
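+ * Each entry goes onto the freelist, and when indirect descriptors + * are in use it also gets a private table of vq_indirect_num + * descriptors, so in-flight requests need not compete for indirect + * space.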
+ */ +static int +virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq) +{ + int ret; + uint16_t i; + int vq_size = vq->vq_num; + int indirect_num = vq->vq_indirect_num; + + /* free slot management */ + list_create(&vq->vq_freelist, sizeof (struct vq_entry), + offsetof(struct vq_entry, qe_list)); + + for (i = 0; i < vq_size; i++) { + struct vq_entry *entry = &vq->vq_entries[i]; + list_insert_tail(&vq->vq_freelist, entry); + entry->qe_index = i; + entry->qe_desc = &vq->vq_descs[i]; + entry->qe_queue = vq; + + if (indirect_num) { + ret = virtio_alloc_indirect(sc, entry); + if (ret) + goto out_indirect; + } + } + + mutex_init(&vq->vq_freelist_lock, "virtio-freelist", + MUTEX_DRIVER, DDI_INTR_PRI(sc->sc_intr_prio)); + mutex_init(&vq->vq_avail_lock, "virtio-avail", + MUTEX_DRIVER, DDI_INTR_PRI(sc->sc_intr_prio)); + mutex_init(&vq->vq_used_lock, "virtio-used", + MUTEX_DRIVER, DDI_INTR_PRI(sc->sc_intr_prio)); + + return (0); + +out_indirect: + for (i = 0; i < vq_size; i++) { + struct vq_entry *entry = &vq->vq_entries[i]; + if (entry->qe_indirect_descs) + virtio_free_indirect(entry); + } + + return (ret); +} + + + +/* + * Allocate/free a vq. + */ +struct virtqueue * +virtio_alloc_vq(struct virtio_softc *sc, + unsigned int index, + unsigned int size, + unsigned int indirect_num, + const char *name) +{ + int vq_size, allocsize1, allocsize2, allocsize = 0; + int ret; + unsigned int ncookies; + size_t len; + struct virtqueue *vq; + + + ddi_put16(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint16_t *)(sc->sc_io_addr + VIRTIO_CONFIG_QUEUE_SELECT), index); + vq_size = ddi_get16(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint16_t *)(sc->sc_io_addr + VIRTIO_CONFIG_QUEUE_SIZE)); + if (vq_size == 0) { + dev_err(sc->sc_dev, CE_WARN, + "virtqueue does not exist, index %d for %s\n", index, name); + goto out; + } + + vq = kmem_zalloc(sizeof (struct virtqueue), KM_SLEEP); + + /* size 0 => use native vq size, good for receive queues. */ + if (size) + vq_size = MIN(vq_size, size); + + /* allocsize1: descriptor table + avail ring + pad */ + allocsize1 = VIRTQUEUE_ALIGN(sizeof (struct vring_desc) * vq_size + + sizeof (struct vring_avail) + + sizeof (uint16_t) * vq_size); + /* allocsize2: used ring + pad */ + allocsize2 = VIRTQUEUE_ALIGN(sizeof (struct vring_used) + + sizeof (struct vring_used_elem) * vq_size); + + allocsize = allocsize1 + allocsize2; + + ret = ddi_dma_alloc_handle(sc->sc_dev, &virtio_vq_dma_attr, + DDI_DMA_SLEEP, NULL, &vq->vq_dma_handle); + if (ret != DDI_SUCCESS) { + dev_err(sc->sc_dev, CE_WARN, + "Failed to allocate dma handle for vq %d", index); + goto out_alloc_handle; + } + + ret = ddi_dma_mem_alloc(vq->vq_dma_handle, allocsize, + &virtio_vq_devattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, + (caddr_t *)&vq->vq_vaddr, &len, &vq->vq_dma_acch); + if (ret != DDI_SUCCESS) { + dev_err(sc->sc_dev, CE_WARN, + "Failed to allocate dma memory for vq %d", index); + goto out_alloc; + } + + + ret = ddi_dma_addr_bind_handle(vq->vq_dma_handle, NULL, + (caddr_t)vq->vq_vaddr, len, + DDI_DMA_RDWR | DDI_DMA_CONSISTENT, + DDI_DMA_SLEEP, NULL, &vq->vq_dma_cookie, &ncookies); + if (ret != DDI_DMA_MAPPED) { + dev_err(sc->sc_dev, CE_WARN, + "Failed to bind dma memory for vq %d", index); + goto out_bind; + } + + /* We asked for a single segment */ + ASSERT(ncookies == 1); + /* and page-aligned buffers.
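+ * (the device is handed a page frame number, dmac_laddress divided + * by VIRTIO_PAGE_SIZE, so an unaligned ring could not be described)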
*/ + ASSERT(vq->vq_dma_cookie.dmac_laddress % VIRTIO_PAGE_SIZE == 0); + + (void) memset(vq->vq_vaddr, 0, allocsize); + + /* Make sure all zeros hit the buffer before we point the host to it */ + membar_producer(); + + /* set the vq address */ + ddi_put32(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint32_t *)(sc->sc_io_addr + VIRTIO_CONFIG_QUEUE_ADDRESS), + (vq->vq_dma_cookie.dmac_laddress / VIRTIO_PAGE_SIZE)); + + /* remember addresses and offsets for later use */ + vq->vq_owner = sc; + vq->vq_num = vq_size; + vq->vq_index = index; + vq->vq_descs = vq->vq_vaddr; + vq->vq_availoffset = sizeof (struct vring_desc)*vq_size; + vq->vq_avail = (void *)(((char *)vq->vq_descs) + vq->vq_availoffset); + vq->vq_usedoffset = allocsize1; + vq->vq_used = (void *)(((char *)vq->vq_descs) + vq->vq_usedoffset); + + ASSERT(indirect_num == 0 || + virtio_has_feature(sc, VIRTIO_F_RING_INDIRECT_DESC)); + vq->vq_indirect_num = indirect_num; + + /* free slot management */ + vq->vq_entries = kmem_zalloc(sizeof (struct vq_entry) * vq_size, + KM_SLEEP); + + ret = virtio_init_vq(sc, vq); + if (ret) + goto out_init; + + dev_debug(sc->sc_dev, CE_NOTE, + "Allocated %d entries for vq %d:%s (%d indirect descs)", + vq_size, index, name, indirect_num * vq_size); + + return (vq); + +out_init: + kmem_free(vq->vq_entries, sizeof (struct vq_entry) * vq_size); + (void) ddi_dma_unbind_handle(vq->vq_dma_handle); +out_bind: + ddi_dma_mem_free(&vq->vq_dma_acch); +out_alloc: + ddi_dma_free_handle(&vq->vq_dma_handle); +out_alloc_handle: + kmem_free(vq, sizeof (struct virtqueue)); +out: + return (NULL); +} + + +void +virtio_free_vq(struct virtqueue *vq) +{ + struct virtio_softc *sc = vq->vq_owner; + int i; + + /* tell device that there's no virtqueue any longer */ + ddi_put16(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint16_t *)(sc->sc_io_addr + VIRTIO_CONFIG_QUEUE_SELECT), + vq->vq_index); + ddi_put32(sc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint32_t *)(sc->sc_io_addr + VIRTIO_CONFIG_QUEUE_ADDRESS), 0); + + /* Free the indirect descriptors, if any. */ + for (i = 0; i < vq->vq_num; i++) { + struct vq_entry *entry = &vq->vq_entries[i]; + if (entry->qe_indirect_descs) + virtio_free_indirect(entry); + } + + kmem_free(vq->vq_entries, sizeof (struct vq_entry) * vq->vq_num); + + (void) ddi_dma_unbind_handle(vq->vq_dma_handle); + ddi_dma_mem_free(&vq->vq_dma_acch); + ddi_dma_free_handle(&vq->vq_dma_handle); + + mutex_destroy(&vq->vq_used_lock); + mutex_destroy(&vq->vq_avail_lock); + mutex_destroy(&vq->vq_freelist_lock); + + kmem_free(vq, sizeof (struct virtqueue)); +} + +/* + * Free descriptor management.
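+ * A typical caller (cf. vioblk_rw()) pops an entry, fills it in and + * pushes it: + * + * qe = vq_alloc_entry(vq); + * virtio_ve_add_indirect_buf(qe, paddr, len, B_TRUE); + * virtio_push_chain(qe, B_TRUE); + * + * where paddr and len stand for any bound DMA cookie. The entry + * comes back via virtio_pull_chain() in the interrupt handler and + * is returned to the freelist with virtio_free_chain().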
+ */ +struct vq_entry * +vq_alloc_entry(struct virtqueue *vq) +{ + struct vq_entry *qe; + + mutex_enter(&vq->vq_freelist_lock); + if (list_is_empty(&vq->vq_freelist)) { + mutex_exit(&vq->vq_freelist_lock); + return (NULL); + } + qe = list_remove_head(&vq->vq_freelist); + + ASSERT(vq->vq_used_entries >= 0); + vq->vq_used_entries++; + + mutex_exit(&vq->vq_freelist_lock); + + qe->qe_next = NULL; + qe->qe_indirect_next = 0; + (void) memset(qe->qe_desc, 0, sizeof (struct vring_desc)); + + return (qe); +} + +void +vq_free_entry(struct virtqueue *vq, struct vq_entry *qe) +{ + mutex_enter(&vq->vq_freelist_lock); + + list_insert_head(&vq->vq_freelist, qe); + vq->vq_used_entries--; + ASSERT(vq->vq_used_entries >= 0); + mutex_exit(&vq->vq_freelist_lock); +} + +/* + * We (intentionally) don't have a global vq mutex, so you are + * responsible for external locking to avoid allocating/freeing any + * entries before using the returned value. Have fun. + */ +uint_t +vq_num_used(struct virtqueue *vq) +{ + /* vq->vq_freelist_lock would not help here. */ + return (vq->vq_used_entries); +} + +static inline void +virtio_ve_set_desc(struct vring_desc *desc, uint64_t paddr, uint32_t len, + boolean_t write) +{ + desc->addr = paddr; + desc->len = len; + desc->next = 0; + desc->flags = 0; + + /* 'write' - from the driver's point of view */ + if (!write) + desc->flags = VRING_DESC_F_WRITE; + + +} + +void +virtio_ve_set(struct vq_entry *qe, uint64_t paddr, uint32_t len, + boolean_t write) +{ + virtio_ve_set_desc(qe->qe_desc, paddr, len, write); +} + +void +virtio_ve_add_indirect_buf(struct vq_entry *qe, uint64_t paddr, uint32_t len, + boolean_t write) +{ + struct vring_desc *indirect_desc; + + ASSERT(qe->qe_queue->vq_indirect_num); + ASSERT(qe->qe_indirect_next < qe->qe_queue->vq_indirect_num); + + indirect_desc = &qe->qe_indirect_descs[qe->qe_indirect_next]; + virtio_ve_set_desc(indirect_desc, paddr, len, write); + qe->qe_indirect_next++; +} + +void +virtio_ve_add_cookie(struct vq_entry *qe, ddi_dma_handle_t dma_handle, + ddi_dma_cookie_t dma_cookie, unsigned int ncookies, boolean_t write) +{ + int i; + + for (i = 0; i < ncookies; i++) { + virtio_ve_add_indirect_buf(qe, dma_cookie.dmac_laddress, + dma_cookie.dmac_size, write); + ddi_dma_nextcookie(dma_handle, &dma_cookie); + } +} + +void +virtio_sync_vq(struct virtqueue *vq) +{ + struct virtio_softc *vsc = vq->vq_owner; + + /* Make sure the avail ring update hit the buffer */ + membar_producer(); + + vq->vq_avail->idx = vq->vq_avail_idx; + + /* Make sure the avail idx update hits the buffer */ + membar_producer(); + + /* Make sure we see the flags update */ + membar_consumer(); + + if (!(vq->vq_used->flags & VRING_USED_F_NO_NOTIFY)) + ddi_put16(vsc->sc_ioh, + /* LINTED E_BAD_PTR_CAST_ALIGN */ + (uint16_t *)(vsc->sc_io_addr + + VIRTIO_CONFIG_QUEUE_NOTIFY), + vq->vq_index); +} + +void +virtio_push_chain(struct vq_entry *qe, boolean_t sync) +{ + struct virtqueue *vq = qe->qe_queue; + struct vq_entry *head = qe; + struct vring_desc *desc; + int idx; + + ASSERT(qe); + + /* + * Bind the descs together, paddr and len should be already + * set with virtio_ve_set + */ + do { + /* Bind the indirect descriptors */ + if (qe->qe_indirect_next > 1) { + uint16_t i = 0; + + /* + * Set the pointer/flags to the + * first indirect descriptor + */ + virtio_ve_set_desc(qe->qe_desc, + qe->qe_indirect_dma_cookie.dmac_laddress, + sizeof (struct vring_desc) * qe->qe_indirect_next, + B_FALSE); + qe->qe_desc->flags |= VRING_DESC_F_INDIRECT; + + /* For all but the last one, add the next
index/flag */ + do { + desc = &qe->qe_indirect_descs[i]; + i++; + + desc->flags |= VRING_DESC_F_NEXT; + desc->next = i; + } while (i < qe->qe_indirect_next - 1); + + } + + if (qe->qe_next) { + qe->qe_desc->flags |= VRING_DESC_F_NEXT; + qe->qe_desc->next = qe->qe_next->qe_index; + } + + qe = qe->qe_next; + } while (qe); + + mutex_enter(&vq->vq_avail_lock); + idx = vq->vq_avail_idx; + vq->vq_avail_idx++; + + /* Make sure the bits hit the descriptor(s) */ + membar_producer(); + vq->vq_avail->ring[idx % vq->vq_num] = head->qe_index; + + /* Notify the device, if needed. */ + if (sync) + virtio_sync_vq(vq); + + mutex_exit(&vq->vq_avail_lock); +} + +/* Get a chain of descriptors from the used ring, if one is available. */ +struct vq_entry * +virtio_pull_chain(struct virtqueue *vq, uint32_t *len) +{ + struct vq_entry *head; + int slot; + int usedidx; + + mutex_enter(&vq->vq_used_lock); + + /* No used entries? Bye. */ + if (vq->vq_used_idx == vq->vq_used->idx) { + mutex_exit(&vq->vq_used_lock); + return (NULL); + } + + usedidx = vq->vq_used_idx; + vq->vq_used_idx++; + mutex_exit(&vq->vq_used_lock); + + usedidx %= vq->vq_num; + + /* Make sure we do the next step _after_ checking the idx. */ + membar_consumer(); + + slot = vq->vq_used->ring[usedidx].id; + *len = vq->vq_used->ring[usedidx].len; + + head = &vq->vq_entries[slot]; + + return (head); +} + +void +virtio_free_chain(struct vq_entry *qe) +{ + struct vq_entry *tmp; + struct virtqueue *vq = qe->qe_queue; + + ASSERT(qe); + + do { + ASSERT(qe->qe_queue == vq); + tmp = qe->qe_next; + vq_free_entry(vq, qe); + qe = tmp; + } while (tmp); +} + +void +virtio_ventry_stick(struct vq_entry *first, struct vq_entry *second) +{ + first->qe_next = second; +} + +static int +virtio_register_msi(struct virtio_softc *sc, + struct virtio_int_handler *config_handler, + struct virtio_int_handler vq_handlers[], + int intr_types) +{ + int count, actual; + int int_type; + int i; + int handler_count; + int ret; + + /* If both MSI and MSI-x are reported, prefer MSI-x. */ + int_type = DDI_INTR_TYPE_MSI; + if (intr_types & DDI_INTR_TYPE_MSIX) + int_type = DDI_INTR_TYPE_MSIX; + + /* Walk the handler table to get the number of handlers. */ + for (handler_count = 0; + vq_handlers && vq_handlers[handler_count].vh_func; + handler_count++) + ; + + /* +1 if there is a config change handler. */ + if (config_handler) + handler_count++; + + /* Number of MSIs supported by the device. */ + ret = ddi_intr_get_nintrs(sc->sc_dev, int_type, &count); + if (ret != DDI_SUCCESS) { + dev_err(sc->sc_dev, CE_WARN, "ddi_intr_get_nintrs failed"); + return (ret); + } + + /* + * Those who try to register more handlers than the device + * supports shall suffer.
+static int
+virtio_register_msi(struct virtio_softc *sc,
+    struct virtio_int_handler *config_handler,
+    struct virtio_int_handler vq_handlers[],
+    int intr_types)
+{
+	int count, actual;
+	int int_type;
+	int i;
+	int handler_count;
+	int ret;
+
+	/* If both MSI and MSI-X are reported, prefer MSI-X. */
+	int_type = DDI_INTR_TYPE_MSI;
+	if (intr_types & DDI_INTR_TYPE_MSIX)
+		int_type = DDI_INTR_TYPE_MSIX;
+
+	/* Walk the handler table to get the number of handlers. */
+	for (handler_count = 0;
+	    vq_handlers && vq_handlers[handler_count].vh_func;
+	    handler_count++)
+		;
+
+	/* +1 if there is a config change handler. */
+	if (config_handler)
+		handler_count++;
+
+	/* Number of MSIs supported by the device. */
+	ret = ddi_intr_get_nintrs(sc->sc_dev, int_type, &count);
+	if (ret != DDI_SUCCESS) {
+		dev_err(sc->sc_dev, CE_WARN, "ddi_intr_get_nintrs failed");
+		return (ret);
+	}
+
+	/*
+	 * Those who try to register more handlers than the device
+	 * supports shall suffer.
+	 */
+	ASSERT(handler_count <= count);
+
+	sc->sc_intr_htable = kmem_zalloc(
+	    sizeof (ddi_intr_handle_t) * handler_count, KM_SLEEP);
+
+	ret = ddi_intr_alloc(sc->sc_dev, sc->sc_intr_htable, int_type, 0,
+	    handler_count, &actual, DDI_INTR_ALLOC_NORMAL);
+	if (ret != DDI_SUCCESS) {
+		dev_err(sc->sc_dev, CE_WARN, "Failed to allocate MSI: %d", ret);
+		goto out_msi_alloc;
+	}
+
+	if (actual != handler_count) {
+		dev_err(sc->sc_dev, CE_WARN,
+		    "Not enough MSI available: need %d, available %d",
+		    handler_count, actual);
+		ret = DDI_FAILURE;
+		goto out_msi_available;
+	}
+
+	sc->sc_intr_num = handler_count;
+	sc->sc_intr_config = B_FALSE;
+	if (config_handler) {
+		sc->sc_intr_config = B_TRUE;
+	}
+
+	/* Assume they are all the same priority. */
+	ret = ddi_intr_get_pri(sc->sc_intr_htable[0], &sc->sc_intr_prio);
+	if (ret != DDI_SUCCESS) {
+		dev_err(sc->sc_dev, CE_WARN, "ddi_intr_get_pri failed");
+		goto out_msi_prio;
+	}
+
+	/* Add the vq handlers */
+	for (i = 0; vq_handlers && vq_handlers[i].vh_func; i++) {
+		ret = ddi_intr_add_handler(sc->sc_intr_htable[i],
+		    vq_handlers[i].vh_func,
+		    sc, vq_handlers[i].vh_priv);
+		if (ret != DDI_SUCCESS) {
+			dev_err(sc->sc_dev, CE_WARN,
+			    "ddi_intr_add_handler failed");
+			/* Remove the handlers that succeeded. */
+			while (--i >= 0) {
+				(void) ddi_intr_remove_handler(
+				    sc->sc_intr_htable[i]);
+			}
+			goto out_add_handlers;
+		}
+	}
+
+	/* Don't forget the config handler */
+	if (config_handler) {
+		ret = ddi_intr_add_handler(sc->sc_intr_htable[i],
+		    config_handler->vh_func,
+		    sc, config_handler->vh_priv);
+		if (ret != DDI_SUCCESS) {
+			dev_err(sc->sc_dev, CE_WARN,
+			    "ddi_intr_add_handler failed");
+			/* Remove the handlers that succeeded. */
+			while (--i >= 0) {
+				(void) ddi_intr_remove_handler(
+				    sc->sc_intr_htable[i]);
+			}
+			goto out_add_handlers;
+		}
+	}
+
+	/* We know we are using MSI, so set the config offset. */
+	sc->sc_config_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSI;
+
+	ret = ddi_intr_get_cap(sc->sc_intr_htable[0],
+	    &sc->sc_intr_cap);
+	/* Just in case. */
+	if (ret != DDI_SUCCESS)
+		sc->sc_intr_cap = 0;
+
+	return (DDI_SUCCESS);
+
+out_add_handlers:
+out_msi_prio:
+out_msi_available:
+	for (i = 0; i < actual; i++)
+		(void) ddi_intr_free(sc->sc_intr_htable[i]);
+out_msi_alloc:
+	kmem_free(sc->sc_intr_htable,
+	    sizeof (ddi_intr_handle_t) * handler_count);
+
+	return (ret);
+}
+
+struct virtio_handler_container {
+	int nhandlers;
+	struct virtio_int_handler config_handler;
+	struct virtio_int_handler vq_handlers[];
+};
+
+uint_t
+virtio_intx_dispatch(caddr_t arg1, caddr_t arg2)
+{
+	struct virtio_softc *sc = (void *)arg1;
+	struct virtio_handler_container *vhc = (void *)arg2;
+	uint8_t isr_status;
+	int i;
+
+	isr_status = ddi_get8(sc->sc_ioh, (uint8_t *)(sc->sc_io_addr +
+	    VIRTIO_CONFIG_ISR_STATUS));
+
+	if (!isr_status)
+		return (DDI_INTR_UNCLAIMED);
+
+	if ((isr_status & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
+	    vhc->config_handler.vh_func) {
+		vhc->config_handler.vh_func((void *)sc,
+		    vhc->config_handler.vh_priv);
+	}
+
+	/* Notify all handlers */
+	for (i = 0; i < vhc->nhandlers; i++) {
+		vhc->vq_handlers[i].vh_func((void *)sc,
+		    vhc->vq_handlers[i].vh_priv);
+	}
+
+	return (DDI_INTR_CLAIMED);
+}
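+/*
+ * Illustrative sketch (editor's addition, not part of the original
+ * change): vq_handlers is a NULL-terminated array; that terminator is
+ * how the registration routines count the handlers. The handler names
+ * and the VIRTIO_EXAMPLE guard are hypothetical.
+ */
+#ifdef VIRTIO_EXAMPLE
+static uint_t example_rx_intr(caddr_t, caddr_t);
+static uint_t example_tx_intr(caddr_t, caddr_t);
+
+static struct virtio_int_handler example_vq_handlers[] = {
+	{ example_rx_intr, NULL },	/* virtqueue 0 */
+	{ example_tx_intr, NULL },	/* virtqueue 1 */
+	{ NULL, NULL },			/* terminator: vh_func == NULL */
+};
+#endif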
+/*
+ * config_handler and vq_handlers may be allocated on the stack.
+ * Take precautions not to lose them.
+ */
+static int
+virtio_register_intx(struct virtio_softc *sc,
+    struct virtio_int_handler *config_handler,
+    struct virtio_int_handler vq_handlers[])
+{
+	int vq_handler_count;
+	int actual;
+	struct virtio_handler_container *vhc;
+	int ret = DDI_FAILURE;
+
+	/* Walk the handler table to get the number of handlers. */
+	for (vq_handler_count = 0;
+	    vq_handlers && vq_handlers[vq_handler_count].vh_func;
+	    vq_handler_count++)
+		;
+
+	vhc = kmem_zalloc(sizeof (struct virtio_handler_container) +
+	    sizeof (struct virtio_int_handler) * vq_handler_count,
+	    KM_SLEEP);
+
+	vhc->nhandlers = vq_handler_count;
+	(void) memcpy(vhc->vq_handlers, vq_handlers,
+	    sizeof (struct virtio_int_handler) * vq_handler_count);
+
+	if (config_handler) {
+		(void) memcpy(&vhc->config_handler, config_handler,
+		    sizeof (struct virtio_int_handler));
+	}
+
+	/* Just a single entry for a single interrupt. */
+	sc->sc_intr_htable = kmem_zalloc(sizeof (ddi_intr_handle_t), KM_SLEEP);
+
+	ret = ddi_intr_alloc(sc->sc_dev, sc->sc_intr_htable,
+	    DDI_INTR_TYPE_FIXED, 0, 1, &actual,
+	    DDI_INTR_ALLOC_NORMAL);
+	if (ret != DDI_SUCCESS) {
+		dev_err(sc->sc_dev, CE_WARN,
+		    "Failed to allocate a fixed interrupt: %d", ret);
+		goto out_int_alloc;
+	}
+
+	ASSERT(actual == 1);
+	sc->sc_intr_num = 1;
+
+	ret = ddi_intr_get_pri(sc->sc_intr_htable[0], &sc->sc_intr_prio);
+	if (ret != DDI_SUCCESS) {
+		dev_err(sc->sc_dev, CE_WARN, "ddi_intr_get_pri failed");
+		goto out_prio;
+	}
+
+	ret = ddi_intr_add_handler(sc->sc_intr_htable[0],
+	    virtio_intx_dispatch, sc, vhc);
+	if (ret != DDI_SUCCESS) {
+		dev_err(sc->sc_dev, CE_WARN, "ddi_intr_add_handler failed");
+		goto out_add_handlers;
+	}
+
+	/* We know we are not using MSI, so set the config offset. */
+	sc->sc_config_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
+
+	return (DDI_SUCCESS);
+
+out_add_handlers:
+out_prio:
+	(void) ddi_intr_free(sc->sc_intr_htable[0]);
+out_int_alloc:
+	kmem_free(sc->sc_intr_htable, sizeof (ddi_intr_handle_t));
+	kmem_free(vhc, sizeof (struct virtio_handler_container) +
+	    sizeof (struct virtio_int_handler) * vq_handler_count);
+	return (ret);
+}
+
+/*
+ * We find out whether the device supports MSI here, and the register
+ * layout depends on that (doh). Don't access the device-specific bits
+ * in BAR 0 before calling this function!
+ */
+int
+virtio_register_ints(struct virtio_softc *sc,
+    struct virtio_int_handler *config_handler,
+    struct virtio_int_handler vq_handlers[])
+{
+	int ret;
+	int intr_types;
+
+	/* Determine which types of interrupts are supported */
+	ret = ddi_intr_get_supported_types(sc->sc_dev, &intr_types);
+	if (ret != DDI_SUCCESS) {
+		dev_err(sc->sc_dev, CE_WARN, "Can't get supported int types");
+		goto out_inttype;
+	}
+
+	/* If we have MSIs, let's use them. */
+	if (intr_types & (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) {
+		ret = virtio_register_msi(sc, config_handler,
+		    vq_handlers, intr_types);
+		if (!ret)
+			return (0);
+	}
+
+	/* Fall back to old-fashioned interrupts. */
+	if (intr_types & DDI_INTR_TYPE_FIXED) {
+		dev_debug(sc->sc_dev, CE_WARN,
+		    "Using legacy interrupts");
+
+		return (virtio_register_intx(sc, config_handler, vq_handlers));
+	}
+
+	dev_err(sc->sc_dev, CE_WARN,
+	    "MSI failed and fixed interrupts not supported. Giving up.");
+	ret = DDI_FAILURE;
+
+out_inttype:
+	return (ret);
+}
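+/*
+ * Illustrative sketch (editor's addition, not part of the original
+ * change): the intended attach-time sequence for a leaf driver.
+ * Interrupts are registered before the device-specific config space is
+ * touched (see the comment above virtio_register_ints()) and enabled
+ * once setup is complete. example_vq_handlers and the VIRTIO_EXAMPLE
+ * guard are hypothetical.
+ */
+#ifdef VIRTIO_EXAMPLE
+static int
+virtio_example_attach_ints(struct virtio_softc *sc)
+{
+	int ret;
+
+	/* Prefers MSI-X/MSI, falls back to fixed interrupts. */
+	ret = virtio_register_ints(sc, NULL, example_vq_handlers);
+	if (ret != DDI_SUCCESS)
+		return (ret);
+
+	/* ... negotiate features and allocate virtqueues here ... */
+
+	return (virtio_enable_ints(sc));
+}
+#endif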
+static int
+virtio_enable_msi(struct virtio_softc *sc)
+{
+	int ret, i;
+	int vq_handler_count = sc->sc_intr_num;
+
+	/* Number of handlers, not counting the config. */
+	if (sc->sc_intr_config)
+		vq_handler_count--;
+
+	/* Enable the interrupts. Either the whole block, or one by one. */
+	if (sc->sc_intr_cap & DDI_INTR_FLAG_BLOCK) {
+		ret = ddi_intr_block_enable(sc->sc_intr_htable,
+		    sc->sc_intr_num);
+		if (ret != DDI_SUCCESS) {
+			dev_err(sc->sc_dev, CE_WARN,
+			    "Failed to enable MSI, falling back to INTx");
+			goto out_enable;
+		}
+	} else {
+		for (i = 0; i < sc->sc_intr_num; i++) {
+			ret = ddi_intr_enable(sc->sc_intr_htable[i]);
+			if (ret != DDI_SUCCESS) {
+				dev_err(sc->sc_dev, CE_WARN,
+				    "Failed to enable MSI %d, "
+				    "falling back to INTx", i);
+
+				while (--i >= 0) {
+					(void) ddi_intr_disable(
+					    sc->sc_intr_htable[i]);
+				}
+				goto out_enable;
+			}
+		}
+	}
+
+	/* Bind the allocated MSI to the queues and config */
+	for (i = 0; i < vq_handler_count; i++) {
+		int check;
+
+		ddi_put16(sc->sc_ioh,
+		    /* LINTED E_BAD_PTR_CAST_ALIGN */
+		    (uint16_t *)(sc->sc_io_addr +
+		    VIRTIO_CONFIG_QUEUE_SELECT), i);
+
+		ddi_put16(sc->sc_ioh,
+		    /* LINTED E_BAD_PTR_CAST_ALIGN */
+		    (uint16_t *)(sc->sc_io_addr +
+		    VIRTIO_CONFIG_QUEUE_VECTOR), i);
+
+		check = ddi_get16(sc->sc_ioh,
+		    /* LINTED E_BAD_PTR_CAST_ALIGN */
+		    (uint16_t *)(sc->sc_io_addr +
+		    VIRTIO_CONFIG_QUEUE_VECTOR));
+		if (check != i) {
+			dev_err(sc->sc_dev, CE_WARN, "Failed to bind handler "
+			    "for VQ %d, MSI %d. Check = %x", i, i, check);
+			ret = ENODEV;
+			goto out_bind;
+		}
+	}
+
+	if (sc->sc_intr_config) {
+		int check;
+
+		ddi_put16(sc->sc_ioh,
+		    /* LINTED E_BAD_PTR_CAST_ALIGN */
+		    (uint16_t *)(sc->sc_io_addr +
+		    VIRTIO_CONFIG_CONFIG_VECTOR), i);
+
+		check = ddi_get16(sc->sc_ioh,
+		    /* LINTED E_BAD_PTR_CAST_ALIGN */
+		    (uint16_t *)(sc->sc_io_addr +
+		    VIRTIO_CONFIG_CONFIG_VECTOR));
+		if (check != i) {
+			dev_err(sc->sc_dev, CE_WARN, "Failed to bind handler "
+			    "for Config updates, MSI %d", i);
+			ret = ENODEV;
+			goto out_bind;
+		}
+	}
+
+	return (DDI_SUCCESS);
+
+out_bind:
+	/* Unbind the vqs */
+	for (i = 0; i < vq_handler_count; i++) {
+		ddi_put16(sc->sc_ioh,
+		    /* LINTED E_BAD_PTR_CAST_ALIGN */
+		    (uint16_t *)(sc->sc_io_addr +
+		    VIRTIO_CONFIG_QUEUE_SELECT), i);
+
+		ddi_put16(sc->sc_ioh,
+		    /* LINTED E_BAD_PTR_CAST_ALIGN */
+		    (uint16_t *)(sc->sc_io_addr +
+		    VIRTIO_CONFIG_QUEUE_VECTOR),
+		    VIRTIO_MSI_NO_VECTOR);
+	}
+	/* And the config */
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	ddi_put16(sc->sc_ioh, (uint16_t *)(sc->sc_io_addr +
+	    VIRTIO_CONFIG_CONFIG_VECTOR), VIRTIO_MSI_NO_VECTOR);
+
+	ret = DDI_FAILURE;
+
+out_enable:
+	return (ret);
+}
+
+static int
+virtio_enable_intx(struct virtio_softc *sc)
+{
+	int ret;
+
+	ret = ddi_intr_enable(sc->sc_intr_htable[0]);
+	if (ret != DDI_SUCCESS)
+		dev_err(sc->sc_dev, CE_WARN,
+		    "Failed to enable interrupt: %d", ret);
+	return (ret);
+}
+
+/*
+ * We can't enable/disable individual handlers in the INTx case, so do
+ * the whole bunch even in the MSI case.
+ */
+int
+virtio_enable_ints(struct virtio_softc *sc)
+{
+	/* See if we are using MSI. */
+	if (sc->sc_config_offset == VIRTIO_CONFIG_DEVICE_CONFIG_MSI)
+		return (virtio_enable_msi(sc));
+
+	ASSERT(sc->sc_config_offset == VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI);
+
+	return (virtio_enable_intx(sc));
+}
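+/*
+ * Illustrative sketch (editor's addition, not part of the original
+ * change): the select/write/read-back pattern virtio_enable_msi() uses
+ * when binding a vector. Per the legacy virtio PCI spec, the device
+ * answers the read-back with VIRTIO_MSI_NO_VECTOR if it rejected the
+ * binding, which is why the value is compared. The VIRTIO_EXAMPLE
+ * guard is hypothetical.
+ */
+#ifdef VIRTIO_EXAMPLE
+static boolean_t
+virtio_example_bind_vector(struct virtio_softc *sc, uint16_t queue,
+    uint16_t vector)
+{
+	uint16_t check;
+
+	/* Select the queue, then offer it a vector. */
+	ddi_put16(sc->sc_ioh,
+	    /* LINTED E_BAD_PTR_CAST_ALIGN */
+	    (uint16_t *)(sc->sc_io_addr + VIRTIO_CONFIG_QUEUE_SELECT), queue);
+	ddi_put16(sc->sc_ioh,
+	    /* LINTED E_BAD_PTR_CAST_ALIGN */
+	    (uint16_t *)(sc->sc_io_addr + VIRTIO_CONFIG_QUEUE_VECTOR), vector);
+
+	/* The device reports the vector it actually accepted. */
+	check = ddi_get16(sc->sc_ioh,
+	    /* LINTED E_BAD_PTR_CAST_ALIGN */
+	    (uint16_t *)(sc->sc_io_addr + VIRTIO_CONFIG_QUEUE_VECTOR));
+
+	return (check == vector ? B_TRUE : B_FALSE);
+}
+#endif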
+void
+virtio_release_ints(struct virtio_softc *sc)
+{
+	int i;
+	int ret;
+
+	/* If we were running with MSI, unbind the vectors. */
+	if (sc->sc_config_offset == VIRTIO_CONFIG_DEVICE_CONFIG_MSI) {
+		/* Unbind all vqs */
+		for (i = 0; i < sc->sc_nvqs; i++) {
+			ddi_put16(sc->sc_ioh,
+			    /* LINTED E_BAD_PTR_CAST_ALIGN */
+			    (uint16_t *)(sc->sc_io_addr +
+			    VIRTIO_CONFIG_QUEUE_SELECT), i);
+
+			ddi_put16(sc->sc_ioh,
+			    /* LINTED E_BAD_PTR_CAST_ALIGN */
+			    (uint16_t *)(sc->sc_io_addr +
+			    VIRTIO_CONFIG_QUEUE_VECTOR),
+			    VIRTIO_MSI_NO_VECTOR);
+		}
+		/* And the config */
+		/* LINTED E_BAD_PTR_CAST_ALIGN */
+		ddi_put16(sc->sc_ioh, (uint16_t *)(sc->sc_io_addr +
+		    VIRTIO_CONFIG_CONFIG_VECTOR),
+		    VIRTIO_MSI_NO_VECTOR);
+	}
+
+	/* Disable the interrupts. Either the whole block, or one by one. */
+	if (sc->sc_intr_cap & DDI_INTR_FLAG_BLOCK) {
+		ret = ddi_intr_block_disable(sc->sc_intr_htable,
+		    sc->sc_intr_num);
+		if (ret != DDI_SUCCESS) {
+			dev_err(sc->sc_dev, CE_WARN,
+			    "Failed to disable MSIs, won't be able to "
+			    "reuse next time");
+		}
+	} else {
+		for (i = 0; i < sc->sc_intr_num; i++) {
+			ret = ddi_intr_disable(sc->sc_intr_htable[i]);
+			if (ret != DDI_SUCCESS) {
+				dev_err(sc->sc_dev, CE_WARN,
+				    "Failed to disable interrupt %d, "
+				    "won't be able to reuse", i);
+			}
+		}
+	}
+
+	for (i = 0; i < sc->sc_intr_num; i++) {
+		(void) ddi_intr_remove_handler(sc->sc_intr_htable[i]);
+	}
+
+	for (i = 0; i < sc->sc_intr_num; i++)
+		(void) ddi_intr_free(sc->sc_intr_htable[i]);
+
+	kmem_free(sc->sc_intr_htable,
+	    sizeof (ddi_intr_handle_t) * sc->sc_intr_num);
+
+	/* After disabling interrupts, the config offset is non-MSI. */
+	sc->sc_config_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
+}
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modlmisc modlmisc = {
+	&mod_miscops,	/* Type of module */
+	"VirtIO common library module",
+};
+
+static struct modlinkage modlinkage = {
+	MODREV_1,
+	{
+		(void *)&modlmisc,
+		NULL
+	}
+};
+
+int
+_init(void)
+{
+	return (mod_install(&modlinkage));
+}
+
+int
+_fini(void)
+{
+	return (mod_remove(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+	return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/io/virtio/virtioreg.h b/usr/src/uts/common/io/virtio/virtioreg.h
new file mode 100644
index 0000000000..8cfcd59a47
--- /dev/null
+++ b/usr/src/uts/common/io/virtio/virtioreg.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2010 Minoura Makoto.
+ * Copyright (c) 2012 Nexenta Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Part of the file derived from `Virtio PCI Card Specification v0.8.6 DRAFT' + * Appendix A. + */ + +/* + * An interface for efficient virtio implementation. + * + * This header is BSD licensed so anyone can use the definitions + * to implement compatible drivers/servers. + * + * Copyright 2007, 2009, IBM Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' ANDANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+
+#ifndef __VIRTIOREG_H__
+#define __VIRTIOREG_H__
+
+#include <sys/types.h>
+
+#define PCI_VENDOR_QUMRANET	0x1af4
+#define PCI_DEV_VIRTIO_MIN	0x1000
+#define PCI_DEV_VIRTIO_MAX	0x103f
+#define VIRTIO_PCI_ABI_VERSION	0
+
+/* Virtio product id (subsystem) */
+#define PCI_PRODUCT_VIRTIO_NETWORK	1
+#define PCI_PRODUCT_VIRTIO_BLOCK	2
+#define PCI_PRODUCT_VIRTIO_CONSOLE	3
+#define PCI_PRODUCT_VIRTIO_ENTROPY	4
+#define PCI_PRODUCT_VIRTIO_BALLOON	5
+#define PCI_PRODUCT_VIRTIO_9P		9
+
+/* Virtio header */
+#define VIRTIO_CONFIG_DEVICE_FEATURES	0	/* 32bit */
+#define VIRTIO_CONFIG_GUEST_FEATURES	4	/* 32bit */
+
+#define VIRTIO_F_NOTIFY_ON_EMPTY	(1<<24)
+#define VIRTIO_F_RING_INDIRECT_DESC	(1<<28)
+#define VIRTIO_F_BAD_FEATURE		(1<<30)
+
+#define VIRTIO_CONFIG_QUEUE_ADDRESS	8	/* 32bit */
+#define VIRTIO_CONFIG_QUEUE_SIZE	12	/* 16bit */
+#define VIRTIO_CONFIG_QUEUE_SELECT	14	/* 16bit */
+#define VIRTIO_CONFIG_QUEUE_NOTIFY	16	/* 16bit */
+#define VIRTIO_CONFIG_DEVICE_STATUS	18	/* 8bit */
+
+#define VIRTIO_CONFIG_DEVICE_STATUS_RESET	0
+#define VIRTIO_CONFIG_DEVICE_STATUS_ACK	1
+#define VIRTIO_CONFIG_DEVICE_STATUS_DRIVER	2
+#define VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK	4
+#define VIRTIO_CONFIG_DEVICE_STATUS_FAILED	128
+
+#define VIRTIO_CONFIG_ISR_STATUS	19	/* 8bit */
+#define VIRTIO_CONFIG_ISR_CONFIG_CHANGE	2
+
+#define VIRTIO_CONFIG_CONFIG_VECTOR	20	/* 16bit, optional */
+#define VIRTIO_CONFIG_QUEUE_VECTOR	22	/* 16bit, optional */
+
+#define VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI	20
+#define VIRTIO_CONFIG_DEVICE_CONFIG_MSI		24
+
+#define VIRTIO_MSI_NO_VECTOR	0xffff
+
+/* Virtqueue */
+/* This marks a buffer as continuing via the next field. */
+#define VRING_DESC_F_NEXT	1
+/*
+ * This marks a buffer as write-only, from the device's perspective
+ * (otherwise it is read-only).
+ */
+#define VRING_DESC_F_WRITE	2
+/* This means the buffer contains a list of buffer descriptors. */
+#define VRING_DESC_F_INDIRECT	4
+
+/*
+ * The Host uses this in used->flags to advise the Guest: don't kick me
+ * when you add a buffer. It's unreliable, so it's simply an
+ * optimization. Guest will still kick if it's out of buffers.
+ */
+#define VRING_USED_F_NO_NOTIFY	1
+/*
+ * The Guest uses this in avail->flags to advise the Host: don't
+ * interrupt me when you consume a buffer. It's unreliable, so it's
+ * simply an optimization.
+ */
+#define VRING_AVAIL_F_NO_INTERRUPT	1
+
+/*
+ * Virtio ring descriptors: 16 bytes.
+ * These can chain together via "next".
+ */
+struct vring_desc {
+	/* Address (guest-physical). */
+	uint64_t addr;
+	/* Length. */
+	uint32_t len;
+	/* The flags as indicated above. */
+	uint16_t flags;
+	/* We chain unused descriptors via this, too */
+	uint16_t next;
+} __attribute__((packed));
+
+struct vring_avail {
+	uint16_t flags;
+	uint16_t idx;
+	uint16_t ring[];
+} __attribute__((packed));
+
+/* u32 is used here for ids for padding reasons. */
+struct vring_used_elem {
+	/* Index of start of used descriptor chain. */
+	uint32_t id;
+	/* Total length of the descriptor chain which was written to. */
+	uint32_t len;
+} __attribute__((packed));
+
+struct vring_used {
+	uint16_t flags;
+	uint16_t idx;
+	struct vring_used_elem ring[];
+} __attribute__((packed));
+
+
+/* Got nothing to do with the system page size, just a confusing name. 
*/ +#define VIRTIO_PAGE_SIZE (4096) + +#endif /* __VIRTIOREG_H__ */ diff --git a/usr/src/uts/common/io/virtio/virtiovar.h b/usr/src/uts/common/io/virtio/virtiovar.h new file mode 100644 index 0000000000..e1617feb5d --- /dev/null +++ b/usr/src/uts/common/io/virtio/virtiovar.h @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2010 Minoura Makoto. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Part of the file derived from `Virtio PCI Card Specification v0.8.6 DRAFT' + * Appendix A. + */ + +/* + * An interface for efficient virtio implementation. + * + * This header is BSD licensed so anyone can use the definitions + * to implement compatible drivers/servers. + * + * Copyright 2007, 2009, IBM Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' ANDANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 
+ */
+
+#ifndef __VIRTIOVAR_H__
+#define __VIRTIOVAR_H__
+
+#include <sys/types.h>
+#include <sys/dditypes.h>
+#include <sys/cmn_err.h>
+#include <sys/list.h>
+
+#ifdef DEBUG
+#define dev_debug(dip, fmt, arg...) \
+	dev_err(dip, fmt, ##arg)
+#else
+#define dev_debug(dip, fmt, arg...)
+#endif
+
+struct vq_entry {
+	list_node_t qe_list;
+	struct virtqueue *qe_queue;
+	uint16_t qe_index; /* index in vq_desc array */
+	/* The following fields are used only when this is the `head' entry. */
+	struct vq_entry *qe_next;
+	struct vring_desc *qe_desc;
+	ddi_dma_cookie_t qe_indirect_dma_cookie;
+	ddi_dma_handle_t qe_indirect_dma_handle;
+	ddi_acc_handle_t qe_indirect_dma_acch;
+	struct vring_desc *qe_indirect_descs;
+	unsigned int qe_indirect_next;
+};
+
+struct virtqueue {
+	struct virtio_softc *vq_owner;
+	unsigned int vq_num; /* queue size (# of entries) */
+	unsigned int vq_indirect_num;
+	int vq_index; /* queue number (0, 1, ...) */
+
+	/* vring pointers (KVA) */
+	struct vring_desc *vq_descs;
+	struct vring_avail *vq_avail;
+	struct vring_used *vq_used;
+
+	/* virtqueue allocation info */
+	void *vq_vaddr;
+	int vq_availoffset;
+	int vq_usedoffset;
+	ddi_dma_cookie_t vq_dma_cookie;
+	ddi_dma_handle_t vq_dma_handle;
+	ddi_acc_handle_t vq_dma_acch;
+
+	int vq_maxsegsize;
+
+	/* free entry management */
+	struct vq_entry *vq_entries;
+	list_t vq_freelist;
+	kmutex_t vq_freelist_lock;
+	int vq_used_entries;
+
+	/* enqueue/dequeue status */
+	uint16_t vq_avail_idx;
+	kmutex_t vq_avail_lock;
+	uint16_t vq_used_idx;
+	kmutex_t vq_used_lock;
+};
+
+struct virtio_softc {
+	dev_info_t *sc_dev;
+
+	uint_t sc_intr_prio;
+
+	ddi_acc_handle_t sc_ioh;
+	caddr_t sc_io_addr;
+	int sc_config_offset;
+
+	uint32_t sc_features;
+
+	int sc_nvqs; /* set by the user */
+
+	ddi_intr_handle_t *sc_intr_htable;
+	int sc_intr_num;
+	boolean_t sc_intr_config;
+	int sc_intr_cap;
+};
+
+struct virtio_int_handler {
+	ddi_intr_handler_t *vh_func;
+	void *vh_priv;
+};
+
+/* public interface */
+uint32_t virtio_negotiate_features(struct virtio_softc *, uint32_t);
+size_t virtio_show_features(uint32_t features, char *buffer, size_t len);
+boolean_t virtio_has_feature(struct virtio_softc *sc, uint32_t feature);
+void virtio_set_status(struct virtio_softc *sc, unsigned int);
+#define virtio_device_reset(sc) virtio_set_status((sc), 0)
+
+uint8_t virtio_read_device_config_1(struct virtio_softc *sc,
+    unsigned int index);
+uint16_t virtio_read_device_config_2(struct virtio_softc *sc,
+    unsigned int index);
+uint32_t virtio_read_device_config_4(struct virtio_softc *sc,
+    unsigned int index);
+uint64_t virtio_read_device_config_8(struct virtio_softc *sc,
+    unsigned int index);
+void virtio_write_device_config_1(struct virtio_softc *sc,
+    unsigned int index, uint8_t value);
+void virtio_write_device_config_2(struct virtio_softc *sc,
+    unsigned int index, uint16_t value);
+void virtio_write_device_config_4(struct virtio_softc *sc,
+    unsigned int index, uint32_t value);
+void virtio_write_device_config_8(struct virtio_softc *sc,
+    unsigned int index, uint64_t value);
+
+struct virtqueue *virtio_alloc_vq(struct virtio_softc *sc,
+    unsigned int index, unsigned int size,
+    unsigned int indirect_num, const char *name);
+void virtio_free_vq(struct virtqueue *);
+void virtio_reset(struct virtio_softc *);
+struct vq_entry *vq_alloc_entry(struct virtqueue *vq);
+void vq_free_entry(struct virtqueue *vq, struct vq_entry *qe);
+uint_t vq_num_used(struct virtqueue *vq);
+
+void virtio_stop_vq_intr(struct virtqueue *);
+void virtio_start_vq_intr(struct virtqueue 
*); + +void virtio_ve_add_cookie(struct vq_entry *qe, ddi_dma_handle_t dma_handle, + ddi_dma_cookie_t dma_cookie, unsigned int ncookies, boolean_t write); +void virtio_ve_add_indirect_buf(struct vq_entry *qe, uint64_t paddr, + uint32_t len, boolean_t write); +void virtio_ve_set(struct vq_entry *qe, uint64_t paddr, uint32_t len, + boolean_t write); + +void virtio_push_chain(struct vq_entry *qe, boolean_t sync); +struct vq_entry *virtio_pull_chain(struct virtqueue *vq, uint32_t *len); +void virtio_free_chain(struct vq_entry *ve); +void virtio_sync_vq(struct virtqueue *vq); + +int virtio_register_ints(struct virtio_softc *sc, + struct virtio_int_handler *config_handler, + struct virtio_int_handler vq_handlers[]); +void virtio_release_ints(struct virtio_softc *sc); +int virtio_enable_ints(struct virtio_softc *sc); + +#endif /* __VIRTIOVAR_H__ */ diff --git a/usr/src/uts/common/nfs/nfs4.h b/usr/src/uts/common/nfs/nfs4.h index 829043bbe3..cf36c03d0a 100644 --- a/usr/src/uts/common/nfs/nfs4.h +++ b/usr/src/uts/common/nfs/nfs4.h @@ -22,6 +22,9 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. + */ #ifndef _NFS4_H #define _NFS4_H @@ -1307,7 +1310,7 @@ extern char *utf8_to_fn(utf8string *, uint_t *, char *); extern utf8string *str_to_utf8(char *, utf8string *); extern utf8string *utf8_copy(utf8string *, utf8string *); extern int utf8_compare(const utf8string *, const utf8string *); -extern int utf8_dir_verify(utf8string *); +extern nfsstat4 utf8_dir_verify(utf8string *); extern char *utf8_strchr(utf8string *, const char); extern int ln_ace4_cmp(nfsace4 *, nfsace4 *, int); extern int vs_aent_to_ace4(vsecattr_t *, vsecattr_t *, int, int); diff --git a/usr/src/uts/common/os/sunmdi.c b/usr/src/uts/common/os/sunmdi.c index a4ee88fef4..f174b4d4aa 100644 --- a/usr/src/uts/common/os/sunmdi.c +++ b/usr/src/uts/common/os/sunmdi.c @@ -23,8 +23,8 @@ */ /* - * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more - * detailed discussion of the overall mpxio architecture. + * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a + * more detailed discussion of the overall mpxio architecture. * * Default locking order: * diff --git a/usr/src/uts/common/smbsrv/string.h b/usr/src/uts/common/smbsrv/string.h index ceeb8accde..14b9cac8b8 100644 --- a/usr/src/uts/common/smbsrv/string.h +++ b/usr/src/uts/common/smbsrv/string.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SMBSRV_STRING_H @@ -123,8 +124,7 @@ int smb_isstrupr(const char *); int smb_isstrlwr(const char *); int smb_strcasecmp(const char *, const char *, size_t); -boolean_t smb_match(char *, char *); -boolean_t smb_match_ci(char *, char *); +boolean_t smb_match(const char *, const char *, boolean_t); size_t smb_mbstowcs(smb_wchar_t *, const char *, size_t); size_t smb_wcstombs(char *, const smb_wchar_t *, size_t); diff --git a/usr/src/uts/common/sys/elf.h b/usr/src/uts/common/sys/elf.h index 6d66401259..bc25aee9c4 100644 --- a/usr/src/uts/common/sys/elf.h +++ b/usr/src/uts/common/sys/elf.h @@ -19,6 +19,9 @@ * CDDL HEADER END */ /* + * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. + */ +/* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -812,7 +815,8 @@ typedef Elf64_Word Elf64_Capchain; #define NT_PRPRIVINFO 19 /* priv_impl_info_t <sys/priv.h> */ #define NT_CONTENT 20 /* core_content_t <sys/corectl.h> */ #define NT_ZONENAME 21 /* string from getzonenamebyid(3C) */ -#define NT_NUM 21 +#define NT_FDINFO 22 /* open fd info */ +#define NT_NUM 22 #ifdef _KERNEL diff --git a/usr/src/uts/common/sys/ipmi.h b/usr/src/uts/common/sys/ipmi.h index 9dafac407d..94a53392de 100644 --- a/usr/src/uts/common/sys/ipmi.h +++ b/usr/src/uts/common/sys/ipmi.h @@ -42,8 +42,8 @@ extern "C" { #define IPMI_MAX_ADDR_SIZE 0x20 #define IPMI_MAX_RX 1024 -#define IPMI_BMC_SLAVE_ADDR 0x20 /* Linux Default slave address */ -#define IPMI_BMC_CHANNEL 0x0f /* Linux BMC channel */ +#define IPMI_BMC_SLAVE_ADDR 0x20 /* Default slave address */ +#define IPMI_BMC_CHANNEL 0x0f /* BMC channel */ #define IPMI_BMC_SMS_LUN 0x02 diff --git a/usr/src/uts/common/sys/procfs.h b/usr/src/uts/common/sys/procfs.h index 12a6925368..f592fd9dcf 100644 --- a/usr/src/uts/common/sys/procfs.h +++ b/usr/src/uts/common/sys/procfs.h @@ -23,12 +23,13 @@ * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. + */ #ifndef _SYS_PROCFS_H #define _SYS_PROCFS_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -61,6 +62,8 @@ extern "C" { #include <sys/pset.h> #include <sys/procfs_isa.h> #include <sys/priv.h> +#include <sys/stat.h> +#include <sys/param.h> /* * System call interfaces for /proc. @@ -488,6 +491,38 @@ typedef struct prasmap { #define PG_HWMAPPED 0x04 /* page is present and mapped */ /* + * Open files. Only in core files (for now). Note that we'd like to use + * the stat or stat64 structure, but both of these structures are unfortunately + * not consistent between 32 and 64 bit modes. To keep our lives simpler, we + * just define our own structure with types that are not sensitive to this + * difference. Also, it turns out that pfiles omits a lot of info from the + * struct stat (e.g. times, device sizes, etc.) so we don't bother adding those + * here. + */ +typedef struct prfdinfo { + int pr_fd; + mode_t pr_mode; + + uid_t pr_uid; + gid_t pr_gid; + + major_t pr_major; /* think stat.st_dev */ + minor_t pr_minor; + + major_t pr_rmajor; /* think stat.st_rdev */ + minor_t pr_rminor; + + ino64_t pr_ino; + off64_t pr_offset; + off64_t pr_size; + + int pr_fileflags; /* fcntl(F_GETXFL), etc */ + int pr_fdflags; /* fcntl(F_GETFD), etc. 
*/ + + char pr_path[MAXPATHLEN]; +} prfdinfo_t; + +/* * Header for /proc/<pid>/lstatus /proc/<pid>/lpsinfo /proc/<pid>/lusage */ typedef struct prheader { diff --git a/usr/src/uts/common/sys/utsname.h b/usr/src/uts/common/sys/utsname.h index 2b9cf0e33f..4a2aca442c 100644 --- a/usr/src/uts/common/sys/utsname.h +++ b/usr/src/uts/common/sys/utsname.h @@ -31,8 +31,6 @@ #ifndef _SYS_UTSNAME_H #define _SYS_UTSNAME_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/feature_tests.h> #ifdef __cplusplus @@ -67,13 +65,9 @@ extern struct utsname utsname; #if defined(__STDC__) -#if !defined(__lint) -static int uname(struct utsname *); -static int _uname(struct utsname *); -#else extern int uname(struct utsname *); extern int _uname(struct utsname *); -#endif + #if !defined(__XOPEN_OR_POSIX) || defined(__EXTENSIONS__) extern int nuname(struct utsname *); #endif /* !defined(__XOPEN_OR_POSIX) || defined(__EXTENSIONS__) */ @@ -81,13 +75,9 @@ extern int _nuname(struct utsname *); #else /* defined(__STDC__) */ -#if !defined(__lint) -static int uname(); -static int _uname(); -#else extern int uname(); extern int _uname(); -#endif + #if !defined(__XOPEN_OR_POSIX) || defined(__EXTENSIONS__) extern int nuname(); #endif /* !defined(__XOPEN_OR_POSIX) || defined(__EXTENSIONS__) */ @@ -95,30 +85,20 @@ extern int _nuname(); #endif /* defined(__STDC__) */ - -#if !defined(__lint) -static int -#if defined(__STDC__) -_uname(struct utsname *_buf) -#else -_uname(_buf) -struct utsname *_buf; -#endif -{ - return (_nuname(_buf)); -} - -static int -#if defined(__STDC__) -uname(struct utsname *_buf) +/* + * On i386 in SVID.2 uname() returns a utsname structure with 8 byte members, + * and nuname() returns the real struct utsname. In SVID.3 uname and nuname + * are equivalent. Anyone who includes this header gets the SVID.3 behaviour. + * The SVID.2 behaviour exists solely for compatibility, and is what is + * implemented by the libc uname/_uname entrypoints. + */ +#ifdef __PRAGMA_REDEFINE_EXTNAME +#pragma redefine_extname uname _nuname +#pragma redefine_extname _uname _nuname #else -uname(_buf) -struct utsname *_buf; +#define uname _nuname +#define _uname _nuname #endif -{ - return (_nuname(_buf)); -} -#endif /* !defined(__lint) */ #else /* defined(__i386) */ diff --git a/usr/src/uts/common/syscall/poll.c b/usr/src/uts/common/syscall/poll.c index 5e87d073e3..7f37529941 100644 --- a/usr/src/uts/common/syscall/poll.c +++ b/usr/src/uts/common/syscall/poll.c @@ -308,7 +308,7 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp) deadline = 0; } else { /* They must wait at least a tick. */ - deadline = tsp->tv_sec * NANOSEC + tsp->tv_nsec; + deadline = ((hrtime_t)tsp->tv_sec * NANOSEC) + tsp->tv_nsec; deadline = MAX(deadline, nsec_per_tick); deadline += gethrtime(); } |
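
Why the (hrtime_t) cast in the poll.c hunk above matters: with a 32-bit
time_t, tv_sec * NANOSEC is evaluated in 32-bit arithmetic and overflows
before it is widened for the 64-bit deadline. A minimal user-level sketch
(editor's addition; the variable names are illustrative, and the
overflowing multiplication is undefined behavior shown only to make the
point):

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		int32_t tv_sec = 3;	/* stands in for a 32-bit time_t */

		/* Both operands are 32-bit, so the product is, too. */
		int64_t bad = tv_sec * 1000000000;

		/* Widen first, then multiply in 64 bits. */
		int64_t good = (int64_t)tv_sec * 1000000000;

		(void) printf("bad=%lld good=%lld\n",
		    (long long)bad, (long long)good);
		return (0);
	}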