summaryrefslogtreecommitdiff
path: root/usr/src/uts/i86pc/i86hvm
diff options
context:
space:
mode:
authoredp <none@none>2008-04-14 22:44:34 -0700
committeredp <none@none>2008-04-14 22:44:34 -0700
commiteb0cc229f19c437a6b538d3ac0d0443268290b7e (patch)
treee4d394e7fb7dcf49c28308fb78ab8627e6a9a553 /usr/src/uts/i86pc/i86hvm
parent6eb35ee750312cc65aa066dd0f625b9d54c6f86e (diff)
downloadillumos-joyent-eb0cc229f19c437a6b538d3ac0d0443268290b7e.tar.gz
6683029 Install -G and i86hvm don't mix well
6683772 yacc yacks when confronted with libumem --HG-- rename : usr/src/uts/i86pc/Makefile.hvm => usr/src/uts/i86pc/i86hvm/Makefile.i86hvm rename : usr/src/uts/i86pc/hvm_bootstrap/Makefile => usr/src/uts/i86pc/i86hvm/hvm_bootstrap/Makefile rename : usr/src/uts/common/xen/io/hvm_bootstrap.c => usr/src/uts/i86pc/i86hvm/io/hvm_bootstrap.c rename : usr/src/uts/i86pc/io/pv_cmdk.c => usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c rename : usr/src/uts/i86pc/io/pv_rtls.c => usr/src/uts/i86pc/i86hvm/io/pv_rtls.c rename : usr/src/uts/i86pc/io/xpv/evtchn.c => usr/src/uts/i86pc/i86hvm/io/xpv/evtchn.c rename : usr/src/uts/i86pc/io/xpv/xpv.conf => usr/src/uts/i86pc/i86hvm/io/xpv/xpv.conf rename : usr/src/uts/i86pc/io/xpv/xpv_support.c => usr/src/uts/i86pc/i86hvm/io/xpv/xpv_support.c rename : usr/src/uts/i86pc/pv_cmdk/Makefile => usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile rename : usr/src/uts/i86pc/pv_rtls/Makefile => usr/src/uts/i86pc/i86hvm/pv_rtls/Makefile rename : usr/src/uts/i86pc/sys/xpv_support.h => usr/src/uts/i86pc/i86hvm/sys/xpv_support.h rename : usr/src/uts/i86pc/xdf/Makefile => usr/src/uts/i86pc/i86hvm/xdf/Makefile rename : usr/src/uts/i86pc/xnf/Makefile => usr/src/uts/i86pc/i86hvm/xnf/Makefile rename : usr/src/uts/i86pc/xpv/Makefile => usr/src/uts/i86pc/i86hvm/xpv/Makefile rename : usr/src/uts/i86pc/xpvd/Makefile => usr/src/uts/i86pc/i86hvm/xpvd/Makefile
Diffstat (limited to 'usr/src/uts/i86pc/i86hvm')
-rw-r--r--usr/src/uts/i86pc/i86hvm/Makefile108
-rw-r--r--usr/src/uts/i86pc/i86hvm/Makefile.files49
-rw-r--r--usr/src/uts/i86pc/i86hvm/Makefile.i86hvm66
-rw-r--r--usr/src/uts/i86pc/i86hvm/Makefile.rules73
-rw-r--r--usr/src/uts/i86pc/i86hvm/Makefile.targ63
-rw-r--r--usr/src/uts/i86pc/i86hvm/hvm_bootstrap/Makefile83
-rw-r--r--usr/src/uts/i86pc/i86hvm/io/hvm_bootstrap.c89
-rw-r--r--usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c1541
-rw-r--r--usr/src/uts/i86pc/i86hvm/io/pv_rtls.c79
-rw-r--r--usr/src/uts/i86pc/i86hvm/io/xpv/evtchn.c389
-rw-r--r--usr/src/uts/i86pc/i86hvm/io/xpv/xpv.conf28
-rw-r--r--usr/src/uts/i86pc/i86hvm/io/xpv/xpv_support.c956
-rw-r--r--usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile102
-rw-r--r--usr/src/uts/i86pc/i86hvm/pv_rtls/Makefile90
-rw-r--r--usr/src/uts/i86pc/i86hvm/sys/xpv_support.h91
-rw-r--r--usr/src/uts/i86pc/i86hvm/xdf/Makefile89
-rw-r--r--usr/src/uts/i86pc/i86hvm/xnf/Makefile95
-rw-r--r--usr/src/uts/i86pc/i86hvm/xpv/Makefile98
-rw-r--r--usr/src/uts/i86pc/i86hvm/xpvd/Makefile90
19 files changed, 4179 insertions, 0 deletions
diff --git a/usr/src/uts/i86pc/i86hvm/Makefile b/usr/src/uts/i86pc/i86hvm/Makefile
new file mode 100644
index 0000000000..07de533f70
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/Makefile
@@ -0,0 +1,108 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/i86pc/i86hvm/Makefile
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the i86hvm platform modules.
+#
+# i86pc implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+# Include common rules, then give each top-level target its own TARGET
+# value; the $(HVM_KMODS) rule below passes $(TARGET) (and, for modlist,
+# $(NO_STATE)) to the make it runs in every module directory.
+include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm
+
+def := TARGET= def
+all := TARGET= all
+install := TARGET= install
+install_h := TARGET= install_h
+clean := TARGET= clean
+clobber := TARGET= clobber
+lint := TARGET= lint
+lintlib := TARGET= lintlib
+modlintlib := TARGET= modlintlib
+modlist := TARGET= modlist
+modlist := NO_STATE= -K $$MODSTATE$$$$
+clean.lint := TARGET= clean.lint
+check := TARGET= check
+
+# Default build targets. Most targets simply recurse into every module
+# directory in $(HVM_KMODS); install first creates the i86hvm install
+# directories (install_implementations) and then recurses.
+.KEEP_STATE:
+
+.PARALLEL: $(HVM_KMODS)
+
+def all clean clobber clean.lint modlist modlintlib: $(HVM_KMODS)
+
+install: install_implementations .WAIT \
+ $(HVM_KMODS)
+
+install_implementations: \
+ $(ROOT_HVM_DIR) \
+ $(ROOT_HVM_DRV_DIR) \
+ $(ROOT_HVM_MISC_DIR) \
+ $(USR_HVM_DIR)
+
+$(HVM_KMODS): FRC
+ @cd $@; pwd; $(MAKE) $(NO_STATE) $(TARGET)
+
+install_h check: FRC
+
+lintlib lint: modlintlib .WAIT $(LINT_DEPS)
+
+#
+# The 'lint.platmod' target lints the i86hvm platform modules against the i86pc
+# kernel. This ends up doing all the kernel cross-checks. LINT_LIBS names the
+# lint libraries used: i86hvm, platform ($(LINT_LIB_DIR)) and intel genunix.
+LINT_TARGET = lint.platmod
+INTEL_LIB_DIR = $(UTSBASE)/intel/lint-libs/$(OBJS_DIR)
+INTEL_LINTS = genunix
+LINT_LIBS = $(LINT_LIB) \
+ -L$(HVM_LINT_LIB_DIR) \
+ -L$(LINT_LIB_DIR) \
+ $(GENUNIX_KMODS:%=-l%) \
+ $(PARALLEL_KMODS:%=-l%) \
+ $(CLOSED_KMODS:%=-l%) \
+ -L$(INTEL_LIB_DIR) \
+ $(INTEL_LINTS:%=-l%)
+
+# workaround for multiply defined errors (only applied for lint.platmod)
+lint.platmod := LINTFLAGS += -erroff=E_NAME_MULTIPLY_DEF2
+
+lint.platmod: modlintlib
+ @-$(ECHO) "\ni86hvm platform-dependent module: global crosschecks:"
+ @-$(LINT) $(LINTFLAGS) $(LINT_LIBS) 2>&1 | $(LGREP.2)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/$(PLATFORM)/i86hvm/Makefile.targ
diff --git a/usr/src/uts/i86pc/i86hvm/Makefile.files b/usr/src/uts/i86pc/i86hvm/Makefile.files
new file mode 100644
index 0000000000..03ff880f7c
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/Makefile.files
@@ -0,0 +1,49 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This Makefile defines file modules in the directory uts/i86pc/i86hvm
+# and its children. These are the source files which are i86pc/i86hvm
+# "implementation architecture" dependent.
+#
+
+#
+# Define objects: one <MODULE>_OBJS list per i86hvm kernel module.
+#
+PV_CMDK_OBJS += pv_cmdk.o
+PV_RTLS_OBJS += pv_rtls.o
+HVM_BOOTSTRAP_OBJS += hvm_bootstrap.o
+XDF_OBJS += xdf.o
+XNF_OBJS += xnf.o
+XPV_OBJS += xpv_support.o xvdi.o gnttab.o evtchn.o \
+ xenbus_comms.o xenbus_client.o xenbus_probe.o \
+ xenbus_xs.o hypercall.o hypersubr.o
+XPVD_OBJS += xpvd.o
+
+#
+# Include i86hvm header files: search the common xen directory and the
+# i86pc/i86hvm directory for headers.
+INC_PATH += -I$(UTSBASE)/common/xen -I$(UTSBASE)/i86pc/i86hvm
diff --git a/usr/src/uts/i86pc/i86hvm/Makefile.i86hvm b/usr/src/uts/i86pc/i86hvm/Makefile.i86hvm
new file mode 100644
index 0000000000..0e414c5fb1
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/Makefile.i86hvm
@@ -0,0 +1,66 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/i86pc/i86hvm/Makefile.i86hvm
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile provides support for building PV drivers that run
+# in an HVM environment: install directories (the _32/_64 variants are
+# selected by $(CLASS)), the module lists and the i86hvm compile flags.
+
+ROOT_HVM_DIR = $(ROOT_PLAT_DIR)/i86hvm
+ROOT_HVM_MOD_DIR = $(ROOT_HVM_DIR)/kernel
+ROOT_HVM_DRV_DIR_32 = $(ROOT_HVM_MOD_DIR)/drv
+ROOT_HVM_DRV_DIR_64 = $(ROOT_HVM_MOD_DIR)/drv/$(MACH64)
+ROOT_HVM_DRV_DIR = $(ROOT_HVM_DRV_DIR_$(CLASS))
+ROOT_HVM_MISC_DIR_32 = $(ROOT_HVM_MOD_DIR)/misc
+ROOT_HVM_MISC_DIR_64 = $(ROOT_HVM_MOD_DIR)/misc/$(MACH64)
+ROOT_HVM_MISC_DIR = $(ROOT_HVM_MISC_DIR_$(CLASS))
+USR_HVM_DIR = $(USR_PLAT_DIR)/i86hvm
+
+HVM_LINT_LIB_DIR= $(UTSBASE)/$(PLATFORM)/i86hvm/lint-libs/$(OBJS_DIR)
+
+#
+# Define modules (HVM_KMODS is the drivers plus the misc modules).
+#
+HVM_DRV_KMODS = pv_cmdk pv_rtls xdf xnf xpv xpvd
+HVM_MISC_KMODS = hvm_bootstrap
+HVM_KMODS = $(HVM_DRV_KMODS) $(HVM_MISC_KMODS)
+
+include $(UTSBASE)/i86pc/i86hvm/Makefile.files
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/Makefile.i86pc
+
+LINTS_DIR = $(OBJS_DIR)
+LINT_LIB_DIR = $(UTSBASE)/$(PLATFORM)/lint-libs/$(OBJS_DIR)
+
+#
+# Indicate that we are building for the i86hvm semi-platform.
+# Also use Solaris specific code in xen public header files.
+#
+CPPFLAGS += -DXPV_HVM_DRIVER -D_SOLARIS
+ASFLAGS += -DXPV_HVM_DRIVER
diff --git a/usr/src/uts/i86pc/i86hvm/Makefile.rules b/usr/src/uts/i86pc/i86hvm/Makefile.rules
new file mode 100644
index 0000000000..4a9c0edec8
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/Makefile.rules
@@ -0,0 +1,73 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# This Makefile defines the build rules for the directory
+# uts/i86pc/i86hvm.
+#
+# The following two-level ordering must be maintained in this file.
+# Lines are sorted first in order of decreasing specificity based on
+# the first directory component. That is, i86pc rules come before
+# intel rules come before common rules.
+#
+# Lines whose initial directory components are equal are sorted
+# alphabetically by the remaining components.
+
+#
+# Section 1a: C object build rules. One pattern rule per source directory:
+# compile the .c file into $(OBJS_DIR), then run $(CTFCONVERT_O).
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/i86hvm/io/xpv/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/i86hvm/io/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/xen/io/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/xen/os/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+#
+# Section 1b: Lint `object' build rules. Same source directories as the
+# C object rules, with $(LINTS_DIR)/%.ln as the target of each rule.
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/i86hvm/io/xpv/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/i86hvm/io/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/xen/io/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/xen/os/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/i86pc/i86hvm/Makefile.targ b/usr/src/uts/i86pc/i86hvm/Makefile.targ
new file mode 100644
index 0000000000..29493a64fb
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/Makefile.targ
@@ -0,0 +1,63 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Common targets for i86hvm platform-implementation specific modules.
+#
+
+.KEEP_STATE:
+
+#
+# Rules for implementation subdirectories. Each directory is made after
+# the i86hvm directory above it; modules are installed from $(OBJS_DIR).
+$(ROOT_HVM_DIR):
+	-$(INS.dir.root.sys)
+
+$(ROOT_HVM_MOD_DIR): $(ROOT_HVM_DIR)
+	-$(INS.dir.root.sys)
+
+$(ROOT_HVM_DRV_DIR): $(ROOT_HVM_MOD_DIR)
+	-$(INS.dir.root.sys)
+
+$(ROOT_HVM_MISC_DIR): $(ROOT_HVM_MOD_DIR)
+	-$(INS.dir.root.sys)
+
+$(ROOT_HVM_MOD_DIR)/%: $(OBJS_DIR)/% $(ROOT_HVM_MOD_DIR) FRC
+	$(INS.file)
+
+$(ROOT_HVM_DRV_DIR)/%: $(OBJS_DIR)/% $(ROOT_HVM_DRV_DIR) FRC
+	$(INS.file)
+
+$(ROOT_HVM_MISC_DIR)/%: $(OBJS_DIR)/% $(ROOT_HVM_MISC_DIR) FRC
+	$(INS.file)
+
+$(USR_HVM_DIR):
+	-$(INS.dir.root.sys)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/$(PLATFORM)/i86hvm/Makefile.rules
+include $(UTSBASE)/$(PLATFORM)/Makefile.targ
diff --git a/usr/src/uts/i86pc/i86hvm/hvm_bootstrap/Makefile b/usr/src/uts/i86pc/i86hvm/hvm_bootstrap/Makefile
new file mode 100644
index 0000000000..ea250a88a7
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/hvm_bootstrap/Makefile
@@ -0,0 +1,83 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# uts/i86pc/i86hvm/hvm_bootstrap/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# i86pc architecture dependent
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../../..
+
+#
+# Define the module and object file sets. The module is installed as
+# $(ROOTMODULE), i.e. under the i86hvm misc module directory.
+MODULE = hvm_bootstrap
+OBJECTS = $(HVM_BOOTSTRAP_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(HVM_BOOTSTRAP_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_HVM_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm
+
+#
+# Define targets: the ALL/LINT/INSTALL target lists for this module.
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+# Overrides: add -dy to the link flags.
+LDFLAGS += -dy
+
+#
+# Default build targets. Each one only lists its *_DEPS macro as
+# prerequisites; those macros are not defined in this file.
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.targ
diff --git a/usr/src/uts/i86pc/i86hvm/io/hvm_bootstrap.c b/usr/src/uts/i86pc/i86hvm/io/hvm_bootstrap.c
new file mode 100644
index 0000000000..95b9df1a82
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/io/hvm_bootstrap.c
@@ -0,0 +1,89 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/modctl.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+
+/*
+ * The hvm_bootstrap misc module is installed in the i86hvm platform
+ * directory so it will only be loaded in an HVM emulated environment.
+ */
+
+
+/*
+ * hvmboot_rootconf() force attaches all xdf disk driver nodes before the
+ * pv cmdk disk driver comes along and tries to access any of them (which
+ * usually happens when mounting the root disk device in an hvm
+ * environment); see the block comments at the top of pv_cmdk.c for why.
+ * Panics if xdf or the /xpvd nexus cannot be found; otherwise returns 0.
+ */
+int
+hvmboot_rootconf(void)
+{
+	dev_info_t	*xpvd_dip;
+	major_t		xdf_major;
+
+	xdf_major = ddi_name_to_major("xdf");
+	if (xdf_major == (major_t)-1)
+		cmn_err(CE_PANIC, "unable to load xdf disk driver");
+
+	if (resolve_pathname("/xpvd", &xpvd_dip, NULL, NULL) != 0)
+		cmn_err(CE_PANIC, "unable to configure /xpvd nexus");
+	/* Best effort: the result of configuring the xdf nodes is ignored. */
+	(void) ndi_devi_config_driver(xpvd_dip, 0, xdf_major);
+
+	ndi_rele_devi(xpvd_dip);
+	return (0);
+}
+
+static struct modlmisc modlmisc = { /* referenced by modlinkage below */
+ &mod_miscops, "hvm_bootstrap misc module"
+};
+
+static struct modlinkage modlinkage = { /* used by _info() and _init() */
+ MODREV_1, (void *)&modlmisc, NULL
+};
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop)); /* defer to mod_info() */
+}
+
+int
+_init(void)
+{
+	return (mod_install(&modlinkage));	/* install per modlinkage */
+}
+
+int
+_fini(void)
+{
+	return (EBUSY);		/* unconditionally answers EBUSY */
+}
diff --git a/usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c b/usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c
new file mode 100644
index 0000000000..4ad9b06aec
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c
@@ -0,0 +1,1541 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/scsi/scsi_types.h>
+#include <sys/modctl.h>
+#include <sys/cmlb.h>
+#include <sys/types.h>
+#include <sys/xpv_support.h>
+#include <sys/xendev.h>
+#include <sys/gnttab.h>
+#include <public/xen.h>
+#include <public/grant_table.h>
+#include <io/xdf.h>
+#include <sys/vtoc.h>
+#include <sys/dkio.h>
+#include <sys/dktp/dadev.h>
+#include <sys/dktp/dadkio.h>
+#include <sys/dktp/tgdk.h>
+#include <sys/dktp/bbh.h>
+#include <sys/dktp/cmdk.h>
+#include <sys/dktp/altsctr.h>
+
+/*
+ * General Notes
+ *
+ * We don't support disks with bad block mappings. We have this
+ * limitation because the underlying xdf driver doesn't support
+ * bad block remapping. If there is a need to support this feature
+ * it should be added directly to the xdf driver and we should just
+ * pass requests straight on through and let it handle the remapping.
+ * Also, it's probably worth pointing out that most modern disks do bad
+ * block remapping internally in the hardware so there's actually less
+ * of a chance of us ever discovering bad blocks. Also, in most cases
+ * this driver (and the xdf driver) will only be used with virtualized
+ * devices, so one might wonder why a virtual device would ever actually
+ * experience bad blocks. To wrap this up, you might be wondering how
+ * these bad block mappings get created and how they are managed. Well,
+ * there are two tools for managing bad block mappings, format(1M) and
+ * addbadsec(1M). Format(1M) can be used to do a surface scan of a disk
+ * to attempt to find bad block and create mappings for them. Format(1M)
+ * and addbadsec(1M) can also be used to edit existing mappings that may
+ * be saved on the disk.
+ *
+ * The underlying PV driver that this driver passes on requests to is the
+ * xdf driver. Since in most cases the xdf driver doesn't deal with
+ * physical disks it has its own algorithm for assigning a physical
+ * geometry to a virtual disk (ie, cylinder count, head count, etc.)
+ * The default values chosen by the xdf driver may not match those
+ * assigned to a disk by a hardware disk emulator in an HVM environment.
+ * This is a problem since these physical geometry attributes affect
+ * things like the partition table, backup label location, etc. So
+ * to emulate disk devices correctly we need to know the physical geometry
+ * that was assigned to a disk at the time of its initialization.
+ * Normally in an HVM environment this information will be passed to
+ * the BIOS and operating system from the hardware emulator that is
+ * emulating the disk devices. In the case of a solaris dom0+xvm
+ * this would be qemu. So to work around this issue, this driver will
+ * query the emulated hardware to get the assigned physical geometry
+ * and then pass this geometry onto the xdf driver so that it can use it.
+ * But really, this information is essentially metadata about the disk
+ * that should be kept with the disk image itself. (Assuming of course
+ * that a disk image is the actual backingstore for this emulated device.)
+ * This metadata should also be made available to PV drivers via a common
+ * mechanism, probably the xenstore. The fact that this metadata isn't
+ * available outside of HVM domains means that it's difficult to move
+ * disks between HVM and PV domains, since a fully PV domain will have no
+ * way of knowing what the correct geometry of the target device is.
+ * (Short of reading the disk, looking for things like partition tables
+ * and labels, and taking a best guess at what the geometry was when
+ * the disk was initialized. Unsurprisingly, qemu actually does this.)
+ *
+ * This driver has to map cmdk device instances into their corresponding
+ * xdf device instances. We have to do this to ensure that when a user
+ * accesses an emulated cmdk device we map those accesses to the proper
+ * paravirtualized device. Basically what we need to know is how multiple
+ * 'disk' entries in a domU configuration file get mapped to emulated
+ * cmdk devices and to xdf devices. The 'disk' entry to xdf instance
+ * mappings we know because those are done within the Solaris xvdi code
+ * and the xpvd nexus driver. But the config to emulated devices mappings
+ * are handled entirely within the xen management tool chain and the
+ * hardware emulator. Since all the tools that establish these mappings
+ * live in dom0, dom0 should really supply us with this information,
+ * probably via the xenstore. Unfortunately it doesn't, and since there's
+ * no good way to determine this mapping dynamically, this driver uses
+ * a hard coded set of static mappings. These mappings are hardware
+ * emulator specific because each different hardware emulator could have
+ * a different device tree with different cmdk device paths. This
+ * means that if we want to continue to use this static mapping approach
+ * to allow Solaris to run on different hardware emulators we'll have
+ * to analyze each of those emulators to determine what paths they
+ * use and hard code those paths into this driver. yech. This metadata
+ * really needs to be supplied to us by dom0.
+ *
+ * This driver accesses underlying xdf nodes. Unfortunately, devices
+ * must create minor nodes during attach, and for disk devices to create
+ * minor nodes, they have to look at the label on the disk, so this means
+ * that disk drivers must be able to access a disk's contents during
+ * attach. That means that this disk driver must be able to access
+ * underlying xdf nodes during attach. Unfortunately, due to device tree
+ * locking restrictions, we cannot have an attach operation occurring on
+ * this device and then attempt to access another device which may
+ * cause another attach to occur in a different device tree branch
+ * since this could result in deadlock. Hence, this driver can only
+ * access xdf device nodes that we know are attached, and it can't use
+ * any ddi interfaces to access those nodes if those interfaces could
+ * trigger an attach of the xdf device. So this driver works around
+ * these restrictions by talking directly to xdf devices via
+ * xdf_hvm_hold(). This interface takes a pathname to an xdf device,
+ * and if that device is already attached then it returns a held dip
+ * pointer for that device node. This prevents us from getting into
+ * deadlock situations, but now we need a mechanism to ensure that all
+ * the xdf device nodes this driver might access are attached before
+ * this driver tries to access them. This is accomplished via the
+ * hvmboot_rootconf() callback which is invoked just before root is
+ * mounted. hvmboot_rootconf() will attach xpvd and tell it to configure
+ * all xdf devices visible to the system. All these xdf device nodes
+ * will also be marked with the "ddi-no-autodetach" property so that
+ * once they are configured, they will not be automatically unconfigured.
+ * The only way that they could be unconfigured is if the administrator
+ * explicitly attempts to unload required modules via rem_drv(1M)
+ * or modunload(1M).
+ */
+
+/*
+ * Minor number decoding: 16 partitions + fdisk per unit (see xdf.h)
+ */
+#define	XDF_DEV2UNIT(dev)	XDF_INST((getminor((dev))))
+#define	XDF_DEV2PART(dev)	XDF_PART((getminor((dev))))
+
+#define	OTYP_VALID(otyp)	(((otyp) == OTYP_BLK) || \
+				((otyp) == OTYP_CHR) || \
+				((otyp) == OTYP_LYR))
+
+#define	PV_CMDK_NODES		4
+
+typedef struct hvm_to_pv {
+ char *h2p_hvm_path; /* emulated (HVM) cmdk device path */
+ char *h2p_pv_path; /* matching PV xdf device path */
+} hvm_to_pv_t;
+
+/* Emulated cmdk path to xdf path mappings used under xen + qemu. */
+/* The table ends with an all-NULL entry. */
+static hvm_to_pv_t pv_cmdk_h2p_xen_qemu[] = {
+	/*
+	 * The paths mapping here are very specific to xen and qemu. When a
+	 * domU is booted under xen in HVM mode, qemu is normally used to
+	 * emulate up to four ide disks. These disks always have the four
+	 * path listed below. To configure an emulated ide device, the
+	 * xen domain configuration file normally has an entry that looks
+	 * like this:
+	 *	disk = [ 'file:/foo.img,hda,w' ]
+	 *
+	 * The part we're interested in is the 'hda', which we'll call the
+	 * xen disk device name here. The xen management tools (which parse
+	 * the xen domain configuration file and launch qemu) makes the
+	 * following assumptions about this value:
+	 *	hda == emulated ide disk 0 (ide bus 0, master)
+	 *	hdb == emulated ide disk 1 (ide bus 0, slave)
+	 *	hdc == emulated ide disk 2 (ide bus 1, master)
+	 *	hdd == emulated ide disk 3 (ide bus 1, slave)
+	 *
+	 * (Uncoincidentally, these xen disk device names actually map to
+	 * the /dev filesystem names of ide disk devices in Linux. So in
+	 * Linux /dev/hda is the first ide disk.) So for the first part of
+	 * our mapping we've just hardcoded the cmdk paths that we know
+	 * qemu will use.
+	 *
+	 * To understand the second half of the mapping (ie, the xdf device
+	 * that each emulated cmdk device should be mapped to) we need to
+	 * know the solaris device node address that will be assigned to
+	 * each xdf device. (The device node address is the hex number that
+	 * comes after the "xdf@" in the device path.)
+	 *
+	 * Normally when a domU is run in non-HVM mode, the xen disk device
+	 * names in the xen domain configuration file are specified with
+	 * integers instead of Linux device names. (for example, '0' would
+	 * be used instead of 'hda'.) So in the non-HVM case we simply
+	 * convert the xen disk device name (which is an integer) into a
+	 * hex number and use it as the Solaris xdf device node address.
+	 * But when we're running in HVM mode then we have a string for the
+	 * xen disk device name, so we can't simply use that as a solaris
+	 * device node address. Instead we fall back to using the xenstore
+	 * device id for the xen disk device as the xdf device node address.
+	 * The xdf device node address assignment happens in xvdi_init_dev().
+	 *
+	 * So the question becomes, how do we know what the xenstore device
+	 * id for emulated disk will be? Well, it turns out that since the
+	 * xen management tools expect the disk device names to be Linux
+	 * device names, those same management tools assign each disk a
+	 * device id that matches the dev_t of the corresponding device
+	 * under Linux. (Big shocker.) This xen device name-to-id mapping
+	 * is currently all hard coded here:
+	 *	xen.hg/tools/python/xen/util/blkif.py`blkdev_name_to_number()
+	 *
+	 * So looking at the code above we can see the following xen disk
+	 * device name to xenstore device id mappings:
+	 *	'hda' --> 0x300 == 0t768 == ((3 * 256) + (0 * 64))
+	 *	'hdb' --> 0x340 == 0t832 == ((3 * 256) + (1 * 64))
+	 *	'hdc' --> 0x1600 == 0t5632 == ((22 * 256) + (0 * 64))
+	 *	'hdd' --> 0x1640 == 0t5696 == ((22 * 256) + (1 * 64))
+	 */
+	{ "/pci@0,0/pci-ide@1,1/ide@0/cmdk@0,0", "/xpvd/xdf@300" },
+	{ "/pci@0,0/pci-ide@1,1/ide@0/cmdk@1,0", "/xpvd/xdf@340" },
+	{ "/pci@0,0/pci-ide@1,1/ide@1/cmdk@0,0", "/xpvd/xdf@1600" },
+	{ "/pci@0,0/pci-ide@1,1/ide@1/cmdk@1,0", "/xpvd/xdf@1640" },
+	{ NULL, NULL }
+};
+
+typedef struct pv_cmdk {
+ dev_info_t *dk_dip; /* presumably this instance's own dip -- confirm */
+ cmlb_handle_t dk_cmlbhandle; /* cmlb handle for this disk */
+ ddi_devid_t dk_devid; /* device id */
+ kmutex_t dk_mutex; /* held when checking dk_xdf_otyp_count */
+ dev_info_t *dk_xdf_dip; /* xdf node that cmlb requests are passed to */
+ dev_t dk_xdf_dev; /* presumably dev_t of that xdf node -- confirm */
+ int dk_xdf_otyp_count[OTYPCNT][XDF_PEXT]; /* [otyp][part]; != 0 is open */
+ ldi_handle_t dk_xdf_lh[XDF_PEXT]; /* presumably one ldi handle per part */
+} pv_cmdk_t;
+
+/*
+ * Globals
+ */
+static void *pv_cmdk_state; /* soft state, looked up by instance */
+static major_t pv_cmdk_major; /* NOTE(review): assigned outside this view */
+static hvm_to_pv_t *pv_cmdk_h2p; /* presumably the mapping table in use */
+
+/*
+ * Function prototypes for xdf callback functions (called with dk_xdf_dip)
+ */
+extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
+extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
+ void *);
+
+/*
+ * Report whether the given partition currently has at least one open
+ * of any open type recorded against it. The caller must hold dk_mutex.
+ */
+static boolean_t
+pv_cmdk_isopen_part(struct pv_cmdk *dkp, int part)
+{
+	boolean_t	is_open = B_FALSE;
+	int		type;
+
+	ASSERT(MUTEX_HELD(&dkp->dk_mutex));
+
+	/* stop scanning as soon as one open type has a non-zero count */
+	for (type = 0; (type < OTYPCNT) && !is_open; type++) {
+		if (dkp->dk_xdf_otyp_count[type][part] != 0)
+			is_open = B_TRUE;
+	}
+	return (is_open);
+}
+
+/*
+ * Cmlb ops vectors, allows the cmlb module to directly access the entire
+ * pv_cmdk disk device without going through any partitioning layers.
+ */
+
+/*
+ * cmlb read/write callback. Looks up the soft state for the cmdk
+ * devinfo node and forwards the request unchanged to the xdf driver,
+ * substituting the backing xdf devinfo node. Returns ENXIO when no
+ * soft state exists for this instance.
+ */
+/*ARGSUSED*/
+static int
+pv_cmdk_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr,
+    diskaddr_t start, size_t count, void *tg_cookie)
+{
+	struct pv_cmdk	*dkp;
+
+	dkp = ddi_get_soft_state(pv_cmdk_state, ddi_get_instance(dip));
+	if (dkp != NULL) {
+		return (xdf_lb_rdwr(dkp->dk_xdf_dip, cmd, bufaddr, start,
+		    count, tg_cookie));
+	}
+	return (ENXIO);
+}
+
+/*
+ * cmlb getinfo callback. Every request except TG_GETVIRTGEOM is passed
+ * straight through to the xdf driver. TG_GETVIRTGEOM is emulated here
+ * from the physical geometry. Returns ENXIO when no soft state exists
+ * for this instance.
+ */
+/*ARGSUSED*/
+static int
+pv_cmdk_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
+{
+	struct pv_cmdk	*dkp;
+	cmlb_geom_t	phys_geom;
+	cmlb_geom_t	*virt_geom;
+	diskaddr_t	nblocks;
+	int		rv;
+
+	dkp = ddi_get_soft_state(pv_cmdk_state, ddi_get_instance(dip));
+	if (dkp == NULL)
+		return (ENXIO);
+
+	if (cmd != TG_GETVIRTGEOM)
+		return (xdf_lb_getinfo(dkp->dk_xdf_dip, cmd, arg, tg_cookie));
+
+	/*
+	 * The native xdf driver doesn't support this ioctl.
+	 * Instead of passing it on, emulate it here so that the
+	 * results look the same as what we get for a real cmdk
+	 * device.
+	 *
+	 * Get the real size of the device
+	 */
+	rv = xdf_lb_getinfo(dkp->dk_xdf_dip, TG_GETPHYGEOM, &phys_geom,
+	    tg_cookie);
+	if (rv != 0)
+		return (rv);
+	nblocks = phys_geom.g_capacity;
+
+	/*
+	 * If the controller returned us something that doesn't
+	 * really fit into an Int 13/function 8 geometry
+	 * result, just fail the ioctl. See PSARC 1998/313.
+	 */
+	if (nblocks >= (63 * 254 * 1024))
+		return (EINVAL);
+
+	/* fixed 63 sectors by 254 heads; only the cylinder count varies */
+	virt_geom = (cmlb_geom_t *)arg;
+	virt_geom->g_capacity = nblocks;
+	virt_geom->g_nsect = 63;
+	virt_geom->g_nhead = 254;
+	virt_geom->g_ncyl = nblocks / (63 * 254);
+	virt_geom->g_acyl = 0;
+	virt_geom->g_secsize = 512;
+	virt_geom->g_intrlv = 1;
+	virt_geom->g_rpm = 3600;
+	return (0);
+}
+
+static cmlb_tg_ops_t pv_cmdk_lb_ops = {
+ TG_DK_OPS_VERSION_1, /* ops vector version */
+ pv_cmdk_lb_rdwr, /* whole-disk read/write callback */
+ pv_cmdk_lb_getinfo /* geometry/info callback */
+};
+
+/*
+ * devid management functions
+ */
+
+/*
+ * pv_cmdk_get_modser() is basically a local copy of
+ * cmdk_get_modser() modified to work without the dadk layer.
+ * (which the non-pv version of the cmdk driver uses.)
+ *
+ * Issues the given ioctl (ioccmd) to the controller object to fetch a
+ * model or serial string into buf (at most len bytes). Returns the
+ * length of the string with trailing spaces/NULs trimmed, or 0 if the
+ * ioctl failed or the string holds nothing but spaces, NULs and '0'
+ * characters.
+ */
+static int
+pv_cmdk_get_modser(struct pv_cmdk *dkp, int ioccmd, char *buf, int len)
+{
+	struct scsi_device	*sdp;
+	opaque_t		ctlobjp;
+	dadk_ioc_string_t	strarg;
+	boolean_t		valid = B_FALSE;
+	char			ch;
+	int			trimmed_len = 0;
+	int			idx;
+
+	strarg.is_buf = buf;
+	strarg.is_size = len;
+	sdp = ddi_get_driver_private(dkp->dk_dip);
+	ctlobjp = sdp->sd_address.a_hba_tran;
+	if (CTL_IOCTL(ctlobjp,
+	    ioccmd, (uintptr_t)&strarg, FNATIVE | FKIOCTL) != 0)
+		return (0);
+
+	/*
+	 * valid model/serial string must contain a non-zero non-space
+	 * trim trailing spaces/NULL
+	 */
+	for (idx = 0; idx < strarg.is_size; idx++) {
+		ch = buf[idx];
+		if ((ch == ' ') || (ch == '\0'))
+			continue;
+
+		/* remember the position just past the last real character */
+		trimmed_len = idx + 1;
+		if (ch != '0')
+			valid = B_TRUE;
+	}
+
+	return (valid ? trimmed_len : 0);
+}
+
+/*
+ * pv_cmdk_devid_modser() is basically a copy of cmdk_devid_modser()
+ * that has been modified to use local pv cmdk driver functions.
+ *
+ * Build a devid from the model and serial number
+ * Return DDI_SUCCESS or DDI_FAILURE.
+ *
+ * On success the new devid is stored in dkp->dk_devid. The scratch
+ * buffer is always freed before returning.
+ */
+static int
+pv_cmdk_devid_modser(struct pv_cmdk *dkp)
+{
+	int	rc = DDI_FAILURE;
+	char	*hwid;
+	int	modlen;
+	int	serlen;
+
+	/*
+	 * device ID is a concatenation of model number, '=', serial number.
+	 */
+	hwid = kmem_alloc(CMDK_HWIDLEN, KM_SLEEP);
+	modlen = pv_cmdk_get_modser(dkp, DIOCTL_GETMODEL, hwid, CMDK_HWIDLEN);
+
+	/*
+	 * Besides the model string the buffer must still have room for
+	 * the '=' separator, at least one byte of serial number and the
+	 * terminating NUL; otherwise the stores below would run past the
+	 * end of the allocation.
+	 */
+	if ((modlen == 0) || (modlen + 3 > CMDK_HWIDLEN))
+		goto out;
+
+	hwid[modlen++] = '=';
+
+	/* hold back one byte so the NUL terminator always fits */
+	serlen = pv_cmdk_get_modser(dkp, DIOCTL_GETSERIAL,
+	    hwid + modlen, CMDK_HWIDLEN - modlen - 1);
+	if (serlen == 0)
+		goto out;
+
+	hwid[modlen + serlen] = 0;
+
+	/* Initialize the device ID, trailing NULL not included */
+	rc = ddi_devid_init(dkp->dk_dip, DEVID_ATA_SERIAL, modlen + serlen,
+	    hwid, (ddi_devid_t *)&dkp->dk_devid);
+	if (rc != DDI_SUCCESS)
+		rc = DDI_FAILURE;
+
+out:
+	kmem_free(hwid, CMDK_HWIDLEN);
+	return (rc);
+}
+
+/*
+ * pv_cmdk_devid_read() is basically a local copy of
+ * cmdk_devid_read() modified to work without the dadk layer.
+ * (which the non-pv version of the cmdk driver uses.)
+ *
+ * Read a devid from the first block of the last track of
+ * the last cylinder. Make sure what we read is a valid devid.
+ * Return DDI_SUCCESS or DDI_FAILURE.
+ *
+ * On success a private copy of the devid is stored in dkp->dk_devid.
+ */
+static int
+pv_cmdk_devid_read(struct pv_cmdk *dkp)
+{
+	struct dk_devid	*sector;
+	diskaddr_t	devid_blk;
+	uint_t		*words;
+	uint_t		xorsum = 0;
+	int		rv = DDI_FAILURE;
+	int		n;
+
+	if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &devid_blk, 0) != 0)
+		return (DDI_FAILURE);
+
+	sector = kmem_zalloc(NBPSCTR, KM_SLEEP);
+	if (pv_cmdk_lb_rdwr(dkp->dk_dip,
+	    TG_READ, sector, devid_blk, NBPSCTR, NULL) != 0)
+		goto done;
+
+	/* Validate the revision */
+	if ((sector->dkd_rev_hi != DK_DEVID_REV_MSB) ||
+	    (sector->dkd_rev_lo != DK_DEVID_REV_LSB))
+		goto done;
+
+	/* XOR every word of the sector except the last one */
+	words = (uint_t *)sector;
+	for (n = 0; n < ((NBPSCTR - sizeof (int))/sizeof (int)); n++)
+		xorsum ^= words[n];
+	if (DKD_GETCHKSUM(sector) != xorsum)
+		goto done;
+
+	/* Validate the device id */
+	if (ddi_devid_valid((ddi_devid_t)sector->dkd_devid) != DDI_SUCCESS)
+		goto done;
+
+	/* keep a copy of the device id */
+	n = ddi_devid_sizeof((ddi_devid_t)sector->dkd_devid);
+	dkp->dk_devid = kmem_alloc(n, KM_SLEEP);
+	bcopy(sector->dkd_devid, dkp->dk_devid, n);
+	rv = DDI_SUCCESS;
+
+done:
+	kmem_free(sector, NBPSCTR);
+	return (rv);
+}
+
+/*
+ * pv_cmdk_devid_fabricate() is basically a local copy of
+ * cmdk_devid_fabricate() modified to work without the dadk layer.
+ * (which the non-pv version of the cmdk driver uses.)
+ *
+ * Create a devid and write it on the first block of the last track of
+ * the last cylinder.
+ * Return DDI_SUCCESS or DDI_FAILURE.
+ *
+ * On success the fabricated devid is stored in dkp->dk_devid; on
+ * failure it is freed again.
+ */
+static int
+pv_cmdk_devid_fabricate(struct pv_cmdk *dkp)
+{
+	ddi_devid_t	new_devid = NULL;	/* made by ddi_devid_init */
+	struct dk_devid	*sector;		/* on-disk devid sector */
+	diskaddr_t	devid_blk;
+	uint_t		*words;
+	uint_t		xorsum = 0;
+	int		rv = DDI_FAILURE;
+	int		n;
+
+	if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &devid_blk, 0) != 0)
+		return (DDI_FAILURE);
+
+	if (ddi_devid_init(dkp->dk_dip, DEVID_FAB, 0, NULL, &new_devid) !=
+	    DDI_SUCCESS)
+		return (DDI_FAILURE);
+
+	/* build the sector image: revision, devid, then checksum */
+	sector = (struct dk_devid *)kmem_zalloc(NBPSCTR, KM_SLEEP);
+	sector->dkd_rev_hi = DK_DEVID_REV_MSB;
+	sector->dkd_rev_lo = DK_DEVID_REV_LSB;
+
+	n = ddi_devid_sizeof(new_devid);
+	if (n > DK_DEVID_SIZE)
+		goto done;
+	bcopy(new_devid, sector->dkd_devid, n);
+
+	/* XOR every word of the sector except the last one */
+	words = (uint_t *)sector;
+	for (n = 0; n < ((NBPSCTR - sizeof (int))/sizeof (int)); n++)
+		xorsum ^= words[n];
+	DKD_FORMCHKSUM(xorsum, sector);
+
+	if (pv_cmdk_lb_rdwr(dkp->dk_dip,
+	    TG_WRITE, sector, devid_blk, NBPSCTR, NULL) != 0)
+		goto done;
+
+	/* ownership of the devid moves to the soft state */
+	dkp->dk_devid = new_devid;
+	new_devid = NULL;
+	rv = DDI_SUCCESS;
+
+done:
+	kmem_free(sector, NBPSCTR);
+	if (new_devid != NULL)
+		ddi_devid_free(new_devid);
+	return (rv);
+}
+
+/*
+ * pv_cmdk_devid_setup() is basically a local copy of cmdk_devid_setup()
+ * that has been modified to use local pv cmdk driver functions.
+ *
+ * Create and register the devid.
+ * There are 4 different ways we can get a device id:
+ * 1. Already have one - nothing to do
+ * 2. Build one from the drive's model and serial numbers
+ * 3. Read one from the disk (first sector of last track)
+ * 4. Fabricate one and write it on the disk.
+ * If any of these succeeds, register the deviceid
+ */
+static void
+pv_cmdk_devid_setup(struct pv_cmdk *dkp)
+{
+	/* 1. All done if already registered */
+	if (dkp->dk_devid != NULL)
+		return;
+
+	/*
+	 * Options 2, 3 and 4 are tried in order; the short-circuit
+	 * evaluation stops at the first one that succeeds. If all of
+	 * them fail there is nothing to register.
+	 */
+	if ((pv_cmdk_devid_modser(dkp) != DDI_SUCCESS) &&
+	    (pv_cmdk_devid_read(dkp) != DDI_SUCCESS) &&
+	    (pv_cmdk_devid_fabricate(dkp) != DDI_SUCCESS))
+		return;
+
+	/* We managed to get a devid one of the above ways, register it */
+	(void) ddi_devid_register(dkp->dk_dip, dkp->dk_devid);
+}
+
+/*
+ * Local Functions
+ */
+/*
+ * Completion callback for the cloned buf created in pv_cmdkstrategy().
+ * Copies the residual count and error of the clone back into the
+ * original buf (found via b_chain), completes the original and frees
+ * the clone. Always returns 0.
+ */
+static int
+pv_cmdk_iodone(struct buf *bp)
+{
+	struct buf	*parent = bp->b_chain;
+	int		error = geterror(bp);
+
+	/* Propagate back the io results */
+	parent->b_resid = bp->b_resid;
+	bioerror(parent, error);
+	biodone(parent);
+
+	freerbuf(bp);
+	return (0);
+}
+
+/*
+ * strategy(9E) entry point. Clones the incoming buf, retargets the
+ * clone at the matching partition of the backing xdf device and hands
+ * it to the ldi handle for that partition. Completion is reported
+ * through pv_cmdk_iodone(). Requests that fail the sanity checks are
+ * completed immediately with ENXIO. Always returns 0 on the error
+ * path; otherwise returns whatever ldi_strategy() returns.
+ */
+static int
+pv_cmdkstrategy(struct buf *bp)
+{
+	dev_t		dev = bp->b_edev;
+	int		instance = XDF_DEV2UNIT(dev);
+	int		part = XDF_DEV2PART(dev);
+	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);
+	dev_t		xdf_devt;
+	struct buf	*bp_clone;
+
+	/*
+	 * Sanity checks that the dev_t associated with the buf we were
+	 * passed actually corresponds to us, that we have soft state
+	 * for it, and that the partition we're trying to access is
+	 * actually open. On debug kernels we'll panic and on non-debug
+	 * kernels we'll return failure.
+	 */
+	ASSERT(getmajor(dev) == pv_cmdk_major);
+	if ((getmajor(dev) != pv_cmdk_major) || (dkp == NULL))
+		goto err;
+
+	mutex_enter(&dkp->dk_mutex);
+	ASSERT(pv_cmdk_isopen_part(dkp, part));
+	if (!pv_cmdk_isopen_part(dkp, part)) {
+		mutex_exit(&dkp->dk_mutex);
+		goto err;
+	}
+	mutex_exit(&dkp->dk_mutex);
+
+	/* clone this buffer */
+	xdf_devt = dkp->dk_xdf_dev | part;
+	bp_clone = bioclone(bp, 0, bp->b_bcount, xdf_devt, bp->b_blkno,
+	    pv_cmdk_iodone, NULL, KM_SLEEP);
+	bp_clone->b_chain = bp;
+
+	/*
+	 * If we're being invoked on behalf of the physio() call in
+	 * pv_cmdk_dioctl_rwcmd() then b_private will be set to
+	 * XB_SLICE_NONE and we need to propagate this flag into the
+	 * cloned buffer so that the xdf driver will see it.
+	 */
+	if (bp->b_private == (void *)XB_SLICE_NONE)
+		bp_clone->b_private = (void *)XB_SLICE_NONE;
+
+	/*
+	 * Pass on the cloned buffer. Note that we don't bother to check
+	 * for failure because the xdf strategy routine will have to
+	 * invoke biodone() if it wants to return an error, which means
+	 * that the pv_cmdk_iodone() callback will get invoked and it
+	 * will propagate the error back up the stack and free the cloned
+	 * buffer.
+	 */
+	ASSERT(dkp->dk_xdf_lh[part] != NULL);
+	return (ldi_strategy(dkp->dk_xdf_lh[part], bp_clone));
+
+err:
+	bioerror(bp, ENXIO);
+	bp->b_resid = bp->b_bcount;
+	biodone(bp);
+	return (0);
+}
+
+/*
+ * read(9E) entry point. Forwards the request to the ldi handle that
+ * pv_cmdkopen() stored for this partition. Returns ENXIO if there is
+ * no soft state for the instance encoded in dev.
+ */
+/*ARGSUSED*/
+static int
+pv_cmdkread(dev_t dev, struct uio *uio, cred_t *credp)
+{
+	int		instance = XDF_DEV2UNIT(dev);
+	int		part = XDF_DEV2PART(dev);
+	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);
+
+	/* ddi_get_soft_state() yields NULL for an unknown instance */
+	if (dkp == NULL)
+		return (ENXIO);
+
+	return (ldi_read(dkp->dk_xdf_lh[part], uio, credp));
+}
+
+/*
+ * write(9E) entry point. Forwards the request to the ldi handle that
+ * pv_cmdkopen() stored for this partition. Returns ENXIO if there is
+ * no soft state for the instance encoded in dev.
+ */
+/*ARGSUSED*/
+static int
+pv_cmdkwrite(dev_t dev, struct uio *uio, cred_t *credp)
+{
+	int		instance = XDF_DEV2UNIT(dev);
+	int		part = XDF_DEV2PART(dev);
+	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);
+
+	/* ddi_get_soft_state() yields NULL for an unknown instance */
+	if (dkp == NULL)
+		return (ENXIO);
+
+	return (ldi_write(dkp->dk_xdf_lh[part], uio, credp));
+}
+
+/*
+ * aread(9E) entry point. Forwards the asynchronous read to the ldi
+ * handle that pv_cmdkopen() stored for this partition. Returns ENXIO
+ * if there is no soft state for the instance encoded in dev.
+ */
+/*ARGSUSED*/
+static int
+pv_cmdkaread(dev_t dev, struct aio_req *aio, cred_t *credp)
+{
+	int		instance = XDF_DEV2UNIT(dev);
+	int		part = XDF_DEV2PART(dev);
+	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);
+
+	/* ddi_get_soft_state() yields NULL for an unknown instance */
+	if (dkp == NULL)
+		return (ENXIO);
+
+	return (ldi_aread(dkp->dk_xdf_lh[part], aio, credp));
+}
+
+/*
+ * awrite(9E) entry point. Forwards the asynchronous write to the ldi
+ * handle that pv_cmdkopen() stored for this partition. Returns ENXIO
+ * if there is no soft state for the instance encoded in dev.
+ */
+/*ARGSUSED*/
+static int
+pv_cmdkawrite(dev_t dev, struct aio_req *aio, cred_t *credp)
+{
+	int		instance = XDF_DEV2UNIT(dev);
+	int		part = XDF_DEV2PART(dev);
+	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);
+
+	/* ddi_get_soft_state() yields NULL for an unknown instance */
+	if (dkp == NULL)
+		return (ENXIO);
+
+	return (ldi_awrite(dkp->dk_xdf_lh[part], aio, credp));
+}
+
+/*
+ * dump(9E) entry point. Forwards the dump request to the ldi handle
+ * that pv_cmdkopen() stored for this partition. Returns ENXIO if there
+ * is no soft state for the instance encoded in dev.
+ */
+static int
+pv_cmdkdump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
+{
+	int		instance = XDF_DEV2UNIT(dev);
+	int		part = XDF_DEV2PART(dev);
+	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);
+
+	/* ddi_get_soft_state() yields NULL for an unknown instance */
+	if (dkp == NULL)
+		return (ENXIO);
+
+	return (ldi_dump(dkp->dk_xdf_lh[part], addr, blkno, nblk));
+}
+
+/*
+ * pv_rwcmd_copyin() is a duplicate of rwcmd_copyin().
+ *
+ * Copy a dadkio_rwcmd request in from inaddr, converting from the
+ * 32-bit layout when the caller's data model (derived from flag) is
+ * ILP32. Returns 0 on success or EFAULT if the copyin fails.
+ */
+static int
+pv_rwcmd_copyin(struct dadkio_rwcmd *rwcmdp, caddr_t inaddr, int flag)
+{
+	struct dadkio_rwcmd32	req32;
+	int			model = ddi_model_convert_from(flag);
+
+	if (model == DDI_MODEL_ILP32) {
+		if (ddi_copyin(inaddr, &req32,
+		    sizeof (struct dadkio_rwcmd32), flag)) {
+			return (EFAULT);
+		}
+
+		/* widen each field into the native structure */
+		rwcmdp->cmd = req32.cmd;
+		rwcmdp->flags = req32.flags;
+		rwcmdp->blkaddr = (daddr_t)req32.blkaddr;
+		rwcmdp->buflen = req32.buflen;
+		rwcmdp->bufaddr = (caddr_t)(intptr_t)req32.bufaddr;
+
+		/*
+		 * Note: we do not convert the 'status' field,
+		 * as it should not contain valid data at this
+		 * point.
+		 */
+		bzero(&rwcmdp->status, sizeof (rwcmdp->status));
+	} else if (model == DDI_MODEL_NONE) {
+		/* native caller: layouts match, copy the structure as is */
+		if (ddi_copyin(inaddr, rwcmdp,
+		    sizeof (struct dadkio_rwcmd), flag)) {
+			return (EFAULT);
+		}
+	}
+	return (0);
+}
+
+/*
+ * pv_rwcmd_copyout() is a duplicate of rwcmd_copyout().
+ *
+ * Copy a dadkio_rwcmd (including its status) out to outaddr,
+ * converting to the 32-bit layout when the caller's data model
+ * (derived from flag) is ILP32. Returns 0 on success or EFAULT if the
+ * copyout fails.
+ */
+static int
+pv_rwcmd_copyout(struct dadkio_rwcmd *rwcmdp, caddr_t outaddr, int flag)
+{
+	struct dadkio_rwcmd32	rsp32;
+	int			model = ddi_model_convert_from(flag);
+
+	if (model == DDI_MODEL_ILP32) {
+		/* request fields */
+		rsp32.cmd = rwcmdp->cmd;
+		rsp32.flags = rwcmdp->flags;
+		rsp32.blkaddr = rwcmdp->blkaddr;
+		rsp32.buflen = rwcmdp->buflen;
+		ASSERT64(((uintptr_t)rwcmdp->bufaddr >> 32) == 0);
+		rsp32.bufaddr = (caddr32_t)(uintptr_t)rwcmdp->bufaddr;
+
+		/* status fields */
+		rsp32.status.status = rwcmdp->status.status;
+		rsp32.status.resid = rwcmdp->status.resid;
+		rsp32.status.failed_blk_is_valid =
+		    rwcmdp->status.failed_blk_is_valid;
+		rsp32.status.failed_blk = rwcmdp->status.failed_blk;
+		rsp32.status.fru_code_is_valid =
+		    rwcmdp->status.fru_code_is_valid;
+		rsp32.status.fru_code = rwcmdp->status.fru_code;
+		bcopy(rwcmdp->status.add_error_info,
+		    rsp32.status.add_error_info, DADKIO_ERROR_INFO_LEN);
+
+		if (ddi_copyout(&rsp32, outaddr,
+		    sizeof (struct dadkio_rwcmd32), flag))
+			return (EFAULT);
+	} else if (model == DDI_MODEL_NONE) {
+		/* native caller: layouts match, copy the structure as is */
+		if (ddi_copyout(rwcmdp, outaddr,
+		    sizeof (struct dadkio_rwcmd), flag))
+			return (EFAULT);
+	}
+	return (0);
+}
+
+/*
+ * minphys-style routine handed to physio() by pv_cmdk_dioctl_rwcmd():
+ * clamp the transfer size of bp to at most DK_MAXRECSIZE bytes.
+ */
+static void
+pv_cmdkmin(struct buf *bp)
+{
+	if (bp->b_bcount <= DK_MAXRECSIZE)
+		return;
+
+	bp->b_bcount = DK_MAXRECSIZE;
+}
+
+/*
+ * Implement the DIOCTL_RWCMD ioctl: copy in a dadkio_rwcmd request,
+ * perform the described read or write via physio() through
+ * pv_cmdkstrategy(), and copy the request structure back out.
+ *
+ * dev  - device the ioctl was issued on
+ * arg  - address of the caller's dadkio_rwcmd structure
+ * flag - ioctl mode flags (data model, FKIOCTL)
+ *
+ * Returns 0 on success, EINVAL for an unsupported rwcmd, EFAULT for a
+ * failed copyin/copyout, or the error returned by physio().
+ */
+static int
+pv_cmdk_dioctl_rwcmd(dev_t dev, intptr_t arg, int flag)
+{
+	struct dadkio_rwcmd	*rwcmdp;
+	struct iovec		aiov;
+	struct uio		auio;
+	struct buf		*bp;
+	int			rw, status;
+
+	rwcmdp = kmem_alloc(sizeof (struct dadkio_rwcmd), KM_SLEEP);
+	status = pv_rwcmd_copyin(rwcmdp, (caddr_t)arg, flag);
+
+	if (status != 0)
+		goto out;
+
+	switch (rwcmdp->cmd) {
+	case DADKIO_RWCMD_READ:
+	case DADKIO_RWCMD_WRITE:
+		break;
+	default:
+		status = EINVAL;
+		goto out;
+	}
+
+	/* describe the caller's buffer as a single-element uio */
+	bzero((caddr_t)&aiov, sizeof (struct iovec));
+	aiov.iov_base = rwcmdp->bufaddr;
+	aiov.iov_len = rwcmdp->buflen;
+
+	bzero((caddr_t)&auio, sizeof (struct uio));
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_loffset = (offset_t)rwcmdp->blkaddr * (offset_t)XB_BSIZE;
+	auio.uio_resid = rwcmdp->buflen;
+	auio.uio_segflg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;
+
+	/*
+	 * Tell the xdf driver that this I/O request is using an absolute
+	 * offset.
+	 */
+	bp = getrbuf(KM_SLEEP);
+	bp->b_private = (void *)XB_SLICE_NONE;
+
+	rw = ((rwcmdp->cmd == DADKIO_RWCMD_WRITE) ? B_WRITE : B_READ);
+	status = physio(pv_cmdkstrategy, bp, dev, rw, pv_cmdkmin, &auio);
+
+	/* release the buf with the routine that pairs with getrbuf() */
+	freerbuf(bp);
+
+	if (status == 0)
+		status = pv_rwcmd_copyout(rwcmdp, (caddr_t)arg, flag);
+
+out:
+	kmem_free(rwcmdp, sizeof (struct dadkio_rwcmd));
+	return (status);
+}
+
+/*
+ * ioctl(9E) entry point. A handful of cmdk-specific ioctls are handled
+ * (or explicitly rejected) here; everything else is forwarded to the
+ * ldi handle of the backing xdf partition.
+ *
+ * Returns ENXIO if there is no soft state for the instance encoded in
+ * dev, otherwise 0 or an errno value as described per command below.
+ */
+static int
+pv_cmdkioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
+    int *rvalp)
+{
+	int		instance = XDF_DEV2UNIT(dev);
+	int		part = XDF_DEV2PART(dev);
+	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);
+	int		err;
+
+	/* ddi_get_soft_state() yields NULL for an unknown instance */
+	if (dkp == NULL)
+		return (ENXIO);
+
+	switch (cmd) {
+	case DKIOCGETWCE:
+	case DKIOCSETWCE:
+		return (EIO);
+	case DKIOCADDBAD: {
+		/*
+		 * This is for ata/ide bad block handling. It is supposed
+		 * to cause the driver to re-read the bad block list and
+		 * alternate map after it has been updated. Our driver
+		 * will refuse to attach to any disk which has a bad blocks
+		 * list defined, so there really isn't much to do here.
+		 */
+		return (0);
+	}
+	case DKIOCGETDEF: {
+		/*
+		 * I can't actually find any code that utilizes this ioctl,
+		 * hence we're leaving it explicitly unimplemented.
+		 *
+		 * (An ASSERT() on a string literal used to sit here. A
+		 * string literal is a non-NULL pointer, so that assertion
+		 * could never fire and has been dropped.)
+		 */
+		return (EIO);
+	}
+	case DIOCTL_RWCMD: {
+		/*
+		 * This just seems to be an alternate interface for
+		 * reading and writing the disk. Great, another way to
+		 * do the same thing...
+		 */
+		return (pv_cmdk_dioctl_rwcmd(dev, arg, flag));
+	}
+	case DKIOCINFO: {
+		dev_info_t	*dip = dkp->dk_dip;
+		struct dk_cinfo	info;
+
+		/*
+		 * Pass on the ioctl request, save the response. The
+		 * result lands in a kernel buffer, hence FKIOCTL.
+		 */
+		if ((err = ldi_ioctl(dkp->dk_xdf_lh[part],
+		    cmd, (intptr_t)&info, FKIOCTL, credp, rvalp)) != 0)
+			return (err);
+
+		/* Update controller info */
+		info.dki_cnum = ddi_get_instance(ddi_get_parent(dip));
+		(void) strlcpy(info.dki_cname,
+		    ddi_get_name(ddi_get_parent(dip)), sizeof (info.dki_cname));
+
+		/* Update unit info. */
+		if (info.dki_ctype == DKC_VBD)
+			info.dki_ctype = DKC_DIRECT;
+		info.dki_unit = instance;
+		(void) strlcpy(info.dki_dname,
+		    ddi_driver_name(dip), sizeof (info.dki_dname));
+		info.dki_addr = 1;
+
+		if (ddi_copyout(&info, (void *)arg, sizeof (info), flag))
+			return (EFAULT);
+		return (0);
+	}
+	default:
+		/* anything we don't special-case goes straight to xdf */
+		return (ldi_ioctl(dkp->dk_xdf_lh[part],
+		    cmd, arg, flag, credp, rvalp));
+	} /* switch (cmd) */
+	/*NOTREACHED*/
+}
+
+/*
+ * open(9E) entry point. The first open of a partition opens the
+ * matching partition of the backing xdf device through the LDI and
+ * keeps that handle; later opens do a throw-away LDI open/close to get
+ * the flag and credential checks. Every successful open is counted per
+ * open type and partition.
+ *
+ * Returns 0 on success, ENXIO if there is no soft state for the
+ * instance, EINVAL for an out-of-range otyp, or the error returned by
+ * ldi_open_by_dev().
+ */
+/*ARGSUSED*/
+static int
+pv_cmdkopen(dev_t *dev_p, int flag, int otyp, cred_t *credp)
+{
+	ldi_ident_t	li;
+	dev_t		dev = *dev_p;
+	int		instance = XDF_DEV2UNIT(dev);
+	int		part = XDF_DEV2PART(dev);
+	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);
+	dev_t		xdf_devt;
+	int		err = 0;
+
+	/* must be checked before dkp is dereferenced for xdf_devt below */
+	if (dkp == NULL)
+		return (ENXIO);
+
+	if ((otyp < 0) || (otyp >= OTYPCNT))
+		return (EINVAL);
+
+	xdf_devt = dkp->dk_xdf_dev | part;
+
+	/* allocate an ldi handle */
+	VERIFY(ldi_ident_from_dev(*dev_p, &li) == 0);
+
+	mutex_enter(&dkp->dk_mutex);
+
+	/*
+	 * We translate all device opens (chr, blk, and lyr) into
+	 * block device opens. Why? Because for all the opens that
+	 * come through this driver, we only keep around one LDI handle.
+	 * So that handle can only be of one open type. The reason
+	 * that we choose the block interface for this is that to use
+	 * the block interfaces for a device the system needs to allocate
+	 * buf_ts, which are associated with system memory which can act
+	 * as a cache for device data. So normally when a block device
+	 * is closed the system will ensure that all these pages get
+	 * flushed out of memory. But if we were to open the device
+	 * as a character device, then when we went to close the underlying
+	 * device (even if we had invoked the block interfaces) any data
+	 * remaining in memory wouldn't necessarily be flushed out
+	 * before the device was closed.
+	 */
+	if (dkp->dk_xdf_lh[part] == NULL) {
+		ASSERT(!pv_cmdk_isopen_part(dkp, part));
+
+		err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp,
+		    &dkp->dk_xdf_lh[part], li);
+
+		if (err != 0) {
+			mutex_exit(&dkp->dk_mutex);
+			ldi_ident_release(li);
+			return (err);
+		}
+
+		/* Disk devices really shouldn't clone */
+		ASSERT(xdf_devt == (dkp->dk_xdf_dev | part));
+	} else {
+		ldi_handle_t lh_tmp;
+
+		ASSERT(pv_cmdk_isopen_part(dkp, part));
+
+		/* do ldi open/close to get flags and cred check */
+		err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp,
+		    &lh_tmp, li);
+		if (err != 0) {
+			mutex_exit(&dkp->dk_mutex);
+			ldi_ident_release(li);
+			return (err);
+		}
+
+		/* Disk devices really shouldn't clone */
+		ASSERT(xdf_devt == (dkp->dk_xdf_dev | part));
+		(void) ldi_close(lh_tmp, flag, credp);
+	}
+	ldi_ident_release(li);
+
+	dkp->dk_xdf_otyp_count[otyp][part]++;
+
+	mutex_exit(&dkp->dk_mutex);
+	return (0);
+}
+
+/*
+ * close(9E) entry point. Drops the open count for (otyp, part): layered
+ * opens are counted down one at a time, all other open types are
+ * cleared outright. When no open type has the partition open any more,
+ * the stored ldi handle on the backing xdf partition is closed.
+ *
+ * Returns ENXIO if dev does not belong to this driver, has no soft
+ * state or the partition is not open; otherwise 0 or the error from
+ * ldi_close().
+ */
+/*ARGSUSED*/
+static int
+pv_cmdkclose(dev_t dev, int flag, int otyp, cred_t *credp)
+{
+	int		instance = XDF_DEV2UNIT(dev);
+	int		part = XDF_DEV2PART(dev);
+	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);
+	int		err = 0;
+
+	ASSERT((otyp >= 0) && otyp < OTYPCNT);
+
+	/*
+	 * Sanity check that the dev_t specified corresponds to this
+	 * driver, that we have soft state for it, and that the device is
+	 * actually open. On debug kernels we'll panic and on non-debug
+	 * kernels we'll return failure.
+	 */
+	ASSERT(getmajor(dev) == pv_cmdk_major);
+	if ((getmajor(dev) != pv_cmdk_major) || (dkp == NULL))
+		return (ENXIO);
+
+	mutex_enter(&dkp->dk_mutex);
+	ASSERT(pv_cmdk_isopen_part(dkp, part));
+	if (!pv_cmdk_isopen_part(dkp, part)) {
+		mutex_exit(&dkp->dk_mutex);
+		return (ENXIO);
+	}
+
+	ASSERT(dkp->dk_xdf_lh[part] != NULL);
+	ASSERT(dkp->dk_xdf_otyp_count[otyp][part] > 0);
+	if (otyp == OTYP_LYR) {
+		dkp->dk_xdf_otyp_count[otyp][part]--;
+	} else {
+		dkp->dk_xdf_otyp_count[otyp][part] = 0;
+	}
+
+	/* last reference of any type gone: release the xdf handle */
+	if (!pv_cmdk_isopen_part(dkp, part)) {
+		err = ldi_close(dkp->dk_xdf_lh[part], flag, credp);
+		dkp->dk_xdf_lh[part] = NULL;
+	}
+
+	mutex_exit(&dkp->dk_mutex);
+
+	return (err);
+}
+
+/*
+ * Query the controller object behind dip for its physical geometry
+ * (DIOCTL_GETPHYGEOM) and translate the result into a cmlb_geom_t.
+ * Interleave and rpm are filled in with fixed values. Returns 0 on
+ * success or the error from the controller ioctl.
+ */
+static int
+pv_cmdk_getpgeom(dev_info_t *dip, cmlb_geom_t *pgeom)
+{
+	struct scsi_device	*sdp = ddi_get_driver_private(dip);
+	opaque_t		ctlobjp = sdp->sd_address.a_hba_tran;
+	struct tgdk_geom	hwgeom;
+	int			rv;
+
+	rv = CTL_IOCTL(ctlobjp,
+	    DIOCTL_GETPHYGEOM, (uintptr_t)&hwgeom, FKIOCTL);
+	if (rv != 0)
+		return (rv);
+
+	/* This driver won't work if this isn't true */
+	ASSERT(hwgeom.g_secsiz == XB_BSIZE);
+
+	pgeom->g_ncyl = hwgeom.g_cyl;
+	pgeom->g_acyl = hwgeom.g_acyl;
+	pgeom->g_nhead = hwgeom.g_head;
+	pgeom->g_nsect = hwgeom.g_sec;
+	pgeom->g_secsize = hwgeom.g_secsiz;
+	pgeom->g_capacity = hwgeom.g_cap;
+	pgeom->g_intrlv = 1;
+	pgeom->g_rpm = 3600;
+	return (0);
+}
+
+/*
+ * pv_cmdk_bb_check() checks for the existence of bad block mappings in
+ * the alternate partition/slice. Returns B_FALSE if there are no bad
+ * block mappings found, and B_TRUE if there are bad block mappings found.
+ */
+static boolean_t
+pv_cmdk_bb_check(struct pv_cmdk *dkp)
+{
+	struct alts_parttbl	*label;
+	diskaddr_t		slice_len, slice_start;
+	uint32_t		nused, first, last;
+	uint16_t		tag;
+	boolean_t		found = B_FALSE;
+	int			slice;
+
+	/* find slice with V_ALTSCTR tag */
+	for (slice = 0; slice < NDKMAP; slice++) {
+		if (cmlb_partinfo(dkp->dk_cmlbhandle, slice,
+		    &slice_len, &slice_start, NULL, &tag, 0) != 0) {
+			/* no partition table exists */
+			return (B_FALSE);
+		}
+
+		if ((tag == V_ALTSCTR) && (slice_len > 1))
+			break;
+	}
+	if (slice >= NDKMAP)
+		return (B_FALSE); /* no V_ALTSCTR slice defined */
+
+	/* read in ALTS label block */
+	label = (struct alts_parttbl *)kmem_zalloc(NBPSCTR, KM_SLEEP);
+	if (pv_cmdk_lb_rdwr(dkp->dk_dip,
+	    TG_READ, label, slice_start, NBPSCTR, NULL) == 0) {
+		nused = label->alts_ent_used; /* number of BB entries */
+		first = label->alts_ent_base; /* blk offset from begin slice */
+		last = label->alts_ent_end; /* blk offset to last block */
+
+		/* mappings exist only if the table is non-empty and sane */
+		if ((nused != 0) && (first >= 1) &&
+		    (first <= last) && (last < slice_len))
+			found = B_TRUE;
+	}
+
+	kmem_free(label, NBPSCTR);
+	return (found);
+}
+
+/*
+ * Autoconfiguration Routines
+ */
+/*
+ * attach(9E) entry point. Only DDI_ATTACH is supported.
+ *
+ * Maps this cmdk node to its xdf counterpart via the pv_cmdk_h2p table,
+ * takes a hold on that xdf node, sets up the soft state, pushes the
+ * emulated disk's physical geometry down to xdf, creates the kstat,
+ * connects xdf to its backend, attaches cmlb, rejects disks with bad
+ * block mappings, and finally sets up the devid and minor nodes.
+ *
+ * Returns DDI_SUCCESS, or DDI_FAILURE with everything acquired so far
+ * released again (see the unwind labels at the bottom).
+ */
+static int
+pv_cmdkattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	int		instance = ddi_get_instance(dip);
+	dev_info_t	*xdf_dip = NULL;
+	struct pv_cmdk	*dkp;
+	cmlb_geom_t	pgeom;
+	char		*path;
+	int		i;
+
+	if (cmd != DDI_ATTACH)
+		return (DDI_FAILURE);
+
+	/*
+	 * This cmdk device layers on top of an xdf device. So the first
+	 * thing we need to do is determine which xdf device instance this
+	 * cmdk instance should be layered on top of.
+	 */
+	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+	(void) ddi_pathname(dip, path);
+	for (i = 0; pv_cmdk_h2p[i].h2p_hvm_path != NULL; i++) {
+		if (strcmp(pv_cmdk_h2p[i].h2p_hvm_path, path) == 0)
+			break;
+	}
+	kmem_free(path, MAXPATHLEN);
+
+	if (pv_cmdk_h2p[i].h2p_hvm_path == NULL) {
+		/*
+		 * UhOh. We don't know what xdf instance this cmdk device
+		 * should be mapped to.
+		 */
+		return (DDI_FAILURE);
+	}
+
+	/* Check if this device exists */
+	xdf_dip = xdf_hvm_hold(pv_cmdk_h2p[i].h2p_pv_path);
+	if (xdf_dip == NULL)
+		return (DDI_FAILURE);
+
+	/*
+	 * Allocate and initialize our state structure. If the allocation
+	 * fails there is no soft state to work with, so drop the hold we
+	 * just took on the xdf node and bail out.
+	 */
+	if (ddi_soft_state_zalloc(pv_cmdk_state, instance) != DDI_SUCCESS) {
+		ddi_release_devi(xdf_dip);
+		return (DDI_FAILURE);
+	}
+	dkp = ddi_get_soft_state(pv_cmdk_state, instance);
+	mutex_init(&dkp->dk_mutex, NULL, MUTEX_DRIVER, NULL);
+	dkp->dk_dip = dip;
+	dkp->dk_xdf_dip = xdf_dip;
+	dkp->dk_xdf_dev = makedevice(ddi_driver_major(xdf_dip),
+	    XDF_MINOR(ddi_get_instance(xdf_dip), 0));
+
+	ASSERT((dkp->dk_xdf_dev & XDF_PMASK) == 0);
+
+	/*
+	 * GROSS HACK ALERT! GROSS HACK ALERT!
+	 *
+	 * Before we can initialize the cmlb layer, we have to tell the
+	 * underlying xdf device what its physical geometry should be.
+	 * See the block comments at the top of this file for more info.
+	 */
+	if ((pv_cmdk_getpgeom(dip, &pgeom) != 0) ||
+	    (xdf_hvm_setpgeom(dkp->dk_xdf_dip, &pgeom) != 0))
+		goto fail_state;
+
+	/* create kstat for iostat(1M) */
+	if (xdf_kstat_create(dkp->dk_xdf_dip, "cmdk", instance) != 0)
+		goto fail_state;
+
+	/*
+	 * Force the xdf front end driver to connect to the backend. From
+	 * the solaris device tree perspective, the xdf driver devinfo node
+	 * is already in the ATTACHED state. (Otherwise xdf_hvm_hold()
+	 * would not have returned a dip.) But this doesn't mean that the
+	 * xdf device has actually established a connection to its back
+	 * end driver. For us to be able to access the xdf device it needs
+	 * to be connected, so request the connection explicitly here.
+	 */
+	if (xdf_hvm_connect(dkp->dk_xdf_dip) != 0) {
+		cmn_err(CE_WARN,
+		    "pv driver failed to connect: %s",
+		    pv_cmdk_h2p[i].h2p_pv_path);
+		goto fail_kstat;
+	}
+
+	/*
+	 * Initialize cmlb. Note that for partition information cmlb
+	 * will access the underlying xdf disk device directly via
+	 * pv_cmdk_lb_rdwr() and pv_cmdk_lb_getinfo(). There are no
+	 * layered driver handles associated with this access because
+	 * it is a direct disk access that doesn't go through
+	 * any of the device nodes exported by the xdf device (since
+	 * all exported device nodes only reflect the portion of
+	 * the device visible via the partition/slice that the node
+	 * is associated with.) So while not observable via the LDI,
+	 * this direct disk access is ok since we're actually holding
+	 * the target device.
+	 */
+	cmlb_alloc_handle((cmlb_handle_t *)&dkp->dk_cmlbhandle);
+	if (cmlb_attach(dkp->dk_dip, &pv_cmdk_lb_ops,
+	    DTYPE_DIRECT, /* device_type */
+	    0, /* not removable */
+	    0, /* not hot pluggable */
+	    DDI_NT_BLOCK,
+	    CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT, /* mimic cmdk */
+	    dkp->dk_cmlbhandle, 0) != 0)
+		goto fail_cmlb_handle;
+
+	if (pv_cmdk_bb_check(dkp)) {
+		cmn_err(CE_WARN,
+		    "pv cmdk disks with bad blocks are unsupported: %s",
+		    pv_cmdk_h2p[i].h2p_hvm_path);
+		goto fail_cmlb_attach;
+	}
+
+	/* setup devid string */
+	pv_cmdk_devid_setup(dkp);
+
+	/* Calling validate will create minor nodes according to disk label */
+	(void) cmlb_validate(dkp->dk_cmlbhandle, 0, 0);
+
+	/*
+	 * Add a zero-length attribute to tell the world we support
+	 * kernel ioctls (for layered drivers).
+	 */
+	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
+	    DDI_KERNEL_IOCTL, NULL, 0);
+
+	/* Have the system report any newly created device nodes */
+	ddi_report_dev(dip);
+
+	return (DDI_SUCCESS);
+
+	/*
+	 * Failure unwind. Each label undoes one setup step and falls
+	 * through to the labels for the steps that preceded it.
+	 */
+fail_cmlb_attach:
+	cmlb_detach(dkp->dk_cmlbhandle, 0);
+fail_cmlb_handle:
+	cmlb_free_handle(&dkp->dk_cmlbhandle);
+fail_kstat:
+	xdf_kstat_delete(dkp->dk_xdf_dip);
+fail_state:
+	ddi_release_devi(dkp->dk_xdf_dip);
+	mutex_destroy(&dkp->dk_mutex);
+	ddi_soft_state_free(pv_cmdk_state, instance);
+	return (DDI_FAILURE);
+}
+
+/*
+ * detach(9E) entry point. Only DDI_DETACH is supported. Unregisters
+ * and frees the devid, detaches cmlb, deletes the kstat, drops the
+ * hold on the backing xdf node and frees the soft state and any
+ * properties on dip. Returns DDI_SUCCESS, or DDI_FAILURE for any
+ * other command.
+ */
+static int
+pv_cmdkdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	int		inst = ddi_get_instance(dip);
+	struct pv_cmdk	*softc = ddi_get_soft_state(pv_cmdk_state, inst);
+
+	if (cmd != DDI_DETACH)
+		return (DDI_FAILURE);
+
+	ASSERT(MUTEX_NOT_HELD(&softc->dk_mutex));
+
+	/* devid first ... */
+	ddi_devid_unregister(dip);
+	if (softc->dk_devid != NULL)
+		ddi_devid_free(softc->dk_devid);
+
+	/* ... then the label handling ... */
+	cmlb_detach(softc->dk_cmlbhandle, 0);
+	cmlb_free_handle(&softc->dk_cmlbhandle);
+
+	/* ... and finally everything tied to the xdf node and soft state */
+	mutex_destroy(&softc->dk_mutex);
+	xdf_kstat_delete(softc->dk_xdf_dip);
+	ddi_release_devi(softc->dk_xdf_dip);
+	ddi_soft_state_free(pv_cmdk_state, inst);
+	ddi_prop_remove_all(dip);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * getinfo(9E) entry point. arg carries a dev_t; translate it either to
+ * the instance number encoded in it (DDI_INFO_DEVT2INSTANCE) or to the
+ * devinfo node recorded in that instance's soft state
+ * (DDI_INFO_DEVT2DEVINFO). Returns DDI_FAILURE for any other request
+ * or when the instance has no soft state.
+ */
+/*ARGSUSED*/
+static int
+pv_cmdk_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
+    void **result)
+{
+	dev_t		dev = (dev_t)arg;
+	int		inst = XDF_DEV2UNIT(dev);
+	struct pv_cmdk	*softc = ddi_get_soft_state(pv_cmdk_state, inst);
+
+	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
+		*result = (void *)(intptr_t)inst;
+		return (DDI_SUCCESS);
+	}
+
+	if ((infocmd != DDI_INFO_DEVT2DEVINFO) || (softc == NULL))
+		return (DDI_FAILURE);
+
+	*result = (void *)softc->dk_dip;
+	return (DDI_SUCCESS);
+}
+
+/*
+ * prop_op(9E) entry point. Dynamic property lookups on a cmdk node are
+ * redirected to the backing xdf node; lookups on a node that has no
+ * soft state yet fall back to ddi_prop_op(). Returns
+ * DDI_PROP_NOT_FOUND when the dip/dev_t do not belong to this driver
+ * or no xdf node is recorded, otherwise the result of the lookup.
+ */
+static int
+pv_cmdk_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
+    int flags, char *name, caddr_t valuep, int *lengthp)
+{
+	int		inst = ddi_get_instance(dip);
+	struct pv_cmdk	*softc = ddi_get_soft_state(pv_cmdk_state, inst);
+	dev_info_t	*target_dip;
+	dev_t		target_dev;
+	int		rv;
+
+	/*
+	 * Sanity check that if a dev_t or dip were specified that they
+	 * correspond to this device driver. On debug kernels we'll
+	 * panic and on non-debug kernels we'll return failure.
+	 */
+	ASSERT(ddi_driver_major(dip) == pv_cmdk_major);
+	ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == pv_cmdk_major));
+	if (ddi_driver_major(dip) != pv_cmdk_major)
+		return (DDI_PROP_NOT_FOUND);
+	if ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != pv_cmdk_major))
+		return (DDI_PROP_NOT_FOUND);
+
+	/*
+	 * This property lookup might be associated with a device node
+	 * that is not yet attached, if so pass it onto ddi_prop_op().
+	 */
+	if (softc == NULL) {
+		return (ddi_prop_op(dev, dip, prop_op, flags,
+		    name, valuep, lengthp));
+	}
+
+	/*
+	 * Make sure we only lookup dynamic properties.
+	 *
+	 * If there are static properties of the underlying xdf driver
+	 * that we want to mirror, then we'll have to explicitly look them
+	 * up and define them during attach. There are a few reasons
+	 * for this. Most importantly, most static properties are typed
+	 * and all dynamic properties are untyped, ie, for dynamic
+	 * properties the caller must know the type of the property and
+	 * how to interpret the value of the property. The prop_op driver
+	 * entry point is only designed for returning dynamic/untyped
+	 * properties, so if we were to attempt to lookup and pass back
+	 * static properties of the underlying device here then we would
+	 * be losing the type information for those properties. Another
+	 * reason we don't want to pass on static property requests is that
+	 * static properties are enumerable in the device tree, whereas
+	 * dynamic ones are not.
+	 */
+	flags |= DDI_PROP_DYNAMIC;
+
+	/*
+	 * We can't use the ldi here to access the underlying device because
+	 * the ldi actually opens the device, and that open might fail if the
+	 * device has already been opened with the FEXCL flag. If we used
+	 * the ldi here, it would also be possible for some other caller
+	 * to try open the device with the FEXCL flag and get a failure
+	 * back because we have it open to do a property query.
+	 *
+	 * Instead we'll grab a hold on the target dip and query the
+	 * property directly.
+	 */
+	mutex_enter(&softc->dk_mutex);
+
+	target_dip = softc->dk_xdf_dip;
+	if (target_dip == NULL) {
+		mutex_exit(&softc->dk_mutex);
+		return (DDI_PROP_NOT_FOUND);
+	}
+	e_ddi_hold_devi(target_dip);
+
+	/* figure out the dev_t we're going to pass on down */
+	target_dev = (dev == DDI_DEV_T_ANY) ?
+	    DDI_DEV_T_ANY : (softc->dk_xdf_dev | XDF_DEV2PART(dev));
+
+	mutex_exit(&softc->dk_mutex);
+
+	/*
+	 * Cdev_prop_op() is not a public interface, and normally the caller
+	 * is required to make sure that the target driver actually implements
+	 * this interface before trying to invoke it. In this case we know
+	 * that we're always accessing the xdf driver and it does have this
+	 * interface defined, so we can skip the check.
+	 */
+	rv = cdev_prop_op(target_dev, target_dip,
+	    prop_op, flags, name, valuep, lengthp);
+	ddi_release_devi(target_dip);
+	return (rv);
+}
+
+/*
+ * Device driver ops vector
+ *
+ * Character/block entry points of the pv cmdk driver. Entries set to
+ * nodev or nochpoll are entry points this driver does not implement.
+ */
+static struct cb_ops pv_cmdk_cb_ops = {
+ pv_cmdkopen, /* open */
+ pv_cmdkclose, /* close */
+ pv_cmdkstrategy, /* strategy */
+ nodev, /* print */
+ pv_cmdkdump, /* dump */
+ pv_cmdkread, /* read */
+ pv_cmdkwrite, /* write */
+ pv_cmdkioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ pv_cmdk_prop_op, /* cb_prop_op */
+ 0, /* streamtab */
+ D_64BIT | D_MP | D_NEW, /* Driver compatibility flag */
+ CB_REV, /* cb_rev */
+ pv_cmdkaread, /* async read */
+ pv_cmdkawrite /* async write */
+};
+
+/*
+ * Device operations vector: autoconfiguration entry points plus a
+ * pointer to the cb_ops table defined above. This driver has no
+ * bus_ops.
+ */
+struct dev_ops pv_cmdk_ops = {
+ DEVO_REV, /* devo_rev, */
+ 0, /* refcnt */
+ pv_cmdk_getinfo, /* info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ pv_cmdkattach, /* attach */
+ pv_cmdkdetach, /* detach */
+ nodev, /* reset */
+ &pv_cmdk_cb_ops, /* driver operations */
+ (struct bus_ops *)0 /* bus operations */
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modldrv modldrv = {
+ &mod_driverops, /* Type of module. This one is a driver */
+ "PV Common Direct Access Disk", /* module description string */
+ &pv_cmdk_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = { /* used by _init/_info/_fini */
+ MODREV_1, (void *)&modldrv, NULL
+};
+
+/*
+ * Module load entry point. Looks up the major number of "cmdk", sets
+ * up the soft state anchor, selects the hvm-to-pv path table and
+ * installs the module. Returns 0 on success, EINVAL if "cmdk" has no
+ * major number, or the error from ddi_soft_state_init()/mod_install().
+ */
+int
+_init(void)
+{
+	int	err;
+
+	pv_cmdk_major = ddi_name_to_major("cmdk");
+	if (pv_cmdk_major == (major_t)-1)
+		return (EINVAL);
+
+	/*
+	 * In general ide usually supports 4 disk devices, this same
+	 * limitation also applies to software emulating ide devices.
+	 * so by default we pre-allocate 4 cmdk soft state structures.
+	 */
+	err = ddi_soft_state_init(&pv_cmdk_state,
+	    sizeof (struct pv_cmdk), PV_CMDK_NODES);
+	if (err != 0)
+		return (err);
+
+	/*
+	 * Currently we only support qemu as the backing hardware emulator
+	 * for cmdk devices.
+	 */
+	pv_cmdk_h2p = pv_cmdk_h2p_xen_qemu;
+
+	/* Install our module; undo the soft state setup if that fails */
+	err = mod_install(&modlinkage);
+	if (err != 0)
+		ddi_soft_state_fini(&pv_cmdk_state);
+
+	return (err);
+}
+
+/*
+ * Module information entry point: report on this module through
+ * mod_info() using our modlinkage.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int	rv;
+
+	rv = mod_info(&modlinkage, modinfop);
+	return (rv);
+}
+
+/*
+ * Module unload entry point. The soft state anchor is torn down only
+ * once mod_remove() has succeeded; otherwise its error is returned and
+ * nothing is changed.
+ */
+int
+_fini(void)
+{
+	int	err = mod_remove(&modlinkage);
+
+	if (err == 0)
+		ddi_soft_state_fini(&pv_cmdk_state);
+	return (err);
+}
diff --git a/usr/src/uts/i86pc/i86hvm/io/pv_rtls.c b/usr/src/uts/i86pc/i86hvm/io/pv_rtls.c
new file mode 100644
index 0000000000..40303870b4
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/io/pv_rtls.c
@@ -0,0 +1,79 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Fake rtls module. Prevents the real rtls driver from loading in
+ * a xen HVM domain so that xnf may operate instead.
+ */
+
+#include <sys/sunddi.h>
+#include <sys/errno.h>
+#include <sys/modctl.h>
+
+/*
+ * Device operations for the stub.  No getinfo, attach, detach, cb_ops or
+ * bus_ops are provided: the module exists only to occupy the rtls name.
+ */
+struct dev_ops pv_rtls_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* refcnt */
+ NULL, /* info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ NULL, /* attach */
+ NULL, /* detach */
+ nodev, /* reset */
+ NULL, /* driver operations */
+ NULL /* bus operations */
+};
+
+/*
+ * Module linkage information for the kernel.
+ *
+ * modldrv registers the stub as a device driver module; modlinkage is the
+ * object handed to mod_install() and mod_info() below.
+ */
+static struct modldrv modldrv = {
+ &mod_driverops,
+ "xVM rtls stub %I%",
+ &pv_rtls_ops
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modldrv, NULL
+};
+
+/*
+ * Module load entry point: install the stub driver module.
+ */
+int
+_init(void)
+{
+	int	rv = mod_install(&modlinkage);
+
+	return (rv);
+}
+
+/*
+ * Module information entry point: report on the stub via mod_info().
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int	rv = mod_info(&modlinkage, modinfop);
+
+	return (rv);
+}
+
+/*
+ * Module unload entry point.  Unloading is always refused with EBUSY, so
+ * once installed the stub stays in place.
+ */
+int
+_fini(void)
+{
+ return (EBUSY);
+}
diff --git a/usr/src/uts/i86pc/i86hvm/io/xpv/evtchn.c b/usr/src/uts/i86pc/i86hvm/io/xpv/evtchn.c
new file mode 100644
index 0000000000..14d5bcc4b9
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/io/xpv/evtchn.c
@@ -0,0 +1,389 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/xpv_support.h>
+#include <sys/hypervisor.h>
+#include <sys/machsystm.h>
+#include <sys/mutex.h>
+#include <sys/cmn_err.h>
+#include <sys/dditypes.h>
+#include <sys/atomic.h>
+#include <sys/sysmacros.h>
+#include <sys/cpu.h>
+#include <sys/psw.h>
+#include <sys/psm.h>
+#include <sys/sdt.h>
+
+extern dev_info_t *xpv_dip; /* devinfo node used when adding softints */
+static ddi_intr_handle_t *evtchn_ihp = NULL; /* set by ec_init(), freed by ec_fini() */
+static ddi_softint_handle_t evtchn_to_handle[NR_EVENT_CHANNELS]; /* softint handle per event channel, NULL if unbound */
+kmutex_t ec_lock; /* held while updating evtchn_to_handle[] */
+
+static int evtchn_callback_irq = -1; /* vector registered with the hypervisor; -1 when none */
+
+static volatile ulong_t *pending_events; /* evtchn_pending[] in the shared info page */
+static volatile ulong_t *masked_events; /* evtchn_mask[] in the shared info page */
+
+/* log2(NBBY * sizeof (ulong)) */
+#ifdef __amd64
+#define EVTCHN_SHIFT 6
+#else /* __i386 */
+#define EVTCHN_SHIFT 5
+#endif
+
+/*
+ * Atomically get and clear a ulong from memory.
+ *
+ * 'src' is a pointer to the word, 'targ' an lvalue that receives the value
+ * the word held at the moment it was replaced by 0.  The read is retried
+ * until atomic_cas_ulong() reports that the word still held the value just
+ * read, so no bit set concurrently is lost.
+ */
+#define GET_AND_CLEAR(src, targ) { \
+ membar_enter(); \
+ do { \
+ targ = *src; \
+ } while (atomic_cas_ulong(src, targ, 0) != targ); \
+}
+
+/*
+ * Get the first and last bits set in a bitmap.
+ *
+ * 'bitmap' is a ulong_t value; 'low' and 'high' are int lvalues that receive
+ * the indices of the lowest and highest set bits, or -1 if no bit is set.
+ *
+ * Every bit of the word has to be examined.  A ulong_t holds
+ * (1 << EVTCHN_SHIFT) bits; bounding the scan by sizeof (ulong_t), which is
+ * a byte count, would miss any bit above bit 8.
+ */
+#define	GET_BOUNDS(bitmap, low, high)	{			\
+	int _i;							\
+	low = high = -1;					\
+	for (_i = 0; _i < (1 << EVTCHN_SHIFT); _i++)		\
+		if ((bitmap) & (1UL << _i)) {			\
+			if (low == -1)				\
+				low = _i;			\
+			high = _i;				\
+		}						\
+}
+
+/*
+ * Bind a handler to an event channel.
+ *
+ * A soft interrupt of priority 'pri' running handler(arg1, ...) is created
+ * and recorded for 'evtchn'; the channel is then unmasked so the hypervisor
+ * may deliver events on it.  Invalid channel numbers and softint creation
+ * failures are logged and otherwise ignored, leaving the channel masked.
+ */
+void
+ec_bind_evtchn_to_handler(int evtchn, pri_t pri, ec_handler_fcn_t handler,
+    void *arg1)
+{
+	ddi_softint_handle_t hdl;
+
+	/* Valid indices into evtchn_to_handle[] are 0..NR_EVENT_CHANNELS-1. */
+	if (evtchn < 0 || evtchn >= NR_EVENT_CHANNELS) {
+		cmn_err(CE_WARN, "Binding invalid event channel: %d", evtchn);
+		return;
+	}
+
+	/* Never record (and later trigger) an uninitialized handle. */
+	if (ddi_intr_add_softint(xpv_dip, &hdl, pri, handler,
+	    (caddr_t)arg1) != DDI_SUCCESS) {
+		cmn_err(CE_WARN, "Could not add softint for event channel: %d",
+		    evtchn);
+		return;
+	}
+
+	mutex_enter(&ec_lock);
+	ASSERT(evtchn_to_handle[evtchn] == NULL);
+	evtchn_to_handle[evtchn] = hdl;
+	mutex_exit(&ec_lock);
+
+	/* Let the hypervisor know we're prepared to handle this event */
+	hypervisor_unmask_event(evtchn);
+}
+
+/*
+ * Undo ec_bind_evtchn_to_handler(): mask the channel, forget its handler,
+ * close the port with the hypervisor and remove the soft interrupt, if one
+ * was registered.  Invalid channel numbers are logged and ignored.
+ */
+void
+ec_unbind_evtchn(int evtchn)
+{
+	evtchn_close_t close;
+	ddi_softint_handle_t hdl;
+
+	/* Valid indices into evtchn_to_handle[] are 0..NR_EVENT_CHANNELS-1. */
+	if (evtchn < 0 || evtchn >= NR_EVENT_CHANNELS) {
+		cmn_err(CE_WARN, "Unbinding invalid event channel: %d", evtchn);
+		return;
+	}
+
+	/*
+	 * Let the hypervisor know we're no longer prepared to handle this
+	 * event
+	 */
+	hypervisor_mask_event(evtchn);
+
+	/* Cleanup the event handler metadata */
+	mutex_enter(&ec_lock);
+	hdl = evtchn_to_handle[evtchn];
+	evtchn_to_handle[evtchn] = NULL;
+	mutex_exit(&ec_lock);
+
+	close.port = evtchn;
+	(void) HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+
+	/* Channels that never had a handler bound have no softint. */
+	if (hdl != NULL)
+		(void) ddi_intr_remove_softint(hdl);
+}
+
+/*
+ * Send an event on the given port.  A port value of -1 is silently ignored.
+ */
+void
+ec_notify_via_evtchn(unsigned int port)
+{
+	evtchn_send_t send;
+
+	if ((int)port != -1) {
+		send.port = port;
+		(void) HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
+	}
+}
+
+/*
+ * Clear the event's bit in the masked-events bitmap and tell the hypervisor
+ * that the event has been unmasked.  Panics if the hypercall fails.
+ */
+void
+hypervisor_unmask_event(unsigned int ev)
+{
+	evtchn_unmask_t unmask;
+	ulong_t bitsel = (1UL << EVTCHN_SHIFT) - 1;
+	int word = ev >> EVTCHN_SHIFT;		/* which ulong_t */
+	ulong_t bit = 1UL << (ev & bitsel);	/* which bit within it */
+
+	atomic_and_ulong(&masked_events[word], ~bit);
+
+	/* Let the hypervisor know the event has been unmasked */
+	unmask.port = ev;
+	if (HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask) != 0)
+		panic("xen_evtchn_unmask() failed");
+}
+
+/*
+ * Mask an event: atomically set its bit in the masked-events bitmap.
+ */
+void
+hypervisor_mask_event(uint_t ev)
+{
+	ulong_t bitsel = (1UL << EVTCHN_SHIFT) - 1;
+	int word = ev >> EVTCHN_SHIFT;		/* which ulong_t */
+	ulong_t bit = 1UL << (ev & bitsel);	/* which bit within it */
+
+	atomic_or_ulong(&masked_events[word], bit);
+}
+
+/*
+ * Acknowledge an event: atomically clear its bit in the pending-events
+ * bitmap.
+ */
+void
+hypervisor_clear_event(uint_t ev)
+{
+	ulong_t bitsel = (1UL << EVTCHN_SHIFT) - 1;
+	int word = ev >> EVTCHN_SHIFT;		/* which ulong_t */
+	ulong_t bit = 1UL << (ev & bitsel);	/* which bit within it */
+
+	atomic_and_ulong(&pending_events[word], ~bit);
+}
+
+/*
+ * Allocate an unbound event channel in this domain for use by 'domid'.
+ *
+ * On success the new port is stored in *evtchnp, left masked, and 0 is
+ * returned.  On failure the hypervisor error is translated with
+ * xen_xlate_errcode() and returned.
+ */
+int
+xen_alloc_unbound_evtchn(int domid, int *evtchnp)
+{
+	evtchn_alloc_unbound_t alloc;
+	int err;
+
+	alloc.dom = DOMID_SELF;
+	alloc.remote_dom = (domid_t)domid;
+
+	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc);
+	if (err != 0)
+		return (xen_xlate_errcode(err));
+
+	*evtchnp = alloc.port;
+	/* ensure evtchn is masked till we're ready to use it */
+	hypervisor_mask_event(*evtchnp);
+
+	return (0);
+}
+
+/*
+ * Bind a local event channel to 'remote_port' in domain 'domid'.
+ *
+ * On success the local port is stored in *port and 0 is returned; otherwise
+ * the hypervisor error is translated with xen_xlate_errcode() and returned.
+ */
+int
+xen_bind_interdomain(int domid, int remote_port, int *port)
+{
+	evtchn_bind_interdomain_t bind;
+	int err;
+
+	bind.remote_dom = (domid_t)domid;
+	bind.remote_port = remote_port;
+
+	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &bind);
+	if (err != 0)
+		return (xen_xlate_errcode(err));
+
+	*port = bind.local_port;
+	return (0);
+}
+
+/*
+ * Interrupt handler for the hypervisor's "events pending" callback.
+ *
+ * Clears this CPU's upcall-pending flag, atomically fetches and clears the
+ * selector word that says which words of the pending bitmap need scanning,
+ * then for every pending, unmasked event with a registered handler triggers
+ * that handler's soft interrupt.  The scan is repeated for as long as the
+ * hypervisor has flagged more work in the meantime.
+ *
+ * Returns DDI_INTR_CLAIMED if at least one softint was triggered, otherwise
+ * DDI_INTR_UNCLAIMED.
+ */
+/*ARGSUSED*/
+uint_t
+evtchn_callback_fcn(caddr_t arg0, caddr_t arg1)
+{
+	ulong_t pending_word;
+	int i, j, port;
+	volatile struct vcpu_info *vci;
+	uint_t rv = DDI_INTR_UNCLAIMED;
+	ddi_softint_handle_t hdl;
+	int low, high;
+	ulong_t sels;
+
+	vci = &HYPERVISOR_shared_info->vcpu_info[CPU->cpu_id];
+
+again:
+	DTRACE_PROBE2(evtchn__scan__start, int, vci->evtchn_upcall_pending,
+	    ulong_t, vci->evtchn_pending_sel);
+
+	atomic_and_8(&vci->evtchn_upcall_pending, 0);
+
+	/*
+	 * Find the upper and lower bounds in which we need to search for
+	 * pending events.
+	 */
+	GET_AND_CLEAR(&vci->evtchn_pending_sel, sels);
+
+	/* sels == 1 is by far the most common case.  Make it fast */
+	if (sels == 1)
+		low = high = 0;
+	else if (sels == 0)
+		return (rv);
+	else
+		GET_BOUNDS(sels, low, high);
+
+	/* Scan the port list, looking for words with bits set */
+	for (i = low; i <= high; i++) {
+		ulong_t tmp;
+
+		GET_AND_CLEAR(&pending_events[i], tmp);
+		pending_word = tmp & ~(masked_events[i]);
+
+		/* Scan the bits in the word, looking for pending events */
+		while (pending_word != 0) {
+			j = lowbit(pending_word) - 1;
+			port = (i << EVTCHN_SHIFT) + j;
+			/*
+			 * The shift must be done in ulong_t: j can exceed
+			 * the width of an int on amd64, and a bit that is
+			 * not cleared here would keep this loop spinning.
+			 */
+			pending_word = pending_word & ~(1UL << j);
+
+			/*
+			 * If there is a handler registered for this event,
+			 * schedule a softint of the appropriate priority
+			 * to execute it.
+			 */
+			if ((hdl = evtchn_to_handle[port]) != NULL) {
+				(void) ddi_intr_trigger_softint(hdl, NULL);
+				rv = DDI_INTR_CLAIMED;
+			}
+		}
+	}
+	DTRACE_PROBE2(evtchn__scan__end, int, vci->evtchn_upcall_pending,
+	    ulong_t, vci->evtchn_pending_sel);
+
+	/* More events arrived while we were scanning: go round again. */
+	if ((volatile uint8_t)vci->evtchn_upcall_pending ||
+	    ((volatile ulong_t)vci->evtchn_pending_sel))
+		goto again;
+
+	return (rv);
+}
+
+/*
+ * Tell the hypervisor which irq to raise when events are pending, by
+ * setting HVM_PARAM_CALLBACK_IRQ for this domain.  Returns the result of
+ * the hypercall.
+ */
+static int
+set_hvm_callback(int irq)
+{
+	struct xen_hvm_param param;
+
+	param.value = irq;
+	param.index = HVM_PARAM_CALLBACK_IRQ;
+	param.domid = DOMID_SELF;
+
+	return (HYPERVISOR_hvm_op(HVMOP_set_param, &param));
+}
+
+/*
+ * Tear down event channel support: unbind every event channel, then
+ * disable, unhook and free the callback interrupt set up by ec_init().
+ */
+void
+ec_fini()
+{
+	int port = 0;
+
+	while (port < NR_EVENT_CHANNELS)
+		ec_unbind_evtchn(port++);
+
+	evtchn_callback_irq = -1;
+
+	/* Nothing more to do if the interrupt was never set up. */
+	if (evtchn_ihp == NULL)
+		return;
+
+	(void) ddi_intr_disable(*evtchn_ihp);
+	(void) ddi_intr_remove_handler(*evtchn_ihp);
+	(void) ddi_intr_free(*evtchn_ihp);
+	kmem_free(evtchn_ihp, sizeof (ddi_intr_handle_t));
+	evtchn_ihp = NULL;
+}
+
+/*
+ * Set up event channel support.
+ *
+ * Locates the pending/masked bitmaps in the shared info page, masks every
+ * event channel, allocates and enables the interrupt used for the
+ * hypervisor's event callback, and registers that interrupt's vector with
+ * the hypervisor.
+ *
+ * Returns 0 on success and -1 on failure.  On failure every interrupt
+ * resource acquired here is released again and evtchn_ihp is left NULL.
+ */
+int
+ec_init(dev_info_t *dip)
+{
+	int i;
+	int rv, actual;
+	ddi_intr_handle_t *ihp;
+
+	/*
+	 * The pending and masked event bitmasks live in the shared info
+	 * page; access them as arrays of ulong_t.
+	 */
+	pending_events = &HYPERVISOR_shared_info->evtchn_pending[0];
+	masked_events = &HYPERVISOR_shared_info->evtchn_mask[0];
+
+	/*
+	 * Clear our event handler structures and prevent the hypervisor
+	 * from triggering any events.
+	 */
+	mutex_init(&ec_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL7));
+	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+		evtchn_to_handle[i] = NULL;
+		(void) hypervisor_mask_event(i);
+	}
+
+	/*
+	 * Allocate and initialize an interrupt handler to process the
+	 * hypervisor's "hey you have events pending!" interrupt.
+	 */
+	ihp = kmem_zalloc(sizeof (ddi_intr_handle_t), KM_SLEEP);
+	rv = ddi_intr_alloc(dip, ihp, DDI_INTR_TYPE_FIXED, 0, 1, &actual,
+	    DDI_INTR_ALLOC_NORMAL);
+	if (rv < 0 || actual != 1) {
+		cmn_err(CE_WARN, "Could not allocate evtchn interrupt: %d",
+		    rv);
+		goto fail_mem;
+	}
+
+	rv = ddi_intr_add_handler(*ihp, evtchn_callback_fcn, NULL, NULL);
+	if (rv < 0) {
+		cmn_err(CE_WARN, "Could not attach evtchn handler");
+		goto fail_alloc;
+	}
+	evtchn_ihp = ihp;
+
+	if (ddi_intr_enable(*ihp) != DDI_SUCCESS) {
+		cmn_err(CE_WARN, "Could not enable evtchn interrupts\n");
+		goto fail_handler;
+	}
+
+	/* Tell the hypervisor which interrupt we're waiting on. */
+	evtchn_callback_irq = ((ddi_intr_handle_impl_t *)*ihp)->ih_vector;
+
+	if (set_hvm_callback(evtchn_callback_irq) != 0) {
+		cmn_err(CE_WARN, "Couldn't register evtchn callback");
+		goto fail_enable;
+	}
+	return (0);
+
+	/*
+	 * Failure unwinding, in reverse order of acquisition.  The handle
+	 * memory in particular must not be leaked.
+	 */
+fail_enable:
+	(void) ddi_intr_disable(*ihp);
+fail_handler:
+	(void) ddi_intr_remove_handler(*ihp);
+fail_alloc:
+	(void) ddi_intr_free(*ihp);
+fail_mem:
+	kmem_free(ihp, sizeof (ddi_intr_handle_t));
+	evtchn_ihp = NULL;
+	evtchn_callback_irq = -1;
+	return (-1);
+}
+
+/*
+ * Re-establish event channel state after a resume: mask every event
+ * channel and register the callback irq with the hypervisor again.
+ */
+void
+ec_resume(void)
+{
+	int port = 0;
+
+	/* New event-channel space is not 'live' yet. */
+	while (port < NR_EVENT_CHANNELS) {
+		hypervisor_mask_event(port);
+		port++;
+	}
+
+	if (set_hvm_callback(evtchn_callback_irq) != 0)
+		cmn_err(CE_WARN, "Couldn't register evtchn callback");
+}
diff --git a/usr/src/uts/i86pc/i86hvm/io/xpv/xpv.conf b/usr/src/uts/i86pc/i86hvm/io/xpv/xpv.conf
new file mode 100644
index 0000000000..d599f6f3ff
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/io/xpv/xpv.conf
@@ -0,0 +1,28 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+# ident "%Z%%M% %I% %E% SMI"
+
+interrupt-priorities=9;
diff --git a/usr/src/uts/i86pc/i86hvm/io/xpv/xpv_support.c b/usr/src/uts/i86pc/i86hvm/io/xpv/xpv_support.c
new file mode 100644
index 0000000000..f5de99a175
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/io/xpv/xpv_support.c
@@ -0,0 +1,956 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/modctl.h>
+#include <sys/types.h>
+#include <sys/archsystm.h>
+#include <sys/machsystm.h>
+#include <sys/sunndi.h>
+#include <sys/sunddi.h>
+#include <sys/ddi_subrdefs.h>
+#include <sys/xpv_support.h>
+#include <sys/xen_errno.h>
+#include <sys/hypervisor.h>
+#include <sys/gnttab.h>
+#include <sys/xenbus_comms.h>
+#include <sys/xenbus_impl.h>
+#include <xen/sys/xendev.h>
+#include <sys/sysmacros.h>
+#include <sys/x86_archext.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/pc_mmu.h>
+#include <sys/cmn_err.h>
+#include <sys/cpr.h>
+#include <sys/ddi.h>
+#include <vm/seg_kmem.h>
+#include <vm/as.h>
+#include <vm/hat_pte.h>
+#include <vm/hat_i86.h>
+
+#define XPV_MINOR 0
+#define XPV_BUFSIZE 128
+
+/*
+ * This structure is ordinarily constructed by Xen. In the HVM world, we
+ * manually fill in the few fields the PV drivers need.
+ */
+start_info_t *xen_info = NULL;
+
+/* Xen version number. */
+int xen_major, xen_minor;
+
+/* Metadata page shared between domain and Xen */
+shared_info_t *HYPERVISOR_shared_info = NULL;
+
+/* Page containing code to issue hypercalls. */
+extern caddr_t hypercall_page;
+
+/* Is the hypervisor 64-bit? 1 = yes, 0 = no, -1 = not yet determined */
+int xen_is_64bit = -1;
+
+/* virtual addr for the store_mfn page */
+caddr_t xb_addr;
+
+/* devinfo node of the xpv driver, and of the xpvd nexus once found */
+dev_info_t *xpv_dip;
+static dev_info_t *xpvd_dip;
+
+/* saved pfn of the shared info page */
+static pfn_t shared_info_frame;
+
+/*
+ * SUSPEND_DEBUG() prints through xen_printf() when xen_suspend_debug is
+ * non-zero on DEBUG kernels and expands to nothing otherwise, so its
+ * arguments must not have required side effects.
+ */
+#ifdef DEBUG
+int xen_suspend_debug;
+
+#define SUSPEND_DEBUG if (xen_suspend_debug) xen_printf
+#else
+#define SUSPEND_DEBUG(...)
+#endif
+
+/*
+ * Forward declarations
+ */
+static int xpv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
+static int xpv_attach(dev_info_t *, ddi_attach_cmd_t);
+static int xpv_detach(dev_info_t *, ddi_detach_cmd_t);
+static int xpv_open(dev_t *, int, int, cred_t *);
+static int xpv_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+
+/*
+ * Character/block entry points for the xpv device: only open and ioctl
+ * are implemented.
+ */
+static struct cb_ops xpv_cb_ops = {
+ xpv_open, /* open */
+ nulldev, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ xpv_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ ddi_prop_op, /* cb_prop_op */
+ NULL, /* streamtab */
+ D_MP, /* Driver compatibility flag */
+ CB_REV, /* cb_rev */
+ NULL, /* async read */
+ NULL /* async write */
+};
+
+/*
+ * Device operations vector for the xpv driver.
+ */
+static struct dev_ops xpv_dv_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* refcnt */
+ xpv_getinfo, /* info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ xpv_attach, /* attach */
+ xpv_detach, /* detach */
+ nodev, /* reset */
+ &xpv_cb_ops, /* driver operations */
+ NULL, /* struct bus_ops */
+ NULL /* power */
+};
+
+/*
+ * Module linkage information for the kernel: modldrv registers the xpv
+ * device driver, modl is the linkage structure that wraps it.
+ */
+static struct modldrv modldrv = {
+ &mod_driverops,
+ "xpv driver %I%",
+ &xpv_dv_ops
+};
+
+static struct modlinkage modl = {
+ MODREV_1,
+ {
+ (void *)&modldrv,
+ NULL /* null termination */
+ }
+};
+
+/*
+ * DMA attributes used by xen_alloc_pages(): page-aligned memory in a
+ * single segment.
+ */
+static ddi_dma_attr_t xpv_dma_attr = {
+ DMA_ATTR_V0, /* version of this structure */
+ 0, /* lowest usable address */
+ 0xffffffffffffffffULL, /* highest usable address */
+ 0x7fffffff, /* maximum DMAable byte count */
+ MMU_PAGESIZE, /* alignment in bytes */
+ 0x7ff, /* bitmap of burst sizes */
+ 1, /* minimum transfer */
+ 0xffffffffU, /* maximum transfer */
+ 0x7fffffffULL, /* maximum segment length */
+ 1, /* maximum number of segments */
+ 1, /* granularity */
+ 0, /* flags (reserved) */
+};
+
+/* Access attributes used by xen_alloc_pages() */
+static ddi_device_acc_attr_t xpv_accattr = {
+ DDI_DEVICE_ATTR_V0,
+ DDI_NEVERSWAP_ACC,
+ DDI_STRICTORDER_ACC
+};
+
+/*
+ * Bookkeeping for xen_alloc_pages(): each call consumes one slot in the
+ * handle arrays; xen_alloc_cnt is the next free slot.
+ */
+#define MAX_ALLOCATIONS 10
+static ddi_dma_handle_t xpv_dma_handle[MAX_ALLOCATIONS];
+static ddi_acc_handle_t xpv_dma_acchandle[MAX_ALLOCATIONS];
+static int xen_alloc_cnt = 0;
+
+/*
+ * Allocate 'cnt' pages of page-aligned, DMA-able memory for the PV drivers.
+ *
+ * Each call uses one slot of xpv_dma_handle[]/xpv_dma_acchandle[].  Returns
+ * the kernel virtual address of the memory, or NULL if all MAX_ALLOCATIONS
+ * slots are in use or the DDI allocation fails.
+ */
+void *
+xen_alloc_pages(pgcnt_t cnt)
+{
+	size_t len;
+	int a;
+	caddr_t addr;
+
+	/*
+	 * Enforce the table size on all kernels, not just DEBUG ones;
+	 * running past the end of the handle arrays would corrupt memory.
+	 */
+	if (xen_alloc_cnt >= MAX_ALLOCATIONS) {
+		cmn_err(CE_WARN, "Too many page allocations for xpv devices");
+		return (NULL);
+	}
+	a = xen_alloc_cnt++;
+
+	if (ddi_dma_alloc_handle(xpv_dip, &xpv_dma_attr, DDI_DMA_SLEEP, 0,
+	    &xpv_dma_handle[a]) != DDI_SUCCESS)
+		return (NULL);
+
+	if (ddi_dma_mem_alloc(xpv_dma_handle[a], MMU_PAGESIZE * cnt,
+	    &xpv_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0,
+	    &addr, &len, &xpv_dma_acchandle[a]) != DDI_SUCCESS) {
+		ddi_dma_free_handle(&xpv_dma_handle[a]);
+		cmn_err(CE_WARN, "Couldn't allocate memory for xpv devices");
+		return (NULL);
+	}
+	return (addr);
+}
+
+/*
+ * Device enumeration hook.  It is called twice; only the first call
+ * (reprogram == 0) does any work, creating the "xpvd" node under the root
+ * of the device tree, binding a driver to it and enumerating the devices
+ * below it.  The second call is a no-op.
+ */
+static void
+xpv_enumerate(int reprogram)
+{
+	dev_info_t *xpvd_node;
+
+	if (reprogram == 0) {
+		ndi_devi_alloc_sleep(ddi_root_node(), "xpvd",
+		    (pnode_t)DEVI_SID_NODEID, &xpvd_node);
+
+		(void) ndi_devi_bind_driver(xpvd_node, 0);
+
+		/*
+		 * Too early to enumerate split device drivers in domU
+		 * since we need to create taskq thread during enumeration.
+		 * So, we only enumerate softdevs and console here.
+		 */
+		xendev_enum_all(xpvd_node, B_TRUE);
+	}
+}
+
+/*
+ * Translate a hypervisor errcode to a Solaris error code.
+ *
+ * 'error' is the negative value returned by a hypercall.  Each known X_*
+ * code maps to the errno of the same name; anything else panics.
+ */
+int
+xen_xlate_errcode(int error)
+{
+#define	XLATE(num)	case X_##num: return (num)
+
+	switch (-error) {
+	XLATE(EPERM);	XLATE(ENOENT);		XLATE(ESRCH);
+	XLATE(EINTR);	XLATE(EIO);		XLATE(ENXIO);
+	XLATE(E2BIG);	XLATE(ENOMEM);		XLATE(EACCES);
+	XLATE(EFAULT);	XLATE(EBUSY);		XLATE(EEXIST);
+	XLATE(ENODEV);	XLATE(EISDIR);		XLATE(EINVAL);
+	XLATE(ENOSPC);	XLATE(ESPIPE);		XLATE(EROFS);
+	XLATE(ENOSYS);	XLATE(ENOTEMPTY);	XLATE(EISCONN);
+	XLATE(ENODATA);
+	default:
+		panic("xen_xlate_errcode: unknown error %d", error);
+	}
+	return (error);
+#undef XLATE
+}
+
+/*
+ * printf-style console output used by SUSPEND_DEBUG().
+ *
+ * The collected arguments have to go to vprintf(): handing a va_list to the
+ * variadic printf() would make it interpret the va_list itself as the first
+ * format argument and print garbage.
+ */
+/*PRINTFLIKE1*/
+void
+xen_printf(const char *fmt, ...)
+{
+	va_list adx;
+
+	va_start(adx, fmt);
+	vprintf(fmt, adx);
+	va_end(adx);
+}
+
+/*
+ * Stub functions to get the FE drivers to build, and to catch drivers that
+ * misbehave in HVM domains.
+ */
+/*
+ * Stub: unconditionally panics; releasing a pfn is not supported here.
+ */
+/*ARGSUSED*/
+void
+xen_release_pfn(pfn_t pfn, caddr_t va)
+{
+ panic("xen_release_pfn() is not supported in HVM domains");
+}
+
+/*
+ * Stub: unconditionally panics; reassigning a pfn is not supported here.
+ */
+/*ARGSUSED*/
+void
+reassign_pfn(pfn_t pfn, mfn_t mfn)
+{
+ panic("reassign_pfn() is not supported in HVM domains");
+}
+
+/*
+ * Stub: unconditionally panics.  The return statement only satisfies the
+ * function's signature.
+ */
+/*ARGSUSED*/
+long
+balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns)
+{
+ panic("balloon_free_pages() is not supported in HVM domains");
+ return (0);
+}
+
+/*
+ * Stub: unconditionally panics; the balloon interface is not supported here.
+ */
+/*ARGSUSED*/
+void
+balloon_drv_added(int64_t delta)
+{
+ panic("balloon_drv_added() is not supported in HVM domains");
+}
+
+/*
+ * Add a mapping for the machine page at the given virtual address.
+ *
+ * One read/write page is loaded into the kernel address space; only
+ * level 0 mappings are supported.
+ */
+void
+kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
+{
+	caddr_t vaddr = (caddr_t)va;
+
+	ASSERT(level == 0);
+
+	hat_devload(kas.a_hat, vaddr, MMU_PAGESIZE, mmu_btop(ma),
+	    PROT_READ | PROT_WRITE, HAT_LOAD);
+}
+
+/*
+ * Fetch an HVM parameter of this domain from the hypervisor.
+ * Returns the parameter's value, or (uint64_t)-1 if the hypercall fails.
+ */
+static uint64_t
+hvm_get_param(int param_id)
+{
+	struct xen_hvm_param param;
+
+	param.index = param_id;
+	param.domid = DOMID_SELF;
+
+	if (HYPERVISOR_hvm_op(HVMOP_get_param, &param) >= 0)
+		return (param.value);
+
+	return (-1);
+}
+
+/* xenbus watch on "control/shutdown" and the taskq that runs the requests */
+static struct xenbus_watch shutdown_watch;
+taskq_t *xen_shutdown_tq;
+
+/* Shutdown request codes; the valid ones index cmd_strings[] below */
+#define SHUTDOWN_INVALID -1
+#define SHUTDOWN_POWEROFF 0
+#define SHUTDOWN_REBOOT 1
+#define SHUTDOWN_SUSPEND 2
+#define SHUTDOWN_HALT 3
+#define SHUTDOWN_MAX 4
+
+/* How long to wait after posting the sysevent before forcing the shutdown */
+#define SHUTDOWN_TIMEOUT_SECS (60 * 5)
+
+/* Request names, indexed by SHUTDOWN_* code */
+static const char *cmd_strings[SHUTDOWN_MAX] = {
+ "poweroff",
+ "reboot",
+ "suspend",
+ "halt"
+};
+
+/*
+ * Suspend 'dip', its siblings and all of their descendants, children
+ * before parents.
+ *
+ * Nodes with no driver are skipped.  Every device successfully detached
+ * with DDI_SUSPEND is marked DCF_CPR_SUSPENDED.  Returns 0 on success, or
+ * ENXIO as soon as one device is not attached or refuses to suspend.
+ */
+int
+xen_suspend_devices(dev_info_t *dip)
+{
+	int error;
+	char buf[XPV_BUFSIZE];
+
+	SUSPEND_DEBUG("xen_suspend_devices\n");
+
+	while (dip != NULL) {
+		/* Children go first. */
+		if (xen_suspend_devices(ddi_get_child(dip)) != 0)
+			return (ENXIO);
+
+		if (ddi_get_driver(dip) != NULL) {
+			SUSPEND_DEBUG("Suspending device %s\n",
+			    ddi_deviname(dip, buf));
+			ASSERT((DEVI(dip)->devi_cpr_flags &
+			    DCF_CPR_SUSPENDED) == 0);
+
+			error = i_ddi_devi_attached(dip) ?
+			    devi_detach(dip, DDI_SUSPEND) : DDI_FAILURE;
+
+			if (error != DDI_SUCCESS) {
+				SUSPEND_DEBUG("WARNING: Unable to suspend "
+				    "device %s\n", ddi_deviname(dip, buf));
+				cmn_err(CE_WARN, "Unable to suspend device %s.",
+				    ddi_deviname(dip, buf));
+				cmn_err(CE_WARN, "Device is busy or does not "
+				    "support suspend/resume.");
+				return (ENXIO);
+			}
+			DEVI(dip)->devi_cpr_flags |= DCF_CPR_SUSPENDED;
+		}
+
+		dip = ddi_get_next_sibling(dip);
+	}
+	return (0);
+}
+
+/*
+ * Resume 'start', its siblings and all of their descendants.
+ *
+ * Siblings are visited last to first (each pass of the outer loop walks the
+ * sibling list up to the node handled on the previous pass), and a node is
+ * resumed before its children.  'resume_failed' is the error state carried
+ * in from the caller: once it, or a failure here, is non-zero no further
+ * device is resumed, but the walk continues so that DCF_CPR_SUSPENDED is
+ * cleared everywhere.  Returns the resulting error state.
+ */
+int
+xen_resume_devices(dev_info_t *start, int resume_failed)
+{
+ dev_info_t *dip, *next, *last = NULL;
+ int did_suspend;
+ int error = resume_failed;
+ char buf[XPV_BUFSIZE];
+
+ SUSPEND_DEBUG("xen_resume_devices\n");
+
+ while (last != start) {
+ dip = start;
+ next = ddi_get_next_sibling(dip);
+ while (next != last) {
+ dip = next;
+ next = ddi_get_next_sibling(dip);
+ }
+
+ /*
+ * cpr is the only one that uses this field and the device
+ * itself hasn't resumed yet, there is no need to use a
+ * lock, even though kernel threads are active by now.
+ */
+ did_suspend = DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED;
+ if (did_suspend)
+ DEVI(dip)->devi_cpr_flags &= ~DCF_CPR_SUSPENDED;
+
+ /*
+ * There may be background attaches happening on devices
+ * that were not originally suspended by cpr, so resume
+ * only devices that were suspended by cpr. Also, stop
+ * resuming after the first resume failure, but traverse
+ * the entire tree to clear the suspend flag.
+ */
+ if (did_suspend && !error) {
+ SUSPEND_DEBUG("Resuming device %s\n",
+ ddi_deviname(dip, buf));
+ /*
+ * If a device suspended by cpr gets detached during
+ * the resume process (for example, due to hotplugging)
+ * before cpr gets around to issuing it a DDI_RESUME,
+ * we'll have problems.
+ */
+ if (!i_ddi_devi_attached(dip)) {
+ cmn_err(CE_WARN, "Skipping %s, device "
+ "not ready for resume",
+ ddi_deviname(dip, buf));
+ } else {
+ if (devi_attach(dip, DDI_RESUME) !=
+ DDI_SUCCESS) {
+ error = ENXIO;
+ }
+ }
+ }
+
+ /*
+ * NOTE(review): once error is ENXIO this warning is printed for
+ * every remaining node, not only the one that failed - confirm
+ * that is intended.
+ */
+ if (error == ENXIO) {
+ cmn_err(CE_WARN, "Unable to resume device %s",
+ ddi_deviname(dip, buf));
+ }
+
+ error = xen_resume_devices(ddi_get_child(dip), error);
+ last = dip;
+ }
+
+ return (error);
+}
+
+/*
+ * ddi_walk_devs() callback: remember the first node named "xpvd" in
+ * xpvd_dip and stop the walk; keep walking past every other node.
+ */
+/*ARGSUSED*/
+static int
+check_xpvd(dev_info_t *dip, void *arg)
+{
+	char *name = ddi_node_name(dip);
+
+	if (name != NULL && strcmp(name, "xpvd") == 0) {
+		xpvd_dip = dip;
+		return (DDI_WALK_TERMINATE);
+	}
+
+	return (DDI_WALK_CONTINUE);
+}
+
+/*
+ * Top level routine to direct suspend/resume of a domain.
+ *
+ * Sequence: suspend the PV devices below xpvd and xenbus; bind to vcpu 0,
+ * pause the other CPUs and disable interrupts; ask the hypervisor to
+ * suspend; on return re-map the shared info page, resume grant tables and
+ * event channels, restart the CPUs, and finally resume xenbus and the PV
+ * devices.
+ */
+void
+xen_suspend_domain(void)
+{
+ extern void rtcsync(void);
+ extern void ec_resume(void);
+ extern kmutex_t ec_lock;
+ struct xen_add_to_physmap xatp;
+ ulong_t flags;
+ int err;
+
+ cmn_err(CE_NOTE, "Domain suspending for save/migrate");
+
+ SUSPEND_DEBUG("xen_suspend_domain\n");
+
+ /*
+ * We only want to suspend the PV devices, since the emulated devices
+ * are suspended by saving the emulated device state. The PV devices
+ * are all children of the xpvd nexus device. So we search the
+ * device tree for the xpvd node to use as the root of the tree to
+ * be suspended.
+ */
+ if (xpvd_dip == NULL)
+ ddi_walk_devs(ddi_root_node(), check_xpvd, NULL);
+
+ /*
+ * suspend interrupts and devices
+ *
+ * NOTE(review): the result of xen_suspend_devices() is discarded, so
+ * the suspend proceeds even if a device refused - confirm intended.
+ */
+ if (xpvd_dip != NULL)
+ (void) xen_suspend_devices(ddi_get_child(xpvd_dip));
+ else
+ cmn_err(CE_WARN, "No PV devices found to suspend");
+ SUSPEND_DEBUG("xenbus_suspend\n");
+ xenbus_suspend();
+
+ mutex_enter(&cpu_lock);
+
+ /*
+ * Suspend on vcpu 0
+ */
+ thread_affinity_set(curthread, 0);
+ kpreempt_disable();
+
+ if (ncpus > 1)
+ pause_cpus(NULL);
+ /*
+ * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
+ * any holder would have dropped it to get through pause_cpus().
+ */
+ mutex_enter(&ec_lock);
+
+ /*
+ * From here on in, we can't take locks.
+ */
+
+ flags = intr_clear();
+
+ SUSPEND_DEBUG("HYPERVISOR_suspend\n");
+ /*
+ * At this point we suspend and sometime later resume.
+ * Note that this call may return with an indication of a cancelled
+ * suspend; for now, no matter what the return value is, we do a
+ * full resume of all suspended drivers, etc.
+ */
+ (void) HYPERVISOR_shutdown(SHUTDOWN_suspend);
+
+ /*
+ * Point HYPERVISOR_shared_info to the proper place.
+ */
+ xatp.domid = DOMID_SELF;
+ xatp.idx = 0;
+ xatp.space = XENMAPSPACE_shared_info;
+ xatp.gpfn = shared_info_frame;
+ if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0)
+ panic("Could not set shared_info page. error: %d", err);
+
+ SUSPEND_DEBUG("gnttab_resume\n");
+ gnttab_resume();
+
+ SUSPEND_DEBUG("ec_resume\n");
+ ec_resume();
+
+ intr_restore(flags);
+
+ if (ncpus > 1)
+ start_cpus();
+
+ mutex_exit(&ec_lock);
+ mutex_exit(&cpu_lock);
+
+ /*
+ * Now we can take locks again.
+ *
+ * NOTE(review): preemption is still disabled (see kpreempt_disable()
+ * above) across rtcsync(), xenbus_resume() and xen_resume_devices();
+ * confirm none of them needs to block.
+ */
+
+ rtcsync();
+
+ SUSPEND_DEBUG("xenbus_resume\n");
+ xenbus_resume();
+ SUSPEND_DEBUG("xen_resume_devices\n");
+ if (xpvd_dip != NULL)
+ (void) xen_resume_devices(ddi_get_child(xpvd_dip), 0);
+
+ thread_affinity_clear(curthread);
+ kpreempt_enable();
+
+ SUSPEND_DEBUG("finished xen_suspend_domain\n");
+
+ cmn_err(CE_NOTE, "domain restore/migrate completed");
+}
+
+/*
+ * Force the requested shutdown directly through kadmin().  Used when the
+ * shutdown sysevent could not be delivered, and as the timeout handler
+ * when nothing acted on it in time.  'arg' carries the SHUTDOWN_* code;
+ * codes other than halt, poweroff and reboot only produce the warning.
+ */
+static void
+xen_dirty_shutdown(void *arg)
+{
+	int cmd = (uintptr_t)arg;
+
+	cmn_err(CE_WARN, "Externally requested shutdown failed or "
+	    "timed out.\nShutting down.\n");
+
+	if (cmd == SHUTDOWN_HALT || cmd == SHUTDOWN_POWEROFF)
+		(void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred);
+	else if (cmd == SHUTDOWN_REBOOT)
+		(void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred);
+}
+
+/*
+ * Taskq function that carries out a shutdown request; 'arg' is the
+ * SHUTDOWN_* code.
+ *
+ * A suspend request is handled in place.  For the others a sysevent
+ * carrying the request name is logged, and a timeout is armed that falls
+ * back to xen_dirty_shutdown() after SHUTDOWN_TIMEOUT_SECS.  If the event
+ * cannot be built or logged, xen_dirty_shutdown() is called immediately.
+ */
+static void
+xen_shutdown(void *arg)
+{
+	int cmd = (uintptr_t)arg;
+	nvlist_t *attrs = NULL;
+	sysevent_t *ev;
+	sysevent_id_t eid;
+	int err;
+
+	ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX);
+
+	if (cmd == SHUTDOWN_SUSPEND) {
+		xen_suspend_domain();
+		return;
+	}
+
+	/* Build the attribute list naming the requested action. */
+	if (nvlist_alloc(&attrs, NV_UNIQUE_NAME, KM_SLEEP) != DDI_SUCCESS)
+		goto failure;
+	if (nvlist_add_string(attrs, "shutdown", cmd_strings[cmd]) !=
+	    DDI_SUCCESS)
+		goto failure;
+
+	ev = sysevent_alloc("EC_xpvsys", "control", "SUNW:kern:xpv",
+	    SE_SLEEP);
+	if (ev == NULL)
+		goto failure;
+
+	(void) sysevent_attach_attributes(ev, (sysevent_attr_list_t *)attrs);
+	err = log_sysevent(ev, SE_SLEEP, &eid);
+	sysevent_detach_attributes(ev);
+	sysevent_free(ev);
+
+	if (err != 0)
+		goto failure;
+
+	/* Force the issue if the request has not been acted on in time. */
+	(void) timeout(xen_dirty_shutdown, arg,
+	    SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC));
+
+	nvlist_free(attrs);
+	return;
+
+failure:
+	if (attrs != NULL)
+		nvlist_free(attrs);
+	xen_dirty_shutdown(arg);
+}
+
+/*
+ * xenbus watch callback for "control/shutdown".
+ *
+ * Inside a xenbus transaction the requested action is read, mapped to a
+ * SHUTDOWN_* code, and the node is reset to "" to acknowledge it.  If the
+ * transaction ends with EAGAIN the whole sequence is retried.  A recognised
+ * request is then dispatched to xen_shutdown() on xen_shutdown_tq.
+ */
+/*ARGSUSED*/
+static void
+xen_shutdown_handler(struct xenbus_watch *watch, const char **vec,
+ unsigned int len)
+{
+ char *str;
+ xenbus_transaction_t xbt;
+ int err, shutdown_code = SHUTDOWN_INVALID;
+ unsigned int slen;
+
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err)
+ return;
+ if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) {
+ (void) xenbus_transaction_end(xbt, 1);
+ return;
+ }
+
+ SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str);
+
+ /*
+ * If this is a watch fired from our write below, check out early to
+ * avoid an infinite loop.
+ */
+ if (strcmp(str, "") == 0) {
+ (void) xenbus_transaction_end(xbt, 0);
+ kmem_free(str, slen);
+ return;
+ } else if (strcmp(str, "poweroff") == 0) {
+ shutdown_code = SHUTDOWN_POWEROFF;
+ } else if (strcmp(str, "reboot") == 0) {
+ shutdown_code = SHUTDOWN_REBOOT;
+ } else if (strcmp(str, "suspend") == 0) {
+ shutdown_code = SHUTDOWN_SUSPEND;
+ } else if (strcmp(str, "halt") == 0) {
+ shutdown_code = SHUTDOWN_HALT;
+ } else {
+ printf("Ignoring shutdown request: %s\n", str);
+ }
+
+ /* Acknowledge the request by clearing the node. */
+ (void) xenbus_write(xbt, "control", "shutdown", "");
+ err = xenbus_transaction_end(xbt, 0);
+ if (err == EAGAIN) {
+ SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id);
+ kmem_free(str, slen);
+ goto again;
+ }
+
+ /* NOTE(review): assumes slen is the allocated size of str - confirm */
+ kmem_free(str, slen);
+ if (shutdown_code != SHUTDOWN_INVALID) {
+ (void) taskq_dispatch(xen_shutdown_tq, xen_shutdown,
+ (void *)(intptr_t)shutdown_code, 0);
+ }
+}
+
+/*
+ * One-time initialization of the PV support layer in an HVM domain.
+ *
+ * Verifies that we are running on a supported Xen (3.1 or later), installs
+ * the hypercall page, fills in the parts of xen_info the PV drivers need,
+ * determines the hypervisor's word size, maps the shared info page, and
+ * brings up grant tables, event channels, xenbus and the shutdown watch.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+xen_pv_init(dev_info_t *xpv_dip)
+{
+	struct cpuid_regs cp;
+	uint32_t xen_signature[4];
+	char *xen_str;
+	struct xen_add_to_physmap xatp;
+	xen_capabilities_info_t caps;
+	pfn_t pfn;
+	uint64_t msrval;
+	int err;
+
+	/*
+	 * Xen's pseudo-cpuid function 0x40000000 returns a string
+	 * representing the Xen signature in %ebx, %ecx, and %edx.
+	 * %eax contains the maximum supported cpuid function.
+	 */
+	cp.cp_eax = 0x40000000;
+	(void) __cpuid_insn(&cp);
+	xen_signature[0] = cp.cp_ebx;
+	xen_signature[1] = cp.cp_ecx;
+	xen_signature[2] = cp.cp_edx;
+	xen_signature[3] = 0;
+	xen_str = (char *)xen_signature;
+	if (strcmp("XenVMMXenVMM", xen_str) != 0 ||
+	    cp.cp_eax < 0x40000002) {
+		cmn_err(CE_WARN,
+		    "Attempting to load Xen drivers on non-Xen system");
+		return (-1);
+	}
+
+	/*
+	 * cpuid function 0x40000001 returns the Xen version in %eax.  The
+	 * top 16 bits are the major version, the bottom 16 are the minor
+	 * version.
+	 */
+	cp.cp_eax = 0x40000001;
+	(void) __cpuid_insn(&cp);
+	xen_major = cp.cp_eax >> 16;
+	xen_minor = cp.cp_eax & 0xffff;
+
+	/*
+	 * The xpv driver is incompatible with xen versions older than 3.1.
+	 * This is due to the changes in the vcpu_info and shared_info
+	 * structs used to communicate with the hypervisor (the event
+	 * channels in particular) that were introduced with 3.1.
+	 */
+	if (xen_major < 3 || (xen_major == 3 && xen_minor < 1)) {
+		cmn_err(CE_WARN, "Xen version %d.%d is not supported",
+		    xen_major, xen_minor);
+		return (-1);
+	}
+
+	/*
+	 * cpuid function 0x40000002 returns information about the
+	 * hypercall page.  %eax nominally contains the number of pages
+	 * with hypercall code, but according to the Xen guys, "I'll
+	 * guarantee that remains one forever more, so you can just
+	 * allocate a single page and get quite upset if you ever see CPUID
+	 * return more than one page."  %ebx contains an MSR we use to ask
+	 * Xen to remap each page at a specific pfn.
+	 */
+	cp.cp_eax = 0x40000002;
+	(void) __cpuid_insn(&cp);
+
+	/*
+	 * Let Xen know where we want the hypercall page mapped.  We
+	 * already have a page allocated in the .text section to simplify
+	 * the wrapper code.
+	 */
+	pfn = hat_getpfnum(kas.a_hat, (caddr_t)&hypercall_page);
+	msrval = mmu_ptob(pfn);
+	wrmsr(cp.cp_ebx, msrval);
+
+	/* Fill in the xen_info data */
+	xen_info = kmem_zalloc(sizeof (start_info_t), KM_SLEEP);
+	(void) sprintf(xen_info->magic, "xen-%d.%d", xen_major, xen_minor);
+	xen_info->store_mfn = (mfn_t)hvm_get_param(HVM_PARAM_STORE_PFN);
+	xen_info->store_evtchn = (int)hvm_get_param(HVM_PARAM_STORE_EVTCHN);
+
+	/* Figure out whether the hypervisor is 32-bit or 64-bit. */
+	if ((HYPERVISOR_xen_version(XENVER_capabilities, &caps) == 0)) {
+		((char *)(caps))[sizeof (caps) - 1] = '\0';
+		if (strstr(caps, "x86_64") != NULL)
+			xen_is_64bit = 1;
+		else if (strstr(caps, "x86_32") != NULL)
+			xen_is_64bit = 0;
+	}
+	if (xen_is_64bit < 0) {
+		cmn_err(CE_WARN, "Couldn't get capability info from Xen.");
+		return (-1);
+	}
+#ifdef __amd64
+	ASSERT(xen_is_64bit == 1);
+#endif
+
+	/*
+	 * Allocate space for the shared_info page and tell Xen where it
+	 * is.  xen_alloc_pages() can fail; the page must not be looked up
+	 * or handed to the hypervisor in that case.
+	 */
+	HYPERVISOR_shared_info = xen_alloc_pages(1);
+	if (HYPERVISOR_shared_info == NULL) {
+		cmn_err(CE_WARN, "Could not allocate shared_info page");
+		return (-1);
+	}
+	shared_info_frame = hat_getpfnum(kas.a_hat,
+	    (caddr_t)HYPERVISOR_shared_info);
+	xatp.domid = DOMID_SELF;
+	xatp.idx = 0;
+	xatp.space = XENMAPSPACE_shared_info;
+	xatp.gpfn = shared_info_frame;
+	if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0) {
+		cmn_err(CE_WARN, "Could not get shared_info page from Xen."
+		    "  error: %d", err);
+		return (-1);
+	}
+
+	/* Set up the grant tables. */
+	gnttab_init();
+
+	/* Set up event channel support */
+	if (ec_init(xpv_dip) != 0)
+		return (-1);
+
+	/* Set up xenbus */
+	xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
+	xs_early_init();
+	xs_domu_init();
+
+	/* Set up for suspend/resume/migrate */
+	xen_shutdown_tq = taskq_create("shutdown_taskq", 1,
+	    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
+	shutdown_watch.node = "control/shutdown";
+	shutdown_watch.callback = xen_shutdown_handler;
+	if (register_xenbus_watch(&shutdown_watch))
+		cmn_err(CE_WARN, "Failed to set shutdown watcher");
+
+	return (0);
+}
+
+/*
+ * Undo the parts of xen_pv_init() that this driver knows how to undo:
+ * release the start_info_t that xen_pv_init() allocated into xen_info
+ * (if any) and then call ec_fini(), the counterpart of the ec_init()
+ * call made during initialization.
+ */
+static void
+xen_pv_fini()
+{
+	if (xen_info != NULL) {
+		kmem_free(xen_info, sizeof (start_info_t));
+		/*
+		 * Clear the global so that a second call (or any later
+		 * reader) never sees a pointer to freed memory.
+		 */
+		xen_info = NULL;
+	}
+	ec_fini();
+}
+
+/*
+ * getinfo entry point.  The dev_t passed in arg must carry the XPV_MINOR
+ * minor number; otherwise the request fails without touching *result.
+ * DDI_INFO_DEVT2DEVINFO yields the saved xpv_dip, DDI_INFO_DEVT2INSTANCE
+ * yields instance 0, and every other command fails.
+ */
+/*ARGSUSED*/
+static int
+xpv_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
+{
+	int rv = DDI_SUCCESS;
+
+	if (getminor((dev_t)arg) == XPV_MINOR) {
+		if (cmd == DDI_INFO_DEVT2DEVINFO)
+			*result = xpv_dip;
+		else if (cmd == DDI_INFO_DEVT2INSTANCE)
+			*result = 0;
+		else
+			rv = DDI_FAILURE;
+	} else {
+		rv = DDI_FAILURE;
+	}
+
+	return (rv);
+}
+
+/*
+ * attach entry point.  Only DDI_ATTACH is handled; any other command
+ * fails.  Creates the character minor node, records the dev_info pointer
+ * in xpv_dip, runs xen_pv_init(), disables the memory scrubber and
+ * publishes the driver version through xenbus.
+ *
+ * Returns DDI_SUCCESS on success.  On failure returns DDI_FAILURE and
+ * leaves no minor node and no xpv_dip behind.
+ */
+static int
+xpv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	if (cmd != DDI_ATTACH)
+		return (DDI_FAILURE);
+
+	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
+	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
+		return (DDI_FAILURE);
+
+	/* xen_pv_init() passes xpv_dip to ec_init(), so set it first. */
+	xpv_dip = dip;
+
+	if (xen_pv_init(dip) != 0) {
+		/*
+		 * Back out the steps taken above.  Otherwise a failed attach
+		 * would leak the minor node and xpv_getinfo() would keep
+		 * handing out a dev_info pointer for an unattached instance.
+		 */
+		ddi_remove_minor_node(dip, NULL);
+		xpv_dip = NULL;
+		return (DDI_FAILURE);
+	}
+
+	ddi_report_dev(dip);
+
+	/*
+	 * If the memscrubber attempts to scrub the pages we hand to Xen,
+	 * the domain will panic.
+	 */
+	memscrub_disable();
+
+	/*
+	 * Report our version to dom0.  Failing to do so is only worth a
+	 * warning; the attach itself still succeeds.
+	 */
+	if (xenbus_printf(XBT_NULL, "hvmpv/xpv", "version", "%d",
+	    HVMPV_XPV_VERS))
+		cmn_err(CE_WARN, "xpv: couldn't write version\n");
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Attempts to reload the PV driver plumbing hang on Intel platforms, so
+ * we don't want to unload the framework by accident.
+ */
+int xpv_allow_detach = 0;
+
+/*
+ * detach entry point.  Refuses anything other than DDI_DETACH, and
+ * refuses DDI_DETACH too unless the xpv_allow_detach tunable has been
+ * set to a non-zero value.  When an instance is attached (xpv_dip is
+ * set) it is torn down with xen_pv_fini(), its minor nodes are removed
+ * and xpv_dip is cleared.
+ */
+static int
+xpv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	if (cmd != DDI_DETACH)
+		return (DDI_FAILURE);
+	if (xpv_allow_detach == 0)
+		return (DDI_FAILURE);
+
+	/* Nothing was attached, so there is nothing to undo. */
+	if (xpv_dip == NULL)
+		return (DDI_SUCCESS);
+
+	xen_pv_fini();
+	ddi_remove_minor_node(dip, NULL);
+	xpv_dip = NULL;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * open entry point.  Succeeds (returns 0) only for the XPV_MINOR minor
+ * number; any other minor yields ENXIO.
+ */
+/*ARGSUSED1*/
+static int
+xpv_open(dev_t *dev, int flag, int otyp, cred_t *cr)
+{
+	if (getminor(*dev) != XPV_MINOR)
+		return (ENXIO);
+
+	return (0);
+}
+
+/*
+ * ioctl entry point.  No ioctl commands are implemented: every request,
+ * whatever its arguments, is rejected with EINVAL.
+ */
+/*ARGSUSED*/
+static int
+xpv_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr,
+ int *rval_p)
+{
+ return (EINVAL);
+}
+
+/*
+ * Module load entry point.  Installs the module described by modl and,
+ * only if that succeeds, registers xpv_enumerate as a bus probe routine.
+ * Returns 0 on success or the error from mod_install().
+ */
+int
+_init(void)
+{
+	int err = mod_install(&modl);
+
+	if (err == 0)
+		impl_bus_add_probe(xpv_enumerate);
+
+	return (err);
+}
+
+/*
+ * Module unload entry point.  Removes the module described by modl and,
+ * only if that succeeds, unregisters the xpv_enumerate bus probe routine.
+ * Returns 0 on success or the error from mod_remove().
+ */
+int
+_fini(void)
+{
+	int err = mod_remove(&modl);
+
+	if (err == 0)
+		impl_bus_delete_probe(xpv_enumerate);
+
+	return (err);
+}
+
+/*
+ * Module information entry point: passes the request straight through to
+ * mod_info() for the module described by modl and returns its result.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modl, modinfop));
+}
diff --git a/usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile b/usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile
new file mode 100644
index 0000000000..5ae59dd0bb
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile
@@ -0,0 +1,102 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/i86pc/i86hvm/pv_cmdk/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the pv_cmdk (cmdk) driver.
+#
+# i86pc implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = cmdk
+OBJECTS = $(PV_CMDK_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(PV_CMDK_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm
+
+#
+# When generating lint libraries, we want the name of the lint module
+# that will be generated to be pv_cmdk and not cmdk, so override the
+# default lint module name here.
+#
+LINT_MODULE = pv_cmdk
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(LINT_MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+LDFLAGS += -dy -Nmisc/strategy -Nmisc/cmlb
+LDFLAGS += -Ndrv/xpvd -Ndrv/xdf
+
+#
+# The Xen header files do not lint cleanly. Since the troublesome
+# structures form part of the externally defined interface to the
+# hypervisor, we're stuck with the noise.
+#
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.targ
diff --git a/usr/src/uts/i86pc/i86hvm/pv_rtls/Makefile b/usr/src/uts/i86pc/i86hvm/pv_rtls/Makefile
new file mode 100644
index 0000000000..a2cabdef52
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/pv_rtls/Makefile
@@ -0,0 +1,90 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/i86pc/i86hvm/pv_rtls/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the null rtls module for xvm.
+#
+# i86pc implementation architecture dependent
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = rtls
+OBJECTS = $(PV_RTLS_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(PV_RTLS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm
+
+#
+# When generating lint libraries, we want the name of the lint module
+# that will be generated to be pv_rtls and not rtls, so override the
+# default lint module name here.
+#
+LINT_MODULE = pv_rtls
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(LINT_MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+LDFLAGS += -dy
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.targ
diff --git a/usr/src/uts/i86pc/i86hvm/sys/xpv_support.h b/usr/src/uts/i86pc/i86hvm/sys/xpv_support.h
new file mode 100644
index 0000000000..c42551b4f8
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/sys/xpv_support.h
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_XPV_SUPPORT_H
+#define	_SYS_XPV_SUPPORT_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Support declarations shared by the xpv driver and the paravirtualized
+ * front-end drivers built under i86hvm.  Everything other than the Xen
+ * interface version selection is hidden from assembler sources (_ASM).
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define	__XEN_INTERFACE_VERSION__	__XEN_LATEST_INTERFACE_VERSION__
+
+#if !defined(_ASM)
+
+#include <sys/types.h>
+#include <sys/inttypes.h>
+#include <sys/dditypes.h>
+
+typedef ulong_t mfn_t;
+typedef uint64_t maddr_t;
+/* Turn a frame number into an address by shifting in the page offset bits */
+#define	mfn_to_ma(mfn)	((maddr_t)(mfn) << MMU_PAGESHIFT)
+/* All-ones mfn_t value used as the "no frame" marker */
+#define	MFN_INVALID	(-(mfn_t)1)
+
+/* Priority levels passed as the pri_t argument when binding handlers */
+#define	IPL_DEBUG	15	/* domain debug interrupt */
+#define	IPL_CONS	9
+#define	IPL_VIF		6
+#define	IPL_VBD		5
+#define	IPL_EVTCHN	1
+
+#define	INVALID_EVTCHN	0
+
+/* Event channel handler; the argument list is deliberately unspecified */
+typedef uint_t (*ec_handler_fcn_t)();
+
+extern int ec_init(dev_info_t *);
+extern void ec_fini(void);
+extern void ec_bind_evtchn_to_handler(int, pri_t, ec_handler_fcn_t, void *);
+extern void ec_unbind_evtchn(int);
+extern void ec_notify_via_evtchn(uint_t);
+extern void hypervisor_mask_event(uint_t);
+extern void hypervisor_unmask_event(uint_t);
+
+extern int xen_bind_interdomain(int, int, int *);
+extern int xen_alloc_unbound_evtchn(int, int *);
+extern int xen_xlate_errcode(int error);
+extern void *xen_alloc_pages(pgcnt_t cnt);
+extern void kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level);
+
+/*
+ * Stub functions to allow the FE drivers to build without littering them
+ * with #ifdefs
+ */
+extern void balloon_drv_added(int64_t);
+extern long balloon_free_pages(uint_t, mfn_t *, caddr_t, pfn_t *);
+extern void xen_release_pfn(pfn_t, caddr_t);
+extern void reassign_pfn(pfn_t, mfn_t);
+
+extern int xen_is_64bit;
+
+#define	IN_XPV_PANIC()	(__lintzero)
+
+#endif /* !_ASM */
+
+/*
+ * The extern "C" block is opened outside the _ASM guard above, so it has
+ * to be closed outside it as well to keep the braces balanced in every
+ * configuration.
+ */
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_XPV_SUPPORT_H */
diff --git a/usr/src/uts/i86pc/i86hvm/xdf/Makefile b/usr/src/uts/i86pc/i86hvm/xdf/Makefile
new file mode 100644
index 0000000000..4b7bbe75d8
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/xdf/Makefile
@@ -0,0 +1,89 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# uts/i86pc/i86hvm/xdf/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# i86pc architecture dependent
+#
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = xdf
+OBJECTS = $(XDF_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(XDF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+# Overrides
+CPPFLAGS += -DHVMPV_XDF_VERS=1
+LDFLAGS += -dy -Nmisc/cmlb -Ndrv/xpvd -Ndrv/xpv
+
+LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.targ
diff --git a/usr/src/uts/i86pc/i86hvm/xnf/Makefile b/usr/src/uts/i86pc/i86hvm/xnf/Makefile
new file mode 100644
index 0000000000..683572496f
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/xnf/Makefile
@@ -0,0 +1,95 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# uts/i86pc/i86hvm/xnf/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the xnf
+# network driver kernel module.
+#
+# i86pc architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = xnf
+OBJECTS = $(XNF_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(XNF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Driver depends on MAC, IP, xpvd and xpv
+#
+CPPFLAGS += -DHVMPV_XNF_VERS=1
+LDFLAGS += -dy -Nmisc/mac -Ndrv/ip -Ndrv/xpvd -Ndrv/xpv
+
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.targ
diff --git a/usr/src/uts/i86pc/i86hvm/xpv/Makefile b/usr/src/uts/i86pc/i86hvm/xpv/Makefile
new file mode 100644
index 0000000000..a1ff318e68
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/xpv/Makefile
@@ -0,0 +1,98 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/i86pc/i86hvm/xpv/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the xpv
+# driver, which provides the necessary infrastructure for
+# paravirtualized front-end drivers in HVM systems.
+#
+# i86pc implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = xpv
+OBJECTS = $(XPV_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(XPV_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/i86pc/i86hvm/io/xpv
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(CONFMOD)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+CPPFLAGS += -DHVMPV_XPV_VERS=1
+LDFLAGS += -dy -N mach/pcplusmp
+
+#
+# The Xen header files do not lint cleanly. Since the troublesome
+# structures form part of the externally defined interface to the
+# hypervisor, we're stuck with the noise.
+#
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.targ
diff --git a/usr/src/uts/i86pc/i86hvm/xpvd/Makefile b/usr/src/uts/i86pc/i86hvm/xpvd/Makefile
new file mode 100644
index 0000000000..283bd34e5e
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/xpvd/Makefile
@@ -0,0 +1,90 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the xpvd nexus driver
+#
+# i86pc implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = xpvd
+OBJECTS = $(XPVD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(XPVD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/xen/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(CONFMOD)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+CPPFLAGS += -DHVMPV_XPVD_VERS=1
+LDFLAGS += -dy -Ndrv/xpv
+
+LINTTAGS += -erroff=E_STATIC_UNUSED
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.targ