diff options
| author | edp <none@none> | 2008-04-14 22:44:34 -0700 |
|---|---|---|
| committer | edp <none@none> | 2008-04-14 22:44:34 -0700 |
| commit | eb0cc229f19c437a6b538d3ac0d0443268290b7e (patch) | |
| tree | e4d394e7fb7dcf49c28308fb78ab8627e6a9a553 /usr/src/uts/i86pc/i86hvm | |
| parent | 6eb35ee750312cc65aa066dd0f625b9d54c6f86e (diff) | |
| download | illumos-joyent-eb0cc229f19c437a6b538d3ac0d0443268290b7e.tar.gz | |
6683029 Install -G and i86hvm don't mix well
6683772 yacc yacks when confronted with libumem
--HG--
rename : usr/src/uts/i86pc/Makefile.hvm => usr/src/uts/i86pc/i86hvm/Makefile.i86hvm
rename : usr/src/uts/i86pc/hvm_bootstrap/Makefile => usr/src/uts/i86pc/i86hvm/hvm_bootstrap/Makefile
rename : usr/src/uts/common/xen/io/hvm_bootstrap.c => usr/src/uts/i86pc/i86hvm/io/hvm_bootstrap.c
rename : usr/src/uts/i86pc/io/pv_cmdk.c => usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c
rename : usr/src/uts/i86pc/io/pv_rtls.c => usr/src/uts/i86pc/i86hvm/io/pv_rtls.c
rename : usr/src/uts/i86pc/io/xpv/evtchn.c => usr/src/uts/i86pc/i86hvm/io/xpv/evtchn.c
rename : usr/src/uts/i86pc/io/xpv/xpv.conf => usr/src/uts/i86pc/i86hvm/io/xpv/xpv.conf
rename : usr/src/uts/i86pc/io/xpv/xpv_support.c => usr/src/uts/i86pc/i86hvm/io/xpv/xpv_support.c
rename : usr/src/uts/i86pc/pv_cmdk/Makefile => usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile
rename : usr/src/uts/i86pc/pv_rtls/Makefile => usr/src/uts/i86pc/i86hvm/pv_rtls/Makefile
rename : usr/src/uts/i86pc/sys/xpv_support.h => usr/src/uts/i86pc/i86hvm/sys/xpv_support.h
rename : usr/src/uts/i86pc/xdf/Makefile => usr/src/uts/i86pc/i86hvm/xdf/Makefile
rename : usr/src/uts/i86pc/xnf/Makefile => usr/src/uts/i86pc/i86hvm/xnf/Makefile
rename : usr/src/uts/i86pc/xpv/Makefile => usr/src/uts/i86pc/i86hvm/xpv/Makefile
rename : usr/src/uts/i86pc/xpvd/Makefile => usr/src/uts/i86pc/i86hvm/xpvd/Makefile
Diffstat (limited to 'usr/src/uts/i86pc/i86hvm')
19 files changed, 4179 insertions, 0 deletions
diff --git a/usr/src/uts/i86pc/i86hvm/Makefile b/usr/src/uts/i86pc/i86hvm/Makefile new file mode 100644 index 0000000000..07de533f70 --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/Makefile @@ -0,0 +1,108 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/i86pc/i86hvm/Makefile +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the i86hvm platform modules. +# +# i86pc implementation architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Include common rules. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm + +def := TARGET= def +all := TARGET= all +install := TARGET= install +install_h := TARGET= install_h +clean := TARGET= clean +clobber := TARGET= clobber +lint := TARGET= lint +lintlib := TARGET= lintlib +modlintlib := TARGET= modlintlib +modlist := TARGET= modlist +modlist := NO_STATE= -K $$MODSTATE$$$$ +clean.lint := TARGET= clean.lint +check := TARGET= check + +# +# Default build targets. 
+# +.KEEP_STATE: + +.PARALLEL: $(HVM_KMODS) + +def all clean clobber clean.lint modlist modlintlib: $(HVM_KMODS) + +install: install_implementations .WAIT \ + $(HVM_KMODS) + +install_implementations: \ + $(ROOT_HVM_DIR) \ + $(ROOT_HVM_DRV_DIR) \ + $(ROOT_HVM_MISC_DIR) \ + $(USR_HVM_DIR) + +$(HVM_KMODS): FRC + @cd $@; pwd; $(MAKE) $(NO_STATE) $(TARGET) + +install_h check: FRC + +lintlib lint: modlintlib .WAIT $(LINT_DEPS) + +# +# The 'lint.platmod' target lints the i86hvm platform modules against the i86pc +# kernel. This ends up doing all the kernel cross-checks. +# +LINT_TARGET = lint.platmod +INTEL_LIB_DIR = $(UTSBASE)/intel/lint-libs/$(OBJS_DIR) +INTEL_LINTS = genunix +LINT_LIBS = $(LINT_LIB) \ + -L$(HVM_LINT_LIB_DIR) \ + -L$(LINT_LIB_DIR) \ + $(GENUNIX_KMODS:%=-l%) \ + $(PARALLEL_KMODS:%=-l%) \ + $(CLOSED_KMODS:%=-l%) \ + -L$(INTEL_LIB_DIR) \ + $(INTEL_LINTS:%=-l%) + +# workaround for multiply defined errors +lint.platmod := LINTFLAGS += -erroff=E_NAME_MULTIPLY_DEF2 + +lint.platmod: modlintlib + @-$(ECHO) "\ni86hvm platform-dependent module: global crosschecks:" + @-$(LINT) $(LINTFLAGS) $(LINT_LIBS) 2>&1 | $(LGREP.2) + +# +# Include common targets. +# +include $(UTSBASE)/$(PLATFORM)/i86hvm/Makefile.targ diff --git a/usr/src/uts/i86pc/i86hvm/Makefile.files b/usr/src/uts/i86pc/i86hvm/Makefile.files new file mode 100644 index 0000000000..03ff880f7c --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/Makefile.files @@ -0,0 +1,49 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# This Makefile defines file modules in the directory uts/i86pc/i86hvm +# and its children. These are the source files which are i86pc/i86hvm +# "implementation architecture" dependent. +# + +# +# Define objects +# +PV_CMDK_OBJS += pv_cmdk.o +PV_RTLS_OBJS += pv_rtls.o +HVM_BOOTSTRAP_OBJS += hvm_bootstrap.o +XDF_OBJS += xdf.o +XNF_OBJS += xnf.o +XPV_OBJS += xpv_support.o xvdi.o gnttab.o evtchn.o \ + xenbus_comms.o xenbus_client.o xenbus_probe.o \ + xenbus_xs.o hypercall.o hypersubr.o +XPVD_OBJS += xpvd.o + +# +# Include i86hvm header files +# -I$(UTSBASE)/../common +INC_PATH += -I$(UTSBASE)/common/xen -I$(UTSBASE)/i86pc/i86hvm diff --git a/usr/src/uts/i86pc/i86hvm/Makefile.i86hvm b/usr/src/uts/i86pc/i86hvm/Makefile.i86hvm new file mode 100644 index 0000000000..0e414c5fb1 --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/Makefile.i86hvm @@ -0,0 +1,66 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/i86pc/Makefile.hvm +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# This makefile provides support for building PV drivers that run +# in an HVM environment. +# + +ROOT_HVM_DIR = $(ROOT_PLAT_DIR)/i86hvm +ROOT_HVM_MOD_DIR = $(ROOT_HVM_DIR)/kernel +ROOT_HVM_DRV_DIR_32 = $(ROOT_HVM_MOD_DIR)/drv +ROOT_HVM_DRV_DIR_64 = $(ROOT_HVM_MOD_DIR)/drv/$(MACH64) +ROOT_HVM_DRV_DIR = $(ROOT_HVM_DRV_DIR_$(CLASS)) +ROOT_HVM_MISC_DIR_32 = $(ROOT_HVM_MOD_DIR)/misc +ROOT_HVM_MISC_DIR_64 = $(ROOT_HVM_MOD_DIR)/misc/$(MACH64) +ROOT_HVM_MISC_DIR = $(ROOT_HVM_MISC_DIR_$(CLASS)) +USR_HVM_DIR = $(USR_PLAT_DIR)/i86hvm + +HVM_LINT_LIB_DIR= $(UTSBASE)/$(PLATFORM)/i86hvm/lint-libs/$(OBJS_DIR) + +# +# Define modules. +# +HVM_DRV_KMODS = pv_cmdk pv_rtls xdf xnf xpv xpvd +HVM_MISC_KMODS = hvm_bootstrap +HVM_KMODS = $(HVM_DRV_KMODS) $(HVM_MISC_KMODS) + +include $(UTSBASE)/i86pc/i86hvm/Makefile.files +# +# Include common rules. +# +include $(UTSBASE)/i86pc/Makefile.i86pc + +LINTS_DIR = $(OBJS_DIR) +LINT_LIB_DIR = $(UTSBASE)/$(PLATFORM)/lint-libs/$(OBJS_DIR) + +# +# Indicate that we are building for the i86hvm semi-platform. +# Also use Solaris specific code in xen public header files. +# +CPPFLAGS += -DXPV_HVM_DRIVER -D_SOLARIS +ASFLAGS += -DXPV_HVM_DRIVER diff --git a/usr/src/uts/i86pc/i86hvm/Makefile.rules b/usr/src/uts/i86pc/i86hvm/Makefile.rules new file mode 100644 index 0000000000..4a9c0edec8 --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/Makefile.rules @@ -0,0 +1,73 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# + +# +# This Makefile defines the build rules for the directory +# uts/i86pc/i86hvm. +# +# The following two-level ordering must be maintained in this file. +# Lines are sorted first in order of decreasing specificity based on +# the first directory component. That is, i86pc rules come before +# intel rules come before common rules. +# +# Lines whose initial directory components are equal are sorted +# alphabetically by the remaining components. 
+ +# +# Section 1a: C object build rules +# + +$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/i86hvm/io/xpv/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/i86hvm/io/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/xen/io/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/xen/os/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +# +# Section 1b: Lint `object' build rules +# + +$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/i86hvm/io/xpv/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/i86hvm/io/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/xen/io/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/xen/os/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/i86pc/i86hvm/Makefile.targ b/usr/src/uts/i86pc/i86hvm/Makefile.targ new file mode 100644 index 0000000000..29493a64fb --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/Makefile.targ @@ -0,0 +1,63 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#pragma ident "%Z%%M% %I% %E% SMI" +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# Common targets for i86hvm platform-implementation specific modules. +# + +.KEEP_STATE: + +# +# Rules for implementation subdirectories. +# +$(ROOT_HVM_DIR): + -$(INS.dir.root.sys) + +$(ROOT_HVM_MOD_DIR): $(ROOT_HVM_DIR) + -$(INS.dir.root.sys) + +$(ROOT_HVM_DRV_DIR): $(ROOT_MOD_DIR) + -$(INS.dir.root.sys) + +$(ROOT_HVM_MISC_DIR): $(ROOT_MOD_DIR) + -$(INS.dir.root.sys) + +$(ROOT_HVM_MOD_DIR)/%: $(OBJS_DIR)/% $(ROOT_HVM_MOD_DIR) FRC + $(INS.file) + +$(ROOT_HVM_DRV_DIR)/%: $(OBJS_DIR)/% $(ROOT_HVM_DRV_DIR) FRC + $(INS.file) + +$(ROOT_HVM_MISC_DIR)/%: $(OBJS_DIR)/% $(ROOT_HVM_MISC_DIR) FRC + $(INS.file) + +$(USR_HVM_DIR): + -$(INS.dir.root.sys) + +# +# Include common targets. +# +include $(UTSBASE)/$(PLATFORM)/i86hvm/Makefile.rules +include $(UTSBASE)/$(PLATFORM)/Makefile.targ diff --git a/usr/src/uts/i86pc/i86hvm/hvm_bootstrap/Makefile b/usr/src/uts/i86pc/i86hvm/hvm_bootstrap/Makefile new file mode 100644 index 0000000000..ea250a88a7 --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/hvm_bootstrap/Makefile @@ -0,0 +1,83 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# uts/i86pc/hvm_bootstrap/Makefile +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# +# i86pc architecture dependent +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../../.. + +# +# Define the module and object file sets. +# +MODULE = hvm_bootstrap +OBJECTS = $(HVM_BOOTSTRAP_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(HVM_BOOTSTRAP_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_HVM_MISC_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# Overrides +LDFLAGS += -dy + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.targ diff --git a/usr/src/uts/i86pc/i86hvm/io/hvm_bootstrap.c b/usr/src/uts/i86pc/i86hvm/io/hvm_bootstrap.c new file mode 100644 index 0000000000..95b9df1a82 --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/io/hvm_bootstrap.c @@ -0,0 +1,89 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/modctl.h> +#include <sys/sunddi.h> +#include <sys/sunndi.h> + +/* + * The hvm_bootstrap misc module is installed in the i86hvm platform + * directly so it will only be loaded in HVM emulated environment. + */ + + +/* + * hvmboot_rootconf() exists to force attach all xdf disk driver nodes + * before the pv cmdk disk driver comes along and tries to access any of + * these nodes (which usually happens when mounting the root disk device + * in an hvm environment). See the block comments at the top of pv_cmdk.c + * for more information about why this is necessary. + */ +int +hvmboot_rootconf() +{ + dev_info_t *xpvd_dip; + major_t xdf_major; + + xdf_major = ddi_name_to_major("xdf"); + if (xdf_major == (major_t)-1) + cmn_err(CE_PANIC, "unable to load xdf disk driver"); + + if (resolve_pathname("/xpvd", &xpvd_dip, NULL, NULL) != 0) + cmn_err(CE_PANIC, "unable to configure /xpvd nexus"); + + (void) ndi_devi_config_driver(xpvd_dip, 0, xdf_major); + + ndi_rele_devi(xpvd_dip); + return (0); +} + +static struct modlmisc modlmisc = { + &mod_miscops, "hvm_bootstrap misc module" +}; + +static struct modlinkage modlinkage = { + MODREV_1, (void *)&modlmisc, NULL +}; + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_init() +{ + return (mod_install(&modlinkage)); +} + +int +_fini() +{ + return (EBUSY); +} diff --git a/usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c b/usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c new file mode 100644 index 0000000000..4ad9b06aec --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c @@ -0,0 +1,1541 @@ +/* + * CDDL HEADER 
START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/scsi/scsi_types.h> +#include <sys/modctl.h> +#include <sys/cmlb.h> +#include <sys/types.h> +#include <sys/xpv_support.h> +#include <sys/xendev.h> +#include <sys/gnttab.h> +#include <public/xen.h> +#include <public/grant_table.h> +#include <io/xdf.h> +#include <sys/vtoc.h> +#include <sys/dkio.h> +#include <sys/dktp/dadev.h> +#include <sys/dktp/dadkio.h> +#include <sys/dktp/tgdk.h> +#include <sys/dktp/bbh.h> +#include <sys/dktp/cmdk.h> +#include <sys/dktp/altsctr.h> + +/* + * General Notes + * + * We don't support disks with bad block mappins. We have this + * limitation because the underlying xdf driver doesn't support + * bad block remapping. If there is a need to support this feature + * it should be added directly to the xdf driver and we should just + * pass requests strait on through and let it handle the remapping. 
+ * Also, it's probably worth pointing out that most modern disks do bad + * block remapping internally in the hardware so there's actually less + * of a chance of us ever discovering bad blocks. Also, in most cases + * this driver (and the xdf driver) will only be used with virtualized + * devices, so one might wonder why a virtual device would ever actually + * experience bad blocks. To wrap this up, you might be wondering how + * these bad block mappings get created and how they are managed. Well, + * there are two tools for managing bad block mappings, format(1M) and + * addbadsec(1M). Format(1M) can be used to do a surface scan of a disk + * to attempt to find bad blocks and create mappings for them. Format(1M) + * and addbadsec(1M) can also be used to edit existing mappings that may + * be saved on the disk. + * + * The underlying PV driver that this driver passes on requests to is the + * xdf driver. Since in most cases the xdf driver doesn't deal with + * physical disks it has its own algorithm for assigning a physical + * geometry to a virtual disk (ie, cylinder count, head count, etc.) + * The default values chosen by the xdf driver may not match those + * assigned to a disk by a hardware disk emulator in an HVM environment. + * This is a problem since these physical geometry attributes affect + * things like the partition table, backup label location, etc. So + * to emulate disk devices correctly we need to know the physical geometry + * that was assigned to a disk at the time of its initialization. + * Normally in an HVM environment this information will be passed to + * the BIOS and operating system from the hardware emulator that is + * emulating the disk devices. In the case of a solaris dom0+xvm + * this would be qemu. So to work around this issue, this driver will + * query the emulated hardware to get the assigned physical geometry + * and then pass this geometry onto the xdf driver so that it can use it.
+ * But really, this information is essentially metadata about the disk + * that should be kept with the disk image itself. (Assuming of course + * that a disk image is the actual backingstore for this emulated device.) + * This metadata should also be made available to PV drivers via a common + * mechanism, probably the xenstore. The fact that this metadata isn't + * available outside of HVM domains means that it's difficult to move + * disks between HVM and PV domains, since a fully PV domain will have no + * way of knowing what the correct geometry of the target device is. + * (Short of reading the disk, looking for things like partition tables + * and labels, and taking a best guess at what the geometry was when + * the disk was initialized. Unsurprisingly, qemu actually does this.) + * + * This driver has to map cmdk device instances into their corresponding + * xdf device instances. We have to do this to ensure that when a user + * accesses an emulated cmdk device we map those accesses to the proper + * paravirtualized device. Basically what we need to know is how multiple + * 'disk' entries in a domU configuration file get mapped to emulated + * cmdk devices and to xdf devices. The 'disk' entry to xdf instance + * mappings we know because those are done within the Solaris xvdi code + * and the xpvd nexus driver. But the config to emulated devices mappings + * are handled entirely within the xen management tool chain and the + * hardware emulator. Since all the tools that establish these mappings + * live in dom0, dom0 should really supply us with this information, + * probably via the xenstore. Unfortunately it doesn't, so since there's + * no good way to determine this mapping dynamically, this driver uses + * a hard coded set of static mappings. These mappings are hardware + * emulator specific because each different hardware emulator could have + * a different device tree with different cmdk device paths.
This + * means that if we want to continue to use this static mapping approach + * to allow Solaris to run on different hardware emulators we'll have + * to analyze each of those emulators to determine what paths they + * use and hard code those paths into this driver. yech. This metadata + * really needs to be supplied to us by dom0. + * + * This driver accesses underlying xdf nodes. Unfortunately, devices + * must create minor nodes during attach, and for disk devices to create + * minor nodes, they have to look at the label on the disk, so this means + * that disk drivers must be able to access a disk's contents during + * attach. That means that this disk driver must be able to access + * underlying xdf nodes during attach. Unfortunately, due to device tree + * locking restrictions, we cannot have an attach operation occurring on + * this device and then attempt to access another device which may + * cause another attach to occur in a different device tree branch + * since this could result in deadlock. Hence, this driver can only + * access xdf device nodes that we know are attached, and it can't use + * any ddi interfaces to access those nodes if those interfaces could + * trigger an attach of the xdf device. So this driver works around + * these restrictions by talking directly to xdf devices via + * xdf_hvm_hold(). This interface takes a pathname to an xdf device, + * and if that device is already attached then it returns a held dip + * pointer for that device node. This prevents us from getting into + * deadlock situations, but now we need a mechanism to ensure that all + * the xdf device nodes this driver might access are attached before + * this driver tries to access them. This is accomplished via the + * hvmboot_rootconf() callback which is invoked just before root is + * mounted. hvmboot_rootconf() will attach xpvd and tell it to configure + * all xdf devices visible to the system.
All these xdf device nodes + * will also be marked with the "ddi-no-autodetach" property so that + * once they are configured, the will not be automatically unconfigured. + * The only way that they could be unconfigured is if the administrator + * explicitly attempts to unload required modules via rem_drv(1M) + * or modunload(1M). + */ + +/* + * 16 paritions + fdisk (see xdf.h) + */ +#define XDF_DEV2UNIT(dev) XDF_INST((getminor((dev)))) +#define XDF_DEV2PART(dev) XDF_PART((getminor((dev)))) + +#define OTYP_VALID(otyp) ((otyp == OTYP_BLK) || \ + (otyp == OTYP_CHR) || \ + (otyp == OTYP_LYR)) + +#define PV_CMDK_NODES 4 + +typedef struct hvm_to_pv { + char *h2p_hvm_path; + char *h2p_pv_path; +} hvm_to_pv_t; + +/* + */ +static hvm_to_pv_t pv_cmdk_h2p_xen_qemu[] = { + /* + * The paths mapping here are very specific to xen and qemu. When a + * domU is booted under xen in HVM mode, qemu is normally used to + * emulate up to four ide disks. These disks always have the four + * path listed below. To configure an emulated ide device, the + * xen domain configuration file normally has an entry that looks + * like this: + * disk = [ 'file:/foo.img,hda,w' ] + * + * The part we're interested in is the 'hda', which we'll call the + * xen disk device name here. The xen management tools (which parse + * the xen domain configuration file and launch qemu) makes the + * following assumptions about this value: + * hda == emulated ide disk 0 (ide bus 0, master) + * hdb == emulated ide disk 1 (ide bus 0, slave) + * hdc == emulated ide disk 2 (ide bus 1, master) + * hdd == emulated ide disk 3 (ide bus 1, slave) + * + * (Uncoincidentally, these xen disk device names actually map to + * the /dev filesystem names of ide disk devices in Linux. So in + * Linux /dev/hda is the first ide disk.) So for the first part of + * our mapping we've just hardcoded the cmdk paths that we know + * qemu will use. 
+ * + * To understand the second half of the mapping (ie, the xdf device + * that each emulated cmdk device should be mapped to) we need to + * know the solaris device node address that will be assigned to + * each xdf device. (The device node address is the hex number that + * comes after the "xdf@" in the device path.) + * + * Normally when a domU is run in non-HVM mode, the xen disk device + * names in the xen domain configuration file are specified with + * integers instead of Linux device names. (for example, '0' would + * be used instead of 'hda'.) So in the non-HVM case we simply + * convert the xen disk device name (which is an integer) into a + * hex number and use it as the Solaris xdf device node address. + * But when we're running in HVM mode then we have a string for the + * xen disk device name, so we can't simply use that as a solaris + * device node address. Instead we fall back to using the xenstore + * device id for the xen disk device as the xdf device node address. + * The xdf device node address assignment happens in xvdi_init_dev(). + * + * So the question becomes, how do we know what the xenstore device + * id for an emulated disk will be? Well, it turns out that since the + * xen management tools expect the disk device names to be Linux + * device names, those same management tools assign each disk a + * device id that matches the dev_t of the corresponding device + * under Linux. (Big shocker.)
This xen device name-to-id mapping + * is currently all hard coded here: + * xen.hg/tools/python/xen/util/blkif.py`blkdev_name_to_number() + * + * So looking at the code above we can see the following xen disk + * device name to xenstore device id mappings: + * 'hda' --> 0x300 == 0t768 == ((3 * 256) + (0 * 64)) + * 'hdb' --> 0x340 == 0t832 == ((3 * 256) + (1 * 64)) + * 'hdc' --> 0x1600 == 0t5632 == ((22 * 256) + (0 * 64)) + * 'hdd' --> 0x1640 == 0t5696 == ((22 * 256) + (1 * 64)) + */ + { "/pci@0,0/pci-ide@1,1/ide@0/cmdk@0,0", "/xpvd/xdf@300" }, + { "/pci@0,0/pci-ide@1,1/ide@0/cmdk@1,0", "/xpvd/xdf@340" }, + { "/pci@0,0/pci-ide@1,1/ide@1/cmdk@0,0", "/xpvd/xdf@1600" }, + { "/pci@0,0/pci-ide@1,1/ide@1/cmdk@1,0", "/xpvd/xdf@1640" }, + { NULL, 0 } +}; + +typedef struct pv_cmdk { + dev_info_t *dk_dip; + cmlb_handle_t dk_cmlbhandle; + ddi_devid_t dk_devid; + kmutex_t dk_mutex; + dev_info_t *dk_xdf_dip; + dev_t dk_xdf_dev; + int dk_xdf_otyp_count[OTYPCNT][XDF_PEXT]; + ldi_handle_t dk_xdf_lh[XDF_PEXT]; +} pv_cmdk_t; + +/* + * Globals + */ +static void *pv_cmdk_state; +static major_t pv_cmdk_major; +static hvm_to_pv_t *pv_cmdk_h2p; + +/* + * Function prototypes for xdf callback functions + */ +extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *); +extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, + void *); + +static boolean_t +pv_cmdk_isopen_part(struct pv_cmdk *dkp, int part) +{ + int otyp; + + ASSERT(MUTEX_HELD(&dkp->dk_mutex)); + + for (otyp = 0; (otyp < OTYPCNT); otyp++) { + if (dkp->dk_xdf_otyp_count[otyp][part] != 0) + return (B_TRUE); + } + return (B_FALSE); +} + +/* + * Cmlb ops vectors, allows the cmlb module to directly access the entire + * pv_cmdk disk device without going through any partitioning layers. 
+ */ +/*ARGSUSED*/ +static int +pv_cmdk_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, + diskaddr_t start, size_t count, void *tg_cookie) +{ + int instance = ddi_get_instance(dip); + struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); + + if (dkp == NULL) + return (ENXIO); + + return (xdf_lb_rdwr(dkp->dk_xdf_dip, cmd, bufaddr, start, count, + tg_cookie)); +} + +/*ARGSUSED*/ +static int +pv_cmdk_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) +{ + int instance = ddi_get_instance(dip); + struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); + int err; + + if (dkp == NULL) + return (ENXIO); + + if (cmd == TG_GETVIRTGEOM) { + cmlb_geom_t pgeom, *vgeomp; + diskaddr_t capacity; + + /* + * The native xdf driver doesn't support this ioctl. + * Intead of passing it on, emulate it here so that the + * results look the same as what we get for a real cmdk + * device. + * + * Get the real size of the device + */ + if ((err = xdf_lb_getinfo(dkp->dk_xdf_dip, + TG_GETPHYGEOM, &pgeom, tg_cookie)) != 0) + return (err); + capacity = pgeom.g_capacity; + + /* + * If the controller returned us something that doesn't + * really fit into an Int 13/function 8 geometry + * result, just fail the ioctl. See PSARC 1998/313. + */ + if (capacity >= (63 * 254 * 1024)) + return (EINVAL); + + vgeomp = (cmlb_geom_t *)arg; + vgeomp->g_capacity = capacity; + vgeomp->g_nsect = 63; + vgeomp->g_nhead = 254; + vgeomp->g_ncyl = capacity / (63 * 254); + vgeomp->g_acyl = 0; + vgeomp->g_secsize = 512; + vgeomp->g_intrlv = 1; + vgeomp->g_rpm = 3600; + return (0); + } + + return (xdf_lb_getinfo(dkp->dk_xdf_dip, cmd, arg, tg_cookie)); +} + +static cmlb_tg_ops_t pv_cmdk_lb_ops = { + TG_DK_OPS_VERSION_1, + pv_cmdk_lb_rdwr, + pv_cmdk_lb_getinfo +}; + +/* + * devid management functions + */ + +/* + * pv_cmdk_get_modser() is basically a local copy of + * cmdk_get_modser() modified to work without the dadk layer. + * (which the non-pv version of the cmdk driver uses.) 
+ */ +static int +pv_cmdk_get_modser(struct pv_cmdk *dkp, int ioccmd, char *buf, int len) +{ + struct scsi_device *scsi_device; + opaque_t ctlobjp; + dadk_ioc_string_t strarg; + char *s; + char ch; + boolean_t ret; + int i; + int tb; + + strarg.is_buf = buf; + strarg.is_size = len; + scsi_device = ddi_get_driver_private(dkp->dk_dip); + ctlobjp = scsi_device->sd_address.a_hba_tran; + if (CTL_IOCTL(ctlobjp, + ioccmd, (uintptr_t)&strarg, FNATIVE | FKIOCTL) != 0) + return (0); + + /* + * valid model/serial string must contain a non-zero non-space + * trim trailing spaces/NULL + */ + ret = B_FALSE; + s = buf; + for (i = 0; i < strarg.is_size; i++) { + ch = *s++; + if (ch != ' ' && ch != '\0') + tb = i + 1; + if (ch != ' ' && ch != '\0' && ch != '0') + ret = B_TRUE; + } + + if (ret == B_FALSE) + return (0); + + return (tb); +} + +/* + * pv_cmdk_devid_modser() is basically a copy of cmdk_devid_modser() + * that has been modified to use local pv cmdk driver functions. + * + * Build a devid from the model and serial number + * Return DDI_SUCCESS or DDI_FAILURE. + */ +static int +pv_cmdk_devid_modser(struct pv_cmdk *dkp) +{ + int rc = DDI_FAILURE; + char *hwid; + int modlen; + int serlen; + + /* + * device ID is a concatenation of model number, '=', serial number. 
+ */ + hwid = kmem_alloc(CMDK_HWIDLEN, KM_SLEEP); + modlen = pv_cmdk_get_modser(dkp, DIOCTL_GETMODEL, hwid, CMDK_HWIDLEN); + if (modlen == 0) + goto err; + + hwid[modlen++] = '='; + serlen = pv_cmdk_get_modser(dkp, DIOCTL_GETSERIAL, + hwid + modlen, CMDK_HWIDLEN - modlen); + if (serlen == 0) + goto err; + + hwid[modlen + serlen] = 0; + + /* Initialize the device ID, trailing NULL not included */ + rc = ddi_devid_init(dkp->dk_dip, DEVID_ATA_SERIAL, modlen + serlen, + hwid, (ddi_devid_t *)&dkp->dk_devid); + if (rc != DDI_SUCCESS) + goto err; + + kmem_free(hwid, CMDK_HWIDLEN); + return (DDI_SUCCESS); + +err: + kmem_free(hwid, CMDK_HWIDLEN); + return (DDI_FAILURE); +} + +/* + * pv_cmdk_devid_read() is basically a local copy of + * cmdk_devid_read() modified to work without the dadk layer. + * (which the non-pv version of the cmdk driver uses.) + * + * Read a devid from on the first block of the last track of + * the last cylinder. Make sure what we read is a valid devid. + * Return DDI_SUCCESS or DDI_FAILURE. 
+ */ +static int +pv_cmdk_devid_read(struct pv_cmdk *dkp) +{ + diskaddr_t blk; + struct dk_devid *dkdevidp; + uint_t *ip, chksum; + int i; + + if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &blk, 0) != 0) + return (DDI_FAILURE); + + dkdevidp = kmem_zalloc(NBPSCTR, KM_SLEEP); + if (pv_cmdk_lb_rdwr(dkp->dk_dip, + TG_READ, dkdevidp, blk, NBPSCTR, NULL) != 0) + goto err; + + /* Validate the revision */ + if ((dkdevidp->dkd_rev_hi != DK_DEVID_REV_MSB) || + (dkdevidp->dkd_rev_lo != DK_DEVID_REV_LSB)) + goto err; + + /* Calculate the checksum */ + chksum = 0; + ip = (uint_t *)dkdevidp; + for (i = 0; i < ((NBPSCTR - sizeof (int))/sizeof (int)); i++) + chksum ^= ip[i]; + if (DKD_GETCHKSUM(dkdevidp) != chksum) + goto err; + + /* Validate the device id */ + if (ddi_devid_valid((ddi_devid_t)dkdevidp->dkd_devid) != DDI_SUCCESS) + goto err; + + /* keep a copy of the device id */ + i = ddi_devid_sizeof((ddi_devid_t)dkdevidp->dkd_devid); + dkp->dk_devid = kmem_alloc(i, KM_SLEEP); + bcopy(dkdevidp->dkd_devid, dkp->dk_devid, i); + kmem_free(dkdevidp, NBPSCTR); + return (DDI_SUCCESS); + +err: + kmem_free(dkdevidp, NBPSCTR); + return (DDI_FAILURE); +} + +/* + * pv_cmdk_devid_fabricate() is basically a local copy of + * cmdk_devid_fabricate() modified to work without the dadk layer. + * (which the non-pv version of the cmdk driver uses.) + * + * Create a devid and write it on the first block of the last track of + * the last cylinder. + * Return DDI_SUCCESS or DDI_FAILURE. 
+ */ +static int +pv_cmdk_devid_fabricate(struct pv_cmdk *dkp) +{ + ddi_devid_t devid = NULL; /* devid made by ddi_devid_init */ + struct dk_devid *dkdevidp = NULL; /* devid struct stored on disk */ + diskaddr_t blk; + uint_t *ip, chksum; + int i; + + if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &blk, 0) != 0) + return (DDI_FAILURE); + + if (ddi_devid_init(dkp->dk_dip, DEVID_FAB, 0, NULL, &devid) != + DDI_SUCCESS) + return (DDI_FAILURE); + + /* allocate a buffer */ + dkdevidp = (struct dk_devid *)kmem_zalloc(NBPSCTR, KM_SLEEP); + + /* Fill in the revision */ + dkdevidp->dkd_rev_hi = DK_DEVID_REV_MSB; + dkdevidp->dkd_rev_lo = DK_DEVID_REV_LSB; + + /* Copy in the device id */ + i = ddi_devid_sizeof(devid); + if (i > DK_DEVID_SIZE) + goto err; + bcopy(devid, dkdevidp->dkd_devid, i); + + /* Calculate the chksum */ + chksum = 0; + ip = (uint_t *)dkdevidp; + for (i = 0; i < ((NBPSCTR - sizeof (int))/sizeof (int)); i++) + chksum ^= ip[i]; + + /* Fill in the checksum */ + DKD_FORMCHKSUM(chksum, dkdevidp); + + if (pv_cmdk_lb_rdwr(dkp->dk_dip, + TG_WRITE, dkdevidp, blk, NBPSCTR, NULL) != 0) + goto err; + + kmem_free(dkdevidp, NBPSCTR); + + dkp->dk_devid = devid; + return (DDI_SUCCESS); + +err: + if (dkdevidp != NULL) + kmem_free(dkdevidp, NBPSCTR); + if (devid != NULL) + ddi_devid_free(devid); + return (DDI_FAILURE); +} + +/* + * pv_cmdk_devid_setup() is basically a local copy ofcmdk_devid_setup() + * that has been modified to use local pv cmdk driver functions. + * + * Create and register the devid. + * There are 4 different ways we can get a device id: + * 1. Already have one - nothing to do + * 2. Build one from the drive's model and serial numbers + * 3. Read one from the disk (first sector of last track) + * 4. Fabricate one and write it on the disk. + * If any of these succeeds, register the deviceid + */ +static void +pv_cmdk_devid_setup(struct pv_cmdk *dkp) +{ + int rc; + + /* Try options until one succeeds, or all have failed */ + + /* 1. 
All done if already registered */ + + if (dkp->dk_devid != NULL) + return; + + /* 2. Build a devid from the model and serial number */ + rc = pv_cmdk_devid_modser(dkp); + if (rc != DDI_SUCCESS) { + /* 3. Read devid from the disk, if present */ + rc = pv_cmdk_devid_read(dkp); + + /* 4. otherwise make one up and write it on the disk */ + if (rc != DDI_SUCCESS) + rc = pv_cmdk_devid_fabricate(dkp); + } + + /* If we managed to get a devid any of the above ways, register it */ + if (rc == DDI_SUCCESS) + (void) ddi_devid_register(dkp->dk_dip, dkp->dk_devid); +} + +/* + * Local Functions + */ +static int +pv_cmdk_iodone(struct buf *bp) +{ + struct buf *bp_orig = bp->b_chain; + + /* Propegate back the io results */ + bp_orig->b_resid = bp->b_resid; + bioerror(bp_orig, geterror(bp)); + biodone(bp_orig); + + freerbuf(bp); + return (0); +} + +static int +pv_cmdkstrategy(struct buf *bp) +{ + dev_t dev = bp->b_edev; + int instance = XDF_DEV2UNIT(dev); + int part = XDF_DEV2PART(dev); + struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); + dev_t xdf_devt; + struct buf *bp_clone; + + /* + * Sanity checks that the dev_t associated with the buf we were + * passed actually corresponds us and that the partition we're + * trying to access is actually open. On debug kernels we'll + * panic and on non-debug kernels we'll return failure. 
+ */ + ASSERT(getmajor(dev) == pv_cmdk_major); + if (getmajor(dev) != pv_cmdk_major) + goto err; + + mutex_enter(&dkp->dk_mutex); + ASSERT(pv_cmdk_isopen_part(dkp, part)); + if (!pv_cmdk_isopen_part(dkp, part)) { + mutex_exit(&dkp->dk_mutex); + goto err; + } + mutex_exit(&dkp->dk_mutex); + + /* clone this buffer */ + xdf_devt = dkp->dk_xdf_dev | part; + bp_clone = bioclone(bp, 0, bp->b_bcount, xdf_devt, bp->b_blkno, + pv_cmdk_iodone, NULL, KM_SLEEP); + bp_clone->b_chain = bp; + + /* + * If we're being invoked on behalf of the physio() call in + * pv_cmdk_dioctl_rwcmd() then b_private will be set to + * XB_SLICE_NONE and we need to propegate this flag into the + * cloned buffer so that the xdf driver will see it. + */ + if (bp->b_private == (void *)XB_SLICE_NONE) + bp_clone->b_private = (void *)XB_SLICE_NONE; + + /* + * Pass on the cloned buffer. Note that we don't bother to check + * for failure because the xdf strategy routine will have to + * invoke biodone() if it wants to return an error, which means + * that the pv_cmdk_iodone() callback will get invoked and it + * will propegate the error back up the stack and free the cloned + * buffer. 
+ */ + ASSERT(dkp->dk_xdf_lh[part] != NULL); + return (ldi_strategy(dkp->dk_xdf_lh[part], bp_clone)); + +err: + bioerror(bp, ENXIO); + bp->b_resid = bp->b_bcount; + biodone(bp); + return (0); +} + +/*ARGSUSED*/ +static int +pv_cmdkread(dev_t dev, struct uio *uio, cred_t *credp) +{ + int instance = XDF_DEV2UNIT(dev); + int part = XDF_DEV2PART(dev); + struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); + + return (ldi_read(dkp->dk_xdf_lh[part], uio, credp)); +} + +/*ARGSUSED*/ +static int +pv_cmdkwrite(dev_t dev, struct uio *uio, cred_t *credp) +{ + int instance = XDF_DEV2UNIT(dev); + int part = XDF_DEV2PART(dev); + struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); + + return (ldi_write(dkp->dk_xdf_lh[part], uio, credp)); +} + +/*ARGSUSED*/ +static int +pv_cmdkaread(dev_t dev, struct aio_req *aio, cred_t *credp) +{ + int instance = XDF_DEV2UNIT(dev); + int part = XDF_DEV2PART(dev); + struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); + return (ldi_aread(dkp->dk_xdf_lh[part], aio, credp)); +} + +/*ARGSUSED*/ +static int +pv_cmdkawrite(dev_t dev, struct aio_req *aio, cred_t *credp) +{ + int instance = XDF_DEV2UNIT(dev); + int part = XDF_DEV2PART(dev); + struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); + return (ldi_awrite(dkp->dk_xdf_lh[part], aio, credp)); +} + +static int +pv_cmdkdump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) +{ + int instance = XDF_DEV2UNIT(dev); + int part = XDF_DEV2PART(dev); + struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); + + return (ldi_dump(dkp->dk_xdf_lh[part], addr, blkno, nblk)); +} + +/* + * pv_rwcmd_copyin() is a duplicate of rwcmd_copyin(). 
+ */ +static int +pv_rwcmd_copyin(struct dadkio_rwcmd *rwcmdp, caddr_t inaddr, int flag) +{ + switch (ddi_model_convert_from(flag)) { + case DDI_MODEL_ILP32: { + struct dadkio_rwcmd32 cmd32; + + if (ddi_copyin(inaddr, &cmd32, + sizeof (struct dadkio_rwcmd32), flag)) { + return (EFAULT); + } + + rwcmdp->cmd = cmd32.cmd; + rwcmdp->flags = cmd32.flags; + rwcmdp->blkaddr = (daddr_t)cmd32.blkaddr; + rwcmdp->buflen = cmd32.buflen; + rwcmdp->bufaddr = (caddr_t)(intptr_t)cmd32.bufaddr; + /* + * Note: we do not convert the 'status' field, + * as it should not contain valid data at this + * point. + */ + bzero(&rwcmdp->status, sizeof (rwcmdp->status)); + break; + } + case DDI_MODEL_NONE: { + if (ddi_copyin(inaddr, rwcmdp, + sizeof (struct dadkio_rwcmd), flag)) { + return (EFAULT); + } + } + } + return (0); +} + +/* + * pv_rwcmd_copyout() is a duplicate of rwcmd_copyout(). + */ +static int +pv_rwcmd_copyout(struct dadkio_rwcmd *rwcmdp, caddr_t outaddr, int flag) +{ + switch (ddi_model_convert_from(flag)) { + case DDI_MODEL_ILP32: { + struct dadkio_rwcmd32 cmd32; + + cmd32.cmd = rwcmdp->cmd; + cmd32.flags = rwcmdp->flags; + cmd32.blkaddr = rwcmdp->blkaddr; + cmd32.buflen = rwcmdp->buflen; + ASSERT64(((uintptr_t)rwcmdp->bufaddr >> 32) == 0); + cmd32.bufaddr = (caddr32_t)(uintptr_t)rwcmdp->bufaddr; + + cmd32.status.status = rwcmdp->status.status; + cmd32.status.resid = rwcmdp->status.resid; + cmd32.status.failed_blk_is_valid = + rwcmdp->status.failed_blk_is_valid; + cmd32.status.failed_blk = rwcmdp->status.failed_blk; + cmd32.status.fru_code_is_valid = + rwcmdp->status.fru_code_is_valid; + cmd32.status.fru_code = rwcmdp->status.fru_code; + + bcopy(rwcmdp->status.add_error_info, + cmd32.status.add_error_info, DADKIO_ERROR_INFO_LEN); + + if (ddi_copyout(&cmd32, outaddr, + sizeof (struct dadkio_rwcmd32), flag)) + return (EFAULT); + break; + } + case DDI_MODEL_NONE: { + if (ddi_copyout(rwcmdp, outaddr, + sizeof (struct dadkio_rwcmd), flag)) + return (EFAULT); + } + } + return (0); 
+} + +static void +pv_cmdkmin(struct buf *bp) +{ + if (bp->b_bcount > DK_MAXRECSIZE) + bp->b_bcount = DK_MAXRECSIZE; +} + +static int +pv_cmdk_dioctl_rwcmd(dev_t dev, intptr_t arg, int flag) +{ + struct dadkio_rwcmd *rwcmdp; + struct iovec aiov; + struct uio auio; + struct buf *bp; + int rw, status; + + rwcmdp = kmem_alloc(sizeof (struct dadkio_rwcmd), KM_SLEEP); + status = pv_rwcmd_copyin(rwcmdp, (caddr_t)arg, flag); + + if (status != 0) + goto out; + + switch (rwcmdp->cmd) { + case DADKIO_RWCMD_READ: + case DADKIO_RWCMD_WRITE: + break; + default: + status = EINVAL; + goto out; + } + + bzero((caddr_t)&aiov, sizeof (struct iovec)); + aiov.iov_base = rwcmdp->bufaddr; + aiov.iov_len = rwcmdp->buflen; + + bzero((caddr_t)&auio, sizeof (struct uio)); + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_loffset = (offset_t)rwcmdp->blkaddr * (offset_t)XB_BSIZE; + auio.uio_resid = rwcmdp->buflen; + auio.uio_segflg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE; + + /* + * Tell the xdf driver that this I/O request is using an absolute + * offset. + */ + bp = getrbuf(KM_SLEEP); + bp->b_private = (void *)XB_SLICE_NONE; + + rw = ((rwcmdp->cmd == DADKIO_RWCMD_WRITE) ? B_WRITE : B_READ); + status = physio(pv_cmdkstrategy, bp, dev, rw, pv_cmdkmin, &auio); + + biofini(bp); + kmem_free(bp, sizeof (buf_t)); + + if (status == 0) + status = pv_rwcmd_copyout(rwcmdp, (caddr_t)arg, flag); + +out: + kmem_free(rwcmdp, sizeof (struct dadkio_rwcmd)); + return (status); +} + +static int +pv_cmdkioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, + int *rvalp) +{ + int instance = XDF_DEV2UNIT(dev); + int part = XDF_DEV2PART(dev); + struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); + int err; + + switch (cmd) { + default: + return (ldi_ioctl(dkp->dk_xdf_lh[part], + cmd, arg, flag, credp, rvalp)); + case DKIOCGETWCE: + case DKIOCSETWCE: + return (EIO); + case DKIOCADDBAD: { + /* + * This is for ata/ide bad block handling. 
It is supposed + * to cause the driver to re-read the bad block list and + * alternate map after it has been updated. Our driver + * will refuse to attach to any disk which has a bad blocks + * list defined, so there really isn't much to do here. + */ + return (0); + } + case DKIOCGETDEF: { + /* + * I can't actually find any code that utilizes this ioctl, + * hence we're leaving it explicitly unimplemented. + */ + ASSERT("ioctl cmd unsupported by pv_cmdk: DKIOCGETDEF"); + return (EIO); + } + case DIOCTL_RWCMD: { + /* + * This just seems to just be an alternate interface for + * reading and writing the disk. Great, another way to + * do the same thing... + */ + return (pv_cmdk_dioctl_rwcmd(dev, arg, flag)); + } + case DKIOCINFO: { + dev_info_t *dip = dkp->dk_dip; + struct dk_cinfo info; + + /* Pass on the ioctl request, save the response */ + if ((err = ldi_ioctl(dkp->dk_xdf_lh[part], + cmd, (intptr_t)&info, FKIOCTL, credp, rvalp)) != 0) + return (err); + + /* Update controller info */ + info.dki_cnum = ddi_get_instance(ddi_get_parent(dip)); + (void) strlcpy(info.dki_cname, + ddi_get_name(ddi_get_parent(dip)), sizeof (info.dki_cname)); + + /* Update unit info. 
*/ + if (info.dki_ctype == DKC_VBD) + info.dki_ctype = DKC_DIRECT; + info.dki_unit = instance; + (void) strlcpy(info.dki_dname, + ddi_driver_name(dip), sizeof (info.dki_dname)); + info.dki_addr = 1; + + if (ddi_copyout(&info, (void *)arg, sizeof (info), flag)) + return (EFAULT); + return (0); + } + } /* switch (cmd) */ + /*NOTREACHED*/ +} + +/*ARGSUSED*/ +static int +pv_cmdkopen(dev_t *dev_p, int flag, int otyp, cred_t *credp) +{ + ldi_ident_t li; + dev_t dev = *dev_p; + int instance = XDF_DEV2UNIT(dev); + int part = XDF_DEV2PART(dev); + struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); + dev_t xdf_devt = dkp->dk_xdf_dev | part; + int err = 0; + + if ((otyp < 0) || (otyp >= OTYPCNT)) + return (EINVAL); + + /* allocate an ldi handle */ + VERIFY(ldi_ident_from_dev(*dev_p, &li) == 0); + + mutex_enter(&dkp->dk_mutex); + + /* + * We translate all device opens (chr, blk, and lyr) into + * block device opens. Why? Because for all the opens that + * come through this driver, we only keep around one LDI handle. + * So that handle can only be of one open type. The reason + * that we choose the block interface for this is that to use + * the block interfaces for a device the system needs to allocatex + * buf_ts, which are associated with system memory which can act + * as a cache for device data. So normally when a block device + * is closed the system will ensure that all these pages get + * flushed out of memory. But if we were to open the device + * as a character device, then when we went to close the underlying + * device (even if we had invoked the block interfaces) any data + * remaining in memory wouldn't necessairly be flushed out + * before the device was closed. 
+ */ + if (dkp->dk_xdf_lh[part] == NULL) { + ASSERT(!pv_cmdk_isopen_part(dkp, part)); + + err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp, + &dkp->dk_xdf_lh[part], li); + + if (err != 0) { + mutex_exit(&dkp->dk_mutex); + ldi_ident_release(li); + return (err); + } + + /* Disk devices really shouldn't clone */ + ASSERT(xdf_devt == (dkp->dk_xdf_dev | part)); + } else { + ldi_handle_t lh_tmp; + + ASSERT(pv_cmdk_isopen_part(dkp, part)); + + /* do ldi open/close to get flags and cred check */ + err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp, + &lh_tmp, li); + if (err != 0) { + mutex_exit(&dkp->dk_mutex); + ldi_ident_release(li); + return (err); + } + + /* Disk devices really shouldn't clone */ + ASSERT(xdf_devt == (dkp->dk_xdf_dev | part)); + (void) ldi_close(lh_tmp, flag, credp); + } + ldi_ident_release(li); + + dkp->dk_xdf_otyp_count[otyp][part]++; + + mutex_exit(&dkp->dk_mutex); + return (0); +} + +/*ARGSUSED*/ +static int +pv_cmdkclose(dev_t dev, int flag, int otyp, cred_t *credp) +{ + int instance = XDF_DEV2UNIT(dev); + int part = XDF_DEV2PART(dev); + struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); + int err = 0; + + ASSERT((otyp >= 0) && otyp < OTYPCNT); + + /* + * Sanity check that that the dev_t specified corresponds to this + * driver and that the device is actually open. On debug kernels we'll + * panic and on non-debug kernels we'll return failure. 
+ */ + ASSERT(getmajor(dev) == pv_cmdk_major); + if (getmajor(dev) != pv_cmdk_major) + return (ENXIO); + + mutex_enter(&dkp->dk_mutex); + ASSERT(pv_cmdk_isopen_part(dkp, part)); + if (!pv_cmdk_isopen_part(dkp, part)) { + mutex_exit(&dkp->dk_mutex); + return (ENXIO); + } + + ASSERT(dkp->dk_xdf_lh[part] != NULL); + ASSERT(dkp->dk_xdf_otyp_count[otyp][part] > 0); + if (otyp == OTYP_LYR) { + dkp->dk_xdf_otyp_count[otyp][part]--; + } else { + dkp->dk_xdf_otyp_count[otyp][part] = 0; + } + + if (!pv_cmdk_isopen_part(dkp, part)) { + err = ldi_close(dkp->dk_xdf_lh[part], flag, credp); + dkp->dk_xdf_lh[part] = NULL; + } + + mutex_exit(&dkp->dk_mutex); + + return (err); +} + +static int +pv_cmdk_getpgeom(dev_info_t *dip, cmlb_geom_t *pgeom) +{ + struct scsi_device *scsi_device; + struct tgdk_geom tgdk_geom; + opaque_t ctlobjp; + int err; + + scsi_device = ddi_get_driver_private(dip); + ctlobjp = scsi_device->sd_address.a_hba_tran; + if ((err = CTL_IOCTL(ctlobjp, + DIOCTL_GETPHYGEOM, (uintptr_t)&tgdk_geom, FKIOCTL)) != 0) + return (err); + + /* This driver won't work if this isn't true */ + ASSERT(tgdk_geom.g_secsiz == XB_BSIZE); + + pgeom->g_ncyl = tgdk_geom.g_cyl; + pgeom->g_acyl = tgdk_geom.g_acyl; + pgeom->g_nhead = tgdk_geom.g_head; + pgeom->g_nsect = tgdk_geom.g_sec; + pgeom->g_secsize = tgdk_geom.g_secsiz; + pgeom->g_capacity = tgdk_geom.g_cap; + pgeom->g_intrlv = 1; + pgeom->g_rpm = 3600; + return (0); +} + +/* + * pv_cmdk_bb_check() checks for the existance of bad blocks mappings in + * the alternate partition/slice. Returns B_FALSE is there are no bad + * block mappins found, and B_TRUE is there are bad block mappins found. 
+ */ +static boolean_t +pv_cmdk_bb_check(struct pv_cmdk *dkp) +{ + struct alts_parttbl *ap; + diskaddr_t nblocks, blk; + uint32_t altused, altbase, altlast; + uint16_t vtoctag; + int alts; + + /* find slice with V_ALTSCTR tag */ + for (alts = 0; alts < NDKMAP; alts++) { + + if (cmlb_partinfo(dkp->dk_cmlbhandle, alts, + &nblocks, &blk, NULL, &vtoctag, 0) != 0) { + /* no partition table exists */ + return (B_FALSE); + } + + if ((vtoctag == V_ALTSCTR) && (nblocks > 1)) + break; + } + if (alts >= NDKMAP) + return (B_FALSE); /* no V_ALTSCTR slice defined */ + + /* read in ALTS label block */ + ap = (struct alts_parttbl *)kmem_zalloc(NBPSCTR, KM_SLEEP); + if (pv_cmdk_lb_rdwr(dkp->dk_dip, + TG_READ, ap, blk, NBPSCTR, NULL) != 0) + goto err; + + altused = ap->alts_ent_used; /* number of BB entries */ + altbase = ap->alts_ent_base; /* blk offset from begin slice */ + altlast = ap->alts_ent_end; /* blk offset to last block */ + + if ((altused == 0) || (altbase < 1) || + (altbase > altlast) || (altlast >= nblocks)) + goto err; + + /* we found bad block mappins */ + kmem_free(ap, NBPSCTR); + return (B_TRUE); + +err: + kmem_free(ap, NBPSCTR); + return (B_FALSE); +} + +/* + * Autoconfiguration Routines + */ +static int +pv_cmdkattach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + int instance = ddi_get_instance(dip); + dev_info_t *xdf_dip = NULL; + struct pv_cmdk *dkp; + cmlb_geom_t pgeom; + char *path; + int i; + + if (cmd != DDI_ATTACH) + return (DDI_FAILURE); + + /* + * This cmdk device layers on top of an xdf device. So the first + * thing we need to do is determine which xdf device instance this + * cmdk instance should be layered on top of. + */ + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, path); + for (i = 0; pv_cmdk_h2p[i].h2p_hvm_path != NULL; i++) { + if (strcmp(pv_cmdk_h2p[i].h2p_hvm_path, path) == 0) + break; + } + kmem_free(path, MAXPATHLEN); + + if (pv_cmdk_h2p[i].h2p_hvm_path == NULL) { + /* + * UhOh. 
We don't know what xdf instance this cmdk device + * should be mapped to. + */ + return (DDI_FAILURE); + } + + /* Check if this device exists */ + xdf_dip = xdf_hvm_hold(pv_cmdk_h2p[i].h2p_pv_path); + if (xdf_dip == NULL) + return (DDI_FAILURE); + + /* allocate and initialize our state structure */ + (void) ddi_soft_state_zalloc(pv_cmdk_state, instance); + dkp = ddi_get_soft_state(pv_cmdk_state, instance); + mutex_init(&dkp->dk_mutex, NULL, MUTEX_DRIVER, NULL); + dkp->dk_dip = dip; + dkp->dk_xdf_dip = xdf_dip; + dkp->dk_xdf_dev = makedevice(ddi_driver_major(xdf_dip), + XDF_MINOR(ddi_get_instance(xdf_dip), 0)); + + ASSERT((dkp->dk_xdf_dev & XDF_PMASK) == 0); + + /* + * GROSS HACK ALERT! GROSS HACK ALERT! + * + * Before we can initialize the cmlb layer, we have to tell the + * underlying xdf device what it's physical geometry should be. + * See the block comments at the top of this file for more info. + */ + if ((pv_cmdk_getpgeom(dip, &pgeom) != 0) || + (xdf_hvm_setpgeom(dkp->dk_xdf_dip, &pgeom) != 0)) { + ddi_release_devi(dkp->dk_xdf_dip); + mutex_destroy(&dkp->dk_mutex); + ddi_soft_state_free(pv_cmdk_state, instance); + return (DDI_FAILURE); + } + + /* create kstat for iostat(1M) */ + if (xdf_kstat_create(dkp->dk_xdf_dip, "cmdk", instance) != 0) { + ddi_release_devi(dkp->dk_xdf_dip); + mutex_destroy(&dkp->dk_mutex); + ddi_soft_state_free(pv_cmdk_state, instance); + return (DDI_FAILURE); + } + + /* + * Force the xdf front end driver to connect to the backend. From + * the solaris device tree perspective, the xdf driver devinfo node + * is already in the ATTACHED state. (Otherwise xdf_hvm_hold() + * would not have returned a dip.) But this doesn't mean that the + * xdf device has actually established a connection to it's back + * end driver. For us to be able to access the xdf device it needs + * to be connected. There are two ways to force the xdf driver to + * connect to the backend device. 
+ */ + if (xdf_hvm_connect(dkp->dk_xdf_dip) != 0) { + cmn_err(CE_WARN, + "pv driver failed to connect: %s", + pv_cmdk_h2p[i].h2p_pv_path); + xdf_kstat_delete(dkp->dk_xdf_dip); + ddi_release_devi(dkp->dk_xdf_dip); + mutex_destroy(&dkp->dk_mutex); + ddi_soft_state_free(pv_cmdk_state, instance); + return (DDI_FAILURE); + } + + /* + * Initalize cmlb. Note that for partition information cmlb + * will access the underly xdf disk device directly via + * pv_cmdk_lb_rdwr() and pv_cmdk_lb_getinfo(). There are no + * layered driver handles associated with this access because + * it is a direct disk access that doesn't go through + * any of the device nodes exported by the xdf device (since + * all exported device nodes only reflect the portion of + * the device visible via the partition/slice that the node + * is associated with.) So while not observable via the LDI, + * this direct disk access is ok since we're actually holding + * the target device. + */ + cmlb_alloc_handle((cmlb_handle_t *)&dkp->dk_cmlbhandle); + if (cmlb_attach(dkp->dk_dip, &pv_cmdk_lb_ops, + DTYPE_DIRECT, /* device_type */ + 0, /* not removable */ + 0, /* not hot pluggable */ + DDI_NT_BLOCK, + CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT, /* mimic cmdk */ + dkp->dk_cmlbhandle, 0) != 0) { + cmlb_free_handle(&dkp->dk_cmlbhandle); + xdf_kstat_delete(dkp->dk_xdf_dip); + ddi_release_devi(dkp->dk_xdf_dip); + mutex_destroy(&dkp->dk_mutex); + ddi_soft_state_free(pv_cmdk_state, instance); + return (DDI_FAILURE); + } + + if (pv_cmdk_bb_check(dkp)) { + cmn_err(CE_WARN, + "pv cmdk disks with bad blocks are unsupported: %s", + pv_cmdk_h2p[i].h2p_hvm_path); + + cmlb_detach(dkp->dk_cmlbhandle, 0); + cmlb_free_handle(&dkp->dk_cmlbhandle); + xdf_kstat_delete(dkp->dk_xdf_dip); + ddi_release_devi(dkp->dk_xdf_dip); + mutex_destroy(&dkp->dk_mutex); + ddi_soft_state_free(pv_cmdk_state, instance); + return (DDI_FAILURE); + } + + /* setup devid string */ + pv_cmdk_devid_setup(dkp); + + /* Calling validate will create minor nodes 
according to disk label */ + (void) cmlb_validate(dkp->dk_cmlbhandle, 0, 0); + + /* + * Add a zero-length attribute to tell the world we support + * kernel ioctls (for layered drivers). + */ + (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, + DDI_KERNEL_IOCTL, NULL, 0); + + /* Have the system report any newly created device nodes */ + ddi_report_dev(dip); + + return (DDI_SUCCESS); +} + +static int +pv_cmdkdetach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + int instance = ddi_get_instance(dip); + struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); + + if (cmd != DDI_DETACH) + return (DDI_FAILURE); + + ASSERT(MUTEX_NOT_HELD(&dkp->dk_mutex)); + + ddi_devid_unregister(dip); + if (dkp->dk_devid) + ddi_devid_free(dkp->dk_devid); + cmlb_detach(dkp->dk_cmlbhandle, 0); + cmlb_free_handle(&dkp->dk_cmlbhandle); + mutex_destroy(&dkp->dk_mutex); + xdf_kstat_delete(dkp->dk_xdf_dip); + ddi_release_devi(dkp->dk_xdf_dip); + ddi_soft_state_free(pv_cmdk_state, instance); + ddi_prop_remove_all(dip); + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +pv_cmdk_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, + void **result) +{ + dev_t dev = (dev_t)arg; + int instance = XDF_DEV2UNIT(dev); + struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); + + switch (infocmd) { + case DDI_INFO_DEVT2DEVINFO: + if (dkp == NULL) + return (DDI_FAILURE); + *result = (void *)dkp->dk_dip; + break; + case DDI_INFO_DEVT2INSTANCE: + *result = (void *)(intptr_t)instance; + break; + default: + return (DDI_FAILURE); + } + return (DDI_SUCCESS); +} + +static int +pv_cmdk_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, + int flags, char *name, caddr_t valuep, int *lengthp) +{ + int instance = ddi_get_instance(dip); + struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); + dev_info_t *xdf_dip; + dev_t xdf_devt; + int err; + + /* + * Sanity check that if a dev_t or dip were specified that they + * correspond to this device driver. 
On debug kernels we'll + * panic and on non-debug kernels we'll return failure. + */ + ASSERT(ddi_driver_major(dip) == pv_cmdk_major); + ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == pv_cmdk_major)); + if ((ddi_driver_major(dip) != pv_cmdk_major) || + ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != pv_cmdk_major))) + return (DDI_PROP_NOT_FOUND); + + /* + * This property lookup might be associated with a device node + * that is not yet attached, if so pass it onto ddi_prop_op(). + */ + if (dkp == NULL) + return (ddi_prop_op(dev, dip, prop_op, flags, + name, valuep, lengthp)); + + /* + * Make sure we only lookup static properties. + * + * If there are static properties of the underlying xdf driver + * that we want to mirror, then we'll have to explicity look them + * up and define them during attach. There are a few reasons + * for this. Most importantly, most static properties are typed + * and all dynamic properties are untyped, ie, for dynamic + * properties the caller must know the type of the property and + * how to interpret the value of the property. the prop_op drivedr + * entry point is only designed for returning dynamic/untyped + * properties, so if we were to attempt to lookup and pass back + * static properties of the underlying device here then we would + * be losing the type information for those properties. Another + * reason we don't want to pass on static property requests is that + * static properties are enumerable in the device tree, where as + * dynamic ones are not. + */ + flags |= DDI_PROP_DYNAMIC; + + /* + * We can't use the ldi here to access the underlying device because + * the ldi actually opens the device, and that open might fail if the + * device has already been opened with the FEXCL flag. If we used + * the ldi here, it would also be possible for some other caller + * to try open the device with the FEXCL flag and get a failure + * back because we have it open to do a property query. 
+ * + * Instad we'll grab a hold on the target dip and query the + * property directly. + */ + mutex_enter(&dkp->dk_mutex); + + if ((xdf_dip = dkp->dk_xdf_dip) == NULL) { + mutex_exit(&dkp->dk_mutex); + return (DDI_PROP_NOT_FOUND); + } + e_ddi_hold_devi(xdf_dip); + + /* figure out the dev_t we're going to pass on down */ + if (dev == DDI_DEV_T_ANY) { + xdf_devt = DDI_DEV_T_ANY; + } else { + xdf_devt = dkp->dk_xdf_dev | XDF_DEV2PART(dev); + } + + mutex_exit(&dkp->dk_mutex); + + /* + * Cdev_prop_op() is not a public interface, and normally the caller + * is required to make sure that the target driver actually implements + * this interface before trying to invoke it. In this case we know + * that we're always accessing the xdf driver and it does have this + * interface defined, so we can skip the check. + */ + err = cdev_prop_op(xdf_devt, xdf_dip, + prop_op, flags, name, valuep, lengthp); + ddi_release_devi(xdf_dip); + return (err); +} + +/* + * Device driver ops vector + */ +static struct cb_ops pv_cmdk_cb_ops = { + pv_cmdkopen, /* open */ + pv_cmdkclose, /* close */ + pv_cmdkstrategy, /* strategy */ + nodev, /* print */ + pv_cmdkdump, /* dump */ + pv_cmdkread, /* read */ + pv_cmdkwrite, /* write */ + pv_cmdkioctl, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + nochpoll, /* poll */ + pv_cmdk_prop_op, /* cb_prop_op */ + 0, /* streamtab */ + D_64BIT | D_MP | D_NEW, /* Driver comaptibility flag */ + CB_REV, /* cb_rev */ + pv_cmdkaread, /* async read */ + pv_cmdkawrite /* async write */ +}; + +struct dev_ops pv_cmdk_ops = { + DEVO_REV, /* devo_rev, */ + 0, /* refcnt */ + pv_cmdk_getinfo, /* info */ + nulldev, /* identify */ + nulldev, /* probe */ + pv_cmdkattach, /* attach */ + pv_cmdkdetach, /* detach */ + nodev, /* reset */ + &pv_cmdk_cb_ops, /* driver operations */ + (struct bus_ops *)0 /* bus operations */ +}; + +/* + * Module linkage information for the kernel. 
+ */ +static struct modldrv modldrv = { + &mod_driverops, /* Type of module. This one is a driver */ + "PV Common Direct Access Disk", + &pv_cmdk_ops, /* driver ops */ +}; + +static struct modlinkage modlinkage = { + MODREV_1, (void *)&modldrv, NULL +}; + +int +_init(void) +{ + int rval; + + if ((pv_cmdk_major = ddi_name_to_major("cmdk")) == (major_t)-1) + return (EINVAL); + + /* + * In general ide usually supports 4 disk devices, this same + * limitation also applies to software emulating ide devices. + * so by default we pre-allocate 4 cmdk soft state structures. + */ + if ((rval = ddi_soft_state_init(&pv_cmdk_state, + sizeof (struct pv_cmdk), PV_CMDK_NODES)) != 0) + return (rval); + + /* + * Currently we only support qemu as the backing hardware emulator + * for cmdk devices. + */ + pv_cmdk_h2p = pv_cmdk_h2p_xen_qemu; + + /* Install our module */ + if ((rval = mod_install(&modlinkage)) != 0) { + ddi_soft_state_fini(&pv_cmdk_state); + return (rval); + } + + return (0); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + int rval; + if ((rval = mod_remove(&modlinkage)) != 0) + return (rval); + ddi_soft_state_fini(&pv_cmdk_state); + return (0); +} diff --git a/usr/src/uts/i86pc/i86hvm/io/pv_rtls.c b/usr/src/uts/i86pc/i86hvm/io/pv_rtls.c new file mode 100644 index 0000000000..40303870b4 --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/io/pv_rtls.c @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Fake rtls module. Prevents the real rtls driver from loading in + * a xen HVM domain so that xnf may operate instead. + */ + +#include <sys/sunddi.h> +#include <sys/errno.h> +#include <sys/modctl.h> + +struct dev_ops pv_rtls_ops = { + DEVO_REV, + 0, + NULL, + nulldev, + nulldev, + NULL, + NULL, + nodev, + NULL, + NULL +}; + +/* + * Module linkage information for the kernel. + */ +static struct modldrv modldrv = { + &mod_driverops, + "xVM rtls stub %I%", + &pv_rtls_ops +}; + +static struct modlinkage modlinkage = { + MODREV_1, (void *)&modldrv, NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + return (EBUSY); +} diff --git a/usr/src/uts/i86pc/i86hvm/io/xpv/evtchn.c b/usr/src/uts/i86pc/i86hvm/io/xpv/evtchn.c new file mode 100644 index 0000000000..14d5bcc4b9 --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/io/xpv/evtchn.c @@ -0,0 +1,389 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/xpv_support.h> +#include <sys/hypervisor.h> +#include <sys/machsystm.h> +#include <sys/mutex.h> +#include <sys/cmn_err.h> +#include <sys/dditypes.h> +#include <sys/atomic.h> +#include <sys/sysmacros.h> +#include <sys/cpu.h> +#include <sys/psw.h> +#include <sys/psm.h> +#include <sys/sdt.h> + +extern dev_info_t *xpv_dip; +static ddi_intr_handle_t *evtchn_ihp = NULL; +static ddi_softint_handle_t evtchn_to_handle[NR_EVENT_CHANNELS]; +kmutex_t ec_lock; + +static int evtchn_callback_irq = -1; + +static volatile ulong_t *pending_events; +static volatile ulong_t *masked_events; + +/* log2(NBBY * sizeof (ulong)) */ +#ifdef __amd64 +#define EVTCHN_SHIFT 6 +#else /* __i386 */ +#define EVTCHN_SHIFT 5 +#endif + +/* Atomically get and clear a ulong from memory. 
*/ +#define GET_AND_CLEAR(src, targ) { \ + membar_enter(); \ + do { \ + targ = *src; \ + } while (atomic_cas_ulong(src, targ, 0) != targ); \ +} + +/* Get the first and last bits set in a bitmap */ +#define GET_BOUNDS(bitmap, low, high) { \ + int _i; \ + low = high = -1; \ + for (_i = 0; _i <= sizeof (ulong_t); _i++) \ + if (bitmap & (1UL << _i)) { \ + if (low == -1) \ + low = _i; \ + high = _i; \ + } \ +} + +void +ec_bind_evtchn_to_handler(int evtchn, pri_t pri, ec_handler_fcn_t handler, + void *arg1) +{ + ddi_softint_handle_t hdl; + + if (evtchn < 0 || evtchn > NR_EVENT_CHANNELS) { + cmn_err(CE_WARN, "Binding invalid event channel: %d", evtchn); + return; + } + + (void) ddi_intr_add_softint(xpv_dip, &hdl, pri, handler, (caddr_t)arg1); + mutex_enter(&ec_lock); + ASSERT(evtchn_to_handle[evtchn] == NULL); + evtchn_to_handle[evtchn] = hdl; + mutex_exit(&ec_lock); + + /* Let the hypervisor know we're prepared to handle this event */ + hypervisor_unmask_event(evtchn); +} + +void +ec_unbind_evtchn(int evtchn) +{ + evtchn_close_t close; + ddi_softint_handle_t hdl; + + if (evtchn < 0 || evtchn > NR_EVENT_CHANNELS) { + cmn_err(CE_WARN, "Unbinding invalid event channel: %d", evtchn); + return; + } + + /* + * Let the hypervisor know we're no longer prepared to handle this + * event + */ + hypervisor_mask_event(evtchn); + + /* Cleanup the event handler metadata */ + mutex_enter(&ec_lock); + hdl = evtchn_to_handle[evtchn]; + evtchn_to_handle[evtchn] = NULL; + mutex_exit(&ec_lock); + + close.port = evtchn; + (void) HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + (void) ddi_intr_remove_softint(hdl); +} + +void +ec_notify_via_evtchn(unsigned int port) +{ + evtchn_send_t send; + + if ((int)port == -1) + return; + send.port = port; + (void) HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); +} + +void +hypervisor_unmask_event(unsigned int ev) +{ + int index = ev >> EVTCHN_SHIFT; + ulong_t bit = 1UL << (ev & ((1UL << EVTCHN_SHIFT) - 1)); + volatile ulong_t *maskp; + 
evtchn_unmask_t unmask; + + /* + * index,bit contain the event number as an index into the + * masked-events bitmask. Set it to 0. + */ + maskp = &masked_events[index]; + atomic_and_ulong(maskp, ~bit); + + /* Let the hypervisor know the event has been unmasked */ + unmask.port = ev; + if (HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask) != 0) + panic("xen_evtchn_unmask() failed"); +} + +/* Set a bit in an evtchan mask word */ +void +hypervisor_mask_event(uint_t ev) +{ + int index = ev >> EVTCHN_SHIFT; + ulong_t bit = 1UL << (ev & ((1UL << EVTCHN_SHIFT) - 1)); + volatile ulong_t *maskp; + + maskp = &masked_events[index]; + atomic_or_ulong(maskp, bit); +} + +void +hypervisor_clear_event(uint_t ev) +{ + int index = ev >> EVTCHN_SHIFT; + ulong_t bit = 1UL << (ev & ((1UL << EVTCHN_SHIFT) - 1)); + volatile ulong_t *maskp; + + maskp = &pending_events[index]; + atomic_and_ulong(maskp, ~bit); +} + +int +xen_alloc_unbound_evtchn(int domid, int *evtchnp) +{ + evtchn_alloc_unbound_t alloc; + int err; + + alloc.dom = DOMID_SELF; + alloc.remote_dom = (domid_t)domid; + + if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc)) == 0) { + *evtchnp = alloc.port; + /* ensure evtchn is masked till we're ready to use it */ + (void) hypervisor_mask_event(*evtchnp); + } else { + err = xen_xlate_errcode(err); + } + + return (err); +} + +int +xen_bind_interdomain(int domid, int remote_port, int *port) +{ + evtchn_bind_interdomain_t bind; + int err; + + bind.remote_dom = (domid_t)domid; + bind.remote_port = remote_port; + if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind)) == 0) + *port = bind.local_port; + else + err = xen_xlate_errcode(err); + return (err); +} + +/*ARGSUSED*/ +uint_t +evtchn_callback_fcn(caddr_t arg0, caddr_t arg1) +{ + ulong_t pending_word; + int i, j, port; + volatile struct vcpu_info *vci; + uint_t rv = DDI_INTR_UNCLAIMED; + ddi_softint_handle_t hdl; + int low, high; + ulong_t sels; + + vci = 
&HYPERVISOR_shared_info->vcpu_info[CPU->cpu_id]; + +again: + DTRACE_PROBE2(evtchn__scan__start, int, vci->evtchn_upcall_pending, + ulong_t, vci->evtchn_pending_sel); + + atomic_and_8(&vci->evtchn_upcall_pending, 0); + + /* + * Find the upper and lower bounds in which we need to search for + * pending events. + */ + GET_AND_CLEAR(&vci->evtchn_pending_sel, sels); + + /* sels == 1 is by far the most common case. Make it fast */ + if (sels == 1) + low = high = 0; + else if (sels == 0) + return (rv); + else + GET_BOUNDS(sels, low, high); + + /* Scan the port list, looking for words with bits set */ + for (i = low; i <= high; i++) { + ulong_t tmp; + + GET_AND_CLEAR(&pending_events[i], tmp); + pending_word = tmp & ~(masked_events[i]); + + /* Scan the bits in the word, looking for pending events */ + while (pending_word != 0) { + j = lowbit(pending_word) - 1; + port = (i << EVTCHN_SHIFT) + j; + pending_word = pending_word & ~(1 << j); + + /* + * If there is a handler registered for this event, + * schedule a softint of the appropriate priority + * to execute it. 
+ */ + if ((hdl = evtchn_to_handle[port]) != NULL) { + (void) ddi_intr_trigger_softint(hdl, NULL); + rv = DDI_INTR_CLAIMED; + } + } + } + DTRACE_PROBE2(evtchn__scan__end, int, vci->evtchn_upcall_pending, + ulong_t, vci->evtchn_pending_sel); + + if ((volatile uint8_t)vci->evtchn_upcall_pending || + ((volatile ulong_t)vci->evtchn_pending_sel)) + goto again; + + return (rv); +} + +static int +set_hvm_callback(int irq) +{ + struct xen_hvm_param xhp; + + xhp.domid = DOMID_SELF; + xhp.index = HVM_PARAM_CALLBACK_IRQ; + xhp.value = irq; + return (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp)); +} + +void +ec_fini() +{ + int i; + + for (i = 0; i < NR_EVENT_CHANNELS; i++) + ec_unbind_evtchn(i); + + evtchn_callback_irq = -1; + if (evtchn_ihp != NULL) { + (void) ddi_intr_disable(*evtchn_ihp); + (void) ddi_intr_remove_handler(*evtchn_ihp); + (void) ddi_intr_free(*evtchn_ihp); + kmem_free(evtchn_ihp, sizeof (ddi_intr_handle_t)); + evtchn_ihp = NULL; + } +} + +int +ec_init(dev_info_t *dip) +{ + int i; + int rv, actual; + ddi_intr_handle_t *ihp; + + /* + * Translate the variable-sized pending and masked event bitmasks + * into constant-sized arrays of uint32_t's. + */ + pending_events = &HYPERVISOR_shared_info->evtchn_pending[0]; + masked_events = &HYPERVISOR_shared_info->evtchn_mask[0]; + + /* + * Clear our event handler structures and prevent the hypervisor + * from triggering any events. + */ + mutex_init(&ec_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL7)); + for (i = 0; i < NR_EVENT_CHANNELS; i++) { + evtchn_to_handle[i] = NULL; + (void) hypervisor_mask_event(i); + } + + /* + * Allocate and initialize an interrupt handler to process the + * hypervisor's "hey you have events pending!" interrupt. 
+ */ + ihp = kmem_zalloc(sizeof (ddi_intr_handle_t), KM_SLEEP); + rv = ddi_intr_alloc(dip, ihp, DDI_INTR_TYPE_FIXED, 0, 1, &actual, + DDI_INTR_ALLOC_NORMAL); + if (rv < 0 || actual != 1) { + cmn_err(CE_WARN, "Could not allocate evtchn interrupt: %d", + rv); + return (-1); + } + + rv = ddi_intr_add_handler(*ihp, evtchn_callback_fcn, NULL, NULL); + if (rv < 0) { + (void) ddi_intr_free(*ihp); + cmn_err(CE_WARN, "Could not attach evtchn handler"); + return (-1); + } + evtchn_ihp = ihp; + + if (ddi_intr_enable(*ihp) != DDI_SUCCESS) { + cmn_err(CE_WARN, "Could not enable evtchn interrupts\n"); + return (-1); + } + + /* Tell the hypervisor which interrupt we're waiting on. */ + evtchn_callback_irq = ((ddi_intr_handle_impl_t *)*ihp)->ih_vector; + + if (set_hvm_callback(evtchn_callback_irq) != 0) { + cmn_err(CE_WARN, "Couldn't register evtchn callback"); + return (-1); + } + return (0); +} + +void +ec_resume(void) +{ + int i; + + /* New event-channel space is not 'live' yet. */ + for (i = 0; i < NR_EVENT_CHANNELS; i++) + (void) hypervisor_mask_event(i); + if (set_hvm_callback(evtchn_callback_irq) != 0) + cmn_err(CE_WARN, "Couldn't register evtchn callback"); + +} diff --git a/usr/src/uts/i86pc/i86hvm/io/xpv/xpv.conf b/usr/src/uts/i86pc/i86hvm/io/xpv/xpv.conf new file mode 100644 index 0000000000..d599f6f3ff --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/io/xpv/xpv.conf @@ -0,0 +1,28 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# ident "%Z%%M% %I% %E% SMI" + +interrupt-priorities=9; diff --git a/usr/src/uts/i86pc/i86hvm/io/xpv/xpv_support.c b/usr/src/uts/i86pc/i86hvm/io/xpv/xpv_support.c new file mode 100644 index 0000000000..f5de99a175 --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/io/xpv/xpv_support.c @@ -0,0 +1,956 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/modctl.h> +#include <sys/types.h> +#include <sys/archsystm.h> +#include <sys/machsystm.h> +#include <sys/sunndi.h> +#include <sys/sunddi.h> +#include <sys/ddi_subrdefs.h> +#include <sys/xpv_support.h> +#include <sys/xen_errno.h> +#include <sys/hypervisor.h> +#include <sys/gnttab.h> +#include <sys/xenbus_comms.h> +#include <sys/xenbus_impl.h> +#include <xen/sys/xendev.h> +#include <sys/sysmacros.h> +#include <sys/x86_archext.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/conf.h> +#include <sys/devops.h> +#include <sys/pc_mmu.h> +#include <sys/cmn_err.h> +#include <sys/cpr.h> +#include <sys/ddi.h> +#include <vm/seg_kmem.h> +#include <vm/as.h> +#include <vm/hat_pte.h> +#include <vm/hat_i86.h> + +#define XPV_MINOR 0 +#define XPV_BUFSIZE 128 + +/* + * This structure is ordinarily constructed by Xen. In the HVM world, we + * manually fill in the few fields the PV drivers need. + */ +start_info_t *xen_info = NULL; + +/* Xen version number. */ +int xen_major, xen_minor; + +/* Metadata page shared between domain and Xen */ +shared_info_t *HYPERVISOR_shared_info = NULL; + +/* Page containing code to issue hypercalls. */ +extern caddr_t hypercall_page; + +/* Is the hypervisor 64-bit? */ +int xen_is_64bit = -1; + +/* virtual addr for the store_mfn page */ +caddr_t xb_addr; + +dev_info_t *xpv_dip; +static dev_info_t *xpvd_dip; + +/* saved pfn of the shared info page */ +static pfn_t shared_info_frame; + +#ifdef DEBUG +int xen_suspend_debug; + +#define SUSPEND_DEBUG if (xen_suspend_debug) xen_printf +#else +#define SUSPEND_DEBUG(...) 
+#endif + +/* + * Forward declarations + */ +static int xpv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); +static int xpv_attach(dev_info_t *, ddi_attach_cmd_t); +static int xpv_detach(dev_info_t *, ddi_detach_cmd_t); +static int xpv_open(dev_t *, int, int, cred_t *); +static int xpv_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); + +static struct cb_ops xpv_cb_ops = { + xpv_open, + nulldev, /* close */ + nodev, /* strategy */ + nodev, /* print */ + nodev, /* dump */ + nodev, /* read */ + nodev, /* write */ + xpv_ioctl, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + nochpoll, /* poll */ + ddi_prop_op, + NULL, + D_MP, + CB_REV, + NULL, + NULL +}; + +static struct dev_ops xpv_dv_ops = { + DEVO_REV, + 0, + xpv_getinfo, + nulldev, /* identify */ + nulldev, /* probe */ + xpv_attach, + xpv_detach, + nodev, /* reset */ + &xpv_cb_ops, + NULL, /* struct bus_ops */ + NULL /* power */ +}; + +static struct modldrv modldrv = { + &mod_driverops, + "xpv driver %I%", + &xpv_dv_ops +}; + +static struct modlinkage modl = { + MODREV_1, + { + (void *)&modldrv, + NULL /* null termination */ + } +}; + +static ddi_dma_attr_t xpv_dma_attr = { + DMA_ATTR_V0, /* version of this structure */ + 0, /* lowest usable address */ + 0xffffffffffffffffULL, /* highest usable address */ + 0x7fffffff, /* maximum DMAable byte count */ + MMU_PAGESIZE, /* alignment in bytes */ + 0x7ff, /* bitmap of burst sizes */ + 1, /* minimum transfer */ + 0xffffffffU, /* maximum transfer */ + 0x7fffffffULL, /* maximum segment length */ + 1, /* maximum number of segments */ + 1, /* granularity */ + 0, /* flags (reserved) */ +}; + +static ddi_device_acc_attr_t xpv_accattr = { + DDI_DEVICE_ATTR_V0, + DDI_NEVERSWAP_ACC, + DDI_STRICTORDER_ACC +}; + +#define MAX_ALLOCATIONS 10 +static ddi_dma_handle_t xpv_dma_handle[MAX_ALLOCATIONS]; +static ddi_acc_handle_t xpv_dma_acchandle[MAX_ALLOCATIONS]; +static int xen_alloc_cnt = 0; + +void * +xen_alloc_pages(pgcnt_t cnt) +{ + size_t len; 
+ int a = xen_alloc_cnt++; + caddr_t addr; + + ASSERT(xen_alloc_cnt < MAX_ALLOCATIONS); + if (ddi_dma_alloc_handle(xpv_dip, &xpv_dma_attr, DDI_DMA_SLEEP, 0, + &xpv_dma_handle[a]) != DDI_SUCCESS) + return (NULL); + + if (ddi_dma_mem_alloc(xpv_dma_handle[a], MMU_PAGESIZE * cnt, + &xpv_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0, + &addr, &len, &xpv_dma_acchandle[a]) != DDI_SUCCESS) { + ddi_dma_free_handle(&xpv_dma_handle[a]); + cmn_err(CE_WARN, "Couldn't allocate memory for xpv devices"); + return (NULL); + } + return (addr); +} + +/* + * This function is invoked twice, first time with reprogram=0 to set up + * the xpvd portion of the device tree. The second time it is ignored. + */ +static void +xpv_enumerate(int reprogram) +{ + dev_info_t *dip; + + if (reprogram != 0) + return; + + ndi_devi_alloc_sleep(ddi_root_node(), "xpvd", + (pnode_t)DEVI_SID_NODEID, &dip); + + (void) ndi_devi_bind_driver(dip, 0); + + /* + * Too early to enumerate split device drivers in domU + * since we need to create taskq thread during enumeration. + * So, we only enumerate softdevs and console here. + */ + xendev_enum_all(dip, B_TRUE); +} + +/* + * Translate a hypervisor errcode to a Solaris error code. + */ +int +xen_xlate_errcode(int error) +{ +#define CASE(num) case X_##num: error = num; break + + switch (-error) { + CASE(EPERM); CASE(ENOENT); CASE(ESRCH); + CASE(EINTR); CASE(EIO); CASE(ENXIO); + CASE(E2BIG); CASE(ENOMEM); CASE(EACCES); + CASE(EFAULT); CASE(EBUSY); CASE(EEXIST); + CASE(ENODEV); CASE(EISDIR); CASE(EINVAL); + CASE(ENOSPC); CASE(ESPIPE); CASE(EROFS); + CASE(ENOSYS); CASE(ENOTEMPTY); CASE(EISCONN); + CASE(ENODATA); + default: + panic("xen_xlate_errcode: unknown error %d", error); + } + return (error); +#undef CASE +} + +/*PRINTFLIKE1*/ +void +xen_printf(const char *fmt, ...) +{ + va_list adx; + + va_start(adx, fmt); + printf(fmt, adx); + va_end(adx); +} + +/* + * Stub functions to get the FE drivers to build, and to catch drivers that + * misbehave in HVM domains. 
+ */ +/*ARGSUSED*/ +void +xen_release_pfn(pfn_t pfn, caddr_t va) +{ + panic("xen_release_pfn() is not supported in HVM domains"); +} + +/*ARGSUSED*/ +void +reassign_pfn(pfn_t pfn, mfn_t mfn) +{ + panic("reassign_pfn() is not supported in HVM domains"); +} + +/*ARGSUSED*/ +long +balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns) +{ + panic("balloon_free_pages() is not supported in HVM domains"); + return (0); +} + +/*ARGSUSED*/ +void +balloon_drv_added(int64_t delta) +{ + panic("balloon_drv_added() is not supported in HVM domains"); +} + +/* + * Add a mapping for the machine page at the given virtual address. + */ +void +kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level) +{ + ASSERT(level == 0); + + hat_devload(kas.a_hat, (caddr_t)va, MMU_PAGESIZE, + mmu_btop(ma), PROT_READ | PROT_WRITE, HAT_LOAD); +} + +static uint64_t +hvm_get_param(int param_id) +{ + struct xen_hvm_param xhp; + + xhp.domid = DOMID_SELF; + xhp.index = param_id; + if ((HYPERVISOR_hvm_op(HVMOP_get_param, &xhp) < 0)) + return (-1); + return (xhp.value); +} + +static struct xenbus_watch shutdown_watch; +taskq_t *xen_shutdown_tq; + +#define SHUTDOWN_INVALID -1 +#define SHUTDOWN_POWEROFF 0 +#define SHUTDOWN_REBOOT 1 +#define SHUTDOWN_SUSPEND 2 +#define SHUTDOWN_HALT 3 +#define SHUTDOWN_MAX 4 + +#define SHUTDOWN_TIMEOUT_SECS (60 * 5) + +static const char *cmd_strings[SHUTDOWN_MAX] = { + "poweroff", + "reboot", + "suspend", + "halt" +}; + +int +xen_suspend_devices(dev_info_t *dip) +{ + int error; + char buf[XPV_BUFSIZE]; + + SUSPEND_DEBUG("xen_suspend_devices\n"); + + for (; dip != NULL; dip = ddi_get_next_sibling(dip)) { + if (xen_suspend_devices(ddi_get_child(dip))) + return (ENXIO); + if (ddi_get_driver(dip) == NULL) + continue; + SUSPEND_DEBUG("Suspending device %s\n", ddi_deviname(dip, buf)); + ASSERT((DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED) == 0); + + + if (!i_ddi_devi_attached(dip)) { + error = DDI_FAILURE; + } else { + error = devi_detach(dip, DDI_SUSPEND); + } + + 
if (error == DDI_SUCCESS) { + DEVI(dip)->devi_cpr_flags |= DCF_CPR_SUSPENDED; + } else { + SUSPEND_DEBUG("WARNING: Unable to suspend device %s\n", + ddi_deviname(dip, buf)); + cmn_err(CE_WARN, "Unable to suspend device %s.", + ddi_deviname(dip, buf)); + cmn_err(CE_WARN, "Device is busy or does not " + "support suspend/resume."); + return (ENXIO); + } + } + return (0); +} + +int +xen_resume_devices(dev_info_t *start, int resume_failed) +{ + dev_info_t *dip, *next, *last = NULL; + int did_suspend; + int error = resume_failed; + char buf[XPV_BUFSIZE]; + + SUSPEND_DEBUG("xen_resume_devices\n"); + + while (last != start) { + dip = start; + next = ddi_get_next_sibling(dip); + while (next != last) { + dip = next; + next = ddi_get_next_sibling(dip); + } + + /* + * cpr is the only one that uses this field and the device + * itself hasn't resumed yet, there is no need to use a + * lock, even though kernel threads are active by now. + */ + did_suspend = DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED; + if (did_suspend) + DEVI(dip)->devi_cpr_flags &= ~DCF_CPR_SUSPENDED; + + /* + * There may be background attaches happening on devices + * that were not originally suspended by cpr, so resume + * only devices that were suspended by cpr. Also, stop + * resuming after the first resume failure, but traverse + * the entire tree to clear the suspend flag. + */ + if (did_suspend && !error) { + SUSPEND_DEBUG("Resuming device %s\n", + ddi_deviname(dip, buf)); + /* + * If a device suspended by cpr gets detached during + * the resume process (for example, due to hotplugging) + * before cpr gets around to issuing it a DDI_RESUME, + * we'll have problems. 
+ */ + if (!i_ddi_devi_attached(dip)) { + cmn_err(CE_WARN, "Skipping %s, device " + "not ready for resume", + ddi_deviname(dip, buf)); + } else { + if (devi_attach(dip, DDI_RESUME) != + DDI_SUCCESS) { + error = ENXIO; + } + } + } + + if (error == ENXIO) { + cmn_err(CE_WARN, "Unable to resume device %s", + ddi_deviname(dip, buf)); + } + + error = xen_resume_devices(ddi_get_child(dip), error); + last = dip; + } + + return (error); +} + +/*ARGSUSED*/ +static int +check_xpvd(dev_info_t *dip, void *arg) +{ + char *name; + + name = ddi_node_name(dip); + if (name == NULL || strcmp(name, "xpvd")) { + return (DDI_WALK_CONTINUE); + } else { + xpvd_dip = dip; + return (DDI_WALK_TERMINATE); + } +} + +/* + * Top level routine to direct suspend/resume of a domain. + */ +void +xen_suspend_domain(void) +{ + extern void rtcsync(void); + extern void ec_resume(void); + extern kmutex_t ec_lock; + struct xen_add_to_physmap xatp; + ulong_t flags; + int err; + + cmn_err(CE_NOTE, "Domain suspending for save/migrate"); + + SUSPEND_DEBUG("xen_suspend_domain\n"); + + /* + * We only want to suspend the PV devices, since the emulated devices + * are suspended by saving the emulated device state. The PV devices + * are all children of the xpvd nexus device. So we search the + * device tree for the xpvd node to use as the root of the tree to + * be suspended. + */ + if (xpvd_dip == NULL) + ddi_walk_devs(ddi_root_node(), check_xpvd, NULL); + + /* + * suspend interrupts and devices + */ + if (xpvd_dip != NULL) + (void) xen_suspend_devices(ddi_get_child(xpvd_dip)); + else + cmn_err(CE_WARN, "No PV devices found to suspend"); + SUSPEND_DEBUG("xenbus_suspend\n"); + xenbus_suspend(); + + mutex_enter(&cpu_lock); + + /* + * Suspend on vcpu 0 + */ + thread_affinity_set(curthread, 0); + kpreempt_disable(); + + if (ncpus > 1) + pause_cpus(NULL); + /* + * We can grab the ec_lock as it's a spinlock with a high SPL. Hence + * any holder would have dropped it to get through pause_cpus(). 
+ */ + mutex_enter(&ec_lock); + + /* + * From here on in, we can't take locks. + */ + + flags = intr_clear(); + + SUSPEND_DEBUG("HYPERVISOR_suspend\n"); + /* + * At this point we suspend and sometime later resume. + * Note that this call may return with an indication of a cancelled + * for now no matter ehat the return we do a full resume of all + * suspended drivers, etc. + */ + (void) HYPERVISOR_shutdown(SHUTDOWN_suspend); + + /* + * Point HYPERVISOR_shared_info to the proper place. + */ + xatp.domid = DOMID_SELF; + xatp.idx = 0; + xatp.space = XENMAPSPACE_shared_info; + xatp.gpfn = shared_info_frame; + if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0) + panic("Could not set shared_info page. error: %d", err); + + SUSPEND_DEBUG("gnttab_resume\n"); + gnttab_resume(); + + SUSPEND_DEBUG("ec_resume\n"); + ec_resume(); + + intr_restore(flags); + + if (ncpus > 1) + start_cpus(); + + mutex_exit(&ec_lock); + mutex_exit(&cpu_lock); + + /* + * Now we can take locks again. + */ + + rtcsync(); + + SUSPEND_DEBUG("xenbus_resume\n"); + xenbus_resume(); + SUSPEND_DEBUG("xen_resume_devices\n"); + if (xpvd_dip != NULL) + (void) xen_resume_devices(ddi_get_child(xpvd_dip), 0); + + thread_affinity_clear(curthread); + kpreempt_enable(); + + SUSPEND_DEBUG("finished xen_suspend_domain\n"); + + cmn_err(CE_NOTE, "domain restore/migrate completed"); +} + +static void +xen_dirty_shutdown(void *arg) +{ + int cmd = (uintptr_t)arg; + + cmn_err(CE_WARN, "Externally requested shutdown failed or " + "timed out.\nShutting down.\n"); + + switch (cmd) { + case SHUTDOWN_HALT: + case SHUTDOWN_POWEROFF: + (void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred); + break; + case SHUTDOWN_REBOOT: + (void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred); + break; + } +} + +static void +xen_shutdown(void *arg) +{ + nvlist_t *attr_list = NULL; + sysevent_t *event = NULL; + sysevent_id_t eid; + int cmd = (uintptr_t)arg; + int err; + + ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX); + + if (cmd == 
SHUTDOWN_SUSPEND) { + xen_suspend_domain(); + return; + } + + err = nvlist_alloc(&attr_list, NV_UNIQUE_NAME, KM_SLEEP); + if (err != DDI_SUCCESS) + goto failure; + + err = nvlist_add_string(attr_list, "shutdown", cmd_strings[cmd]); + if (err != DDI_SUCCESS) + goto failure; + + if ((event = sysevent_alloc("EC_xpvsys", "control", "SUNW:kern:xpv", + SE_SLEEP)) == NULL) + goto failure; + (void) sysevent_attach_attributes(event, + (sysevent_attr_list_t *)attr_list); + + err = log_sysevent(event, SE_SLEEP, &eid); + + sysevent_detach_attributes(event); + sysevent_free(event); + + if (err != 0) + goto failure; + + (void) timeout(xen_dirty_shutdown, arg, + SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC)); + + nvlist_free(attr_list); + return; + +failure: + if (attr_list != NULL) + nvlist_free(attr_list); + xen_dirty_shutdown(arg); +} + +/*ARGSUSED*/ +static void +xen_shutdown_handler(struct xenbus_watch *watch, const char **vec, + unsigned int len) +{ + char *str; + xenbus_transaction_t xbt; + int err, shutdown_code = SHUTDOWN_INVALID; + unsigned int slen; + +again: + err = xenbus_transaction_start(&xbt); + if (err) + return; + if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) { + (void) xenbus_transaction_end(xbt, 1); + return; + } + + SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str); + + /* + * If this is a watch fired from our write below, check out early to + * avoid an infinite loop. 
+ */ + if (strcmp(str, "") == 0) { + (void) xenbus_transaction_end(xbt, 0); + kmem_free(str, slen); + return; + } else if (strcmp(str, "poweroff") == 0) { + shutdown_code = SHUTDOWN_POWEROFF; + } else if (strcmp(str, "reboot") == 0) { + shutdown_code = SHUTDOWN_REBOOT; + } else if (strcmp(str, "suspend") == 0) { + shutdown_code = SHUTDOWN_SUSPEND; + } else if (strcmp(str, "halt") == 0) { + shutdown_code = SHUTDOWN_HALT; + } else { + printf("Ignoring shutdown request: %s\n", str); + } + + (void) xenbus_write(xbt, "control", "shutdown", ""); + err = xenbus_transaction_end(xbt, 0); + if (err == EAGAIN) { + SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id); + kmem_free(str, slen); + goto again; + } + + kmem_free(str, slen); + if (shutdown_code != SHUTDOWN_INVALID) { + (void) taskq_dispatch(xen_shutdown_tq, xen_shutdown, + (void *)(intptr_t)shutdown_code, 0); + } +} + +static int +xen_pv_init(dev_info_t *xpv_dip) +{ + struct cpuid_regs cp; + uint32_t xen_signature[4]; + char *xen_str; + struct xen_add_to_physmap xatp; + xen_capabilities_info_t caps; + pfn_t pfn; + uint64_t msrval; + int err; + + /* + * Xen's pseudo-cpuid function 0x40000000 returns a string + * representing the Xen signature in %ebx, %ecx, and %edx. + * %eax contains the maximum supported cpuid function. + */ + cp.cp_eax = 0x40000000; + (void) __cpuid_insn(&cp); + xen_signature[0] = cp.cp_ebx; + xen_signature[1] = cp.cp_ecx; + xen_signature[2] = cp.cp_edx; + xen_signature[3] = 0; + xen_str = (char *)xen_signature; + if (strcmp("XenVMMXenVMM", xen_str) != 0 || + cp.cp_eax < 0x40000002) { + cmn_err(CE_WARN, + "Attempting to load Xen drivers on non-Xen system"); + return (-1); + } + + /* + * cpuid function 0x40000001 returns the Xen version in %eax. The + * top 16 bits are the major version, the bottom 16 are the minor + * version. 
+ */ + cp.cp_eax = 0x40000001; + (void) __cpuid_insn(&cp); + xen_major = cp.cp_eax >> 16; + xen_minor = cp.cp_eax & 0xffff; + + /* + * The xpv driver is incompatible with xen versions older than 3.1. This + * is due to the changes in the vcpu_info and shared_info structs used + * to communicate with the hypervisor (the event channels in particular) + * that were introduced with 3.1. + */ + if (xen_major < 3 || (xen_major == 3 && xen_minor < 1)) { + cmn_err(CE_WARN, "Xen version %d.%d is not supported", + xen_major, xen_minor); + return (-1); + } + + /* + * cpuid function 0x40000002 returns information about the + * hypercall page. %eax nominally contains the number of pages + * with hypercall code, but according to the Xen guys, "I'll + * guarantee that remains one forever more, so you can just + * allocate a single page and get quite upset if you ever see CPUID + * return more than one page." %ebx contains an MSR we use to ask + * Xen to remap each page at a specific pfn. + */ + cp.cp_eax = 0x40000002; + (void) __cpuid_insn(&cp); + + /* + * Let Xen know where we want the hypercall page mapped. We + * already have a page allocated in the .text section to simplify + * the wrapper code. + */ + pfn = hat_getpfnum(kas.a_hat, (caddr_t)&hypercall_page); + msrval = mmu_ptob(pfn); + wrmsr(cp.cp_ebx, msrval); + + /* Fill in the xen_info data */ + xen_info = kmem_zalloc(sizeof (start_info_t), KM_SLEEP); + (void) sprintf(xen_info->magic, "xen-%d.%d", xen_major, xen_minor); + xen_info->store_mfn = (mfn_t)hvm_get_param(HVM_PARAM_STORE_PFN); + xen_info->store_evtchn = (int)hvm_get_param(HVM_PARAM_STORE_EVTCHN); + + /* Figure out whether the hypervisor is 32-bit or 64-bit. 
*/ + if ((HYPERVISOR_xen_version(XENVER_capabilities, &caps) == 0)) { + ((char *)(caps))[sizeof (caps) - 1] = '\0'; + if (strstr(caps, "x86_64") != NULL) + xen_is_64bit = 1; + else if (strstr(caps, "x86_32") != NULL) + xen_is_64bit = 0; + } + if (xen_is_64bit < 0) { + cmn_err(CE_WARN, "Couldn't get capability info from Xen."); + return (-1); + } +#ifdef __amd64 + ASSERT(xen_is_64bit == 1); +#endif + + /* + * Allocate space for the shared_info page and tell Xen where it + * is. + */ + HYPERVISOR_shared_info = xen_alloc_pages(1); + shared_info_frame = hat_getpfnum(kas.a_hat, + (caddr_t)HYPERVISOR_shared_info); + xatp.domid = DOMID_SELF; + xatp.idx = 0; + xatp.space = XENMAPSPACE_shared_info; + xatp.gpfn = shared_info_frame; + if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0) { + cmn_err(CE_WARN, "Could not get shared_info page from Xen." + " error: %d", err); + return (-1); + } + + /* Set up the grant tables. */ + gnttab_init(); + + /* Set up event channel support */ + if (ec_init(xpv_dip) != 0) + return (-1); + + /* Set up xenbus */ + xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP); + xs_early_init(); + xs_domu_init(); + + /* Set up for suspend/resume/migrate */ + xen_shutdown_tq = taskq_create("shutdown_taskq", 1, + maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE); + shutdown_watch.node = "control/shutdown"; + shutdown_watch.callback = xen_shutdown_handler; + if (register_xenbus_watch(&shutdown_watch)) + cmn_err(CE_WARN, "Failed to set shutdown watcher"); + + return (0); +} + +static void +xen_pv_fini() +{ + if (xen_info != NULL) + kmem_free(xen_info, sizeof (start_info_t)); + ec_fini(); +} + +/*ARGSUSED*/ +static int +xpv_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) +{ + if (getminor((dev_t)arg) != XPV_MINOR) + return (DDI_FAILURE); + + switch (cmd) { + case DDI_INFO_DEVT2DEVINFO: + *result = xpv_dip; + break; + case DDI_INFO_DEVT2INSTANCE: + *result = 0; + break; + default: + return (DDI_FAILURE); + } + + return 
(DDI_SUCCESS); +} + +static int +xpv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + if (cmd != DDI_ATTACH) + return (DDI_FAILURE); + + if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR, + ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS) + return (DDI_FAILURE); + + xpv_dip = dip; + + if (xen_pv_init(dip) != 0) + return (DDI_FAILURE); + + ddi_report_dev(dip); + + /* + * If the memscrubber attempts to scrub the pages we hand to Xen, + * the domain will panic. + */ + memscrub_disable(); + + /* + * Report our version to dom0. + */ + if (xenbus_printf(XBT_NULL, "hvmpv/xpv", "version", "%d", + HVMPV_XPV_VERS)) + cmn_err(CE_WARN, "xpv: couldn't write version\n"); + + return (DDI_SUCCESS); +} + +/* + * Attempts to reload the PV driver plumbing hang on Intel platforms, so + * we don't want to unload the framework by accident. + */ +int xpv_allow_detach = 0; + +static int +xpv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + if (cmd != DDI_DETACH || xpv_allow_detach == 0) + return (DDI_FAILURE); + + if (xpv_dip != NULL) { + xen_pv_fini(); + ddi_remove_minor_node(dip, NULL); + xpv_dip = NULL; + } + + return (DDI_SUCCESS); +} + +/*ARGSUSED1*/ +static int +xpv_open(dev_t *dev, int flag, int otyp, cred_t *cr) +{ + return (getminor(*dev) == XPV_MINOR ? 
0 : ENXIO); +} + +/*ARGSUSED*/ +static int +xpv_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, + int *rval_p) +{ + return (EINVAL); +} + +int +_init(void) +{ + int err; + + if ((err = mod_install(&modl)) != 0) + return (err); + + impl_bus_add_probe(xpv_enumerate); + return (0); +} + +int +_fini(void) +{ + int err; + + if ((err = mod_remove(&modl)) != 0) + return (err); + + impl_bus_delete_probe(xpv_enumerate); + return (0); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modl, modinfop)); +} diff --git a/usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile b/usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile new file mode 100644 index 0000000000..5ae59dd0bb --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile @@ -0,0 +1,102 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/i86pc/pv_cmdk/Makefile +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the xdc driver. +# +# i86pc implementation architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../../.. 
+ +# +# Define the module and object file sets. +# +MODULE = cmdk +OBJECTS = $(PV_CMDK_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(PV_CMDK_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm + +# +# When generating lint libraries, we want the name of the lint module +# that will be generated to be pv_cmdk and not cmdk, so override the +# default lint module name here. +# +LINT_MODULE = pv_cmdk + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(LINT_MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +LDFLAGS += -dy -Nmisc/strategy -Nmisc/cmlb +LDFLAGS += -Ndrv/xpvd -Ndrv/xdf + +# +# The Xen header files do not lint cleanly. Since the troublesome +# structures form part of the externally defined interface to the +# hypervisor, we're stuck with the noise. +# +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.targ diff --git a/usr/src/uts/i86pc/i86hvm/pv_rtls/Makefile b/usr/src/uts/i86pc/i86hvm/pv_rtls/Makefile new file mode 100644 index 0000000000..a2cabdef52 --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/pv_rtls/Makefile @@ -0,0 +1,90 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/i86pc/pv_rtls/Makefile +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the null rtls module for xvm. +# +# i86pc implementation architecture dependent +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../../.. + +# +# Define the module and object file sets. +# +MODULE = rtls +OBJECTS = $(PV_RTLS_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(PV_RTLS_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm + +# +# When generating lint libraries, we want the name of the lint module +# that will be generated to be pv_rtls and not rtls, so override the +# default lint module name here. +# +LINT_MODULE = pv_rtls + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(LINT_MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +LDFLAGS += -dy + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. 
+# +include $(UTSBASE)/i86pc/i86hvm/Makefile.targ diff --git a/usr/src/uts/i86pc/i86hvm/sys/xpv_support.h b/usr/src/uts/i86pc/i86hvm/sys/xpv_support.h new file mode 100644 index 0000000000..c42551b4f8 --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/sys/xpv_support.h @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_XPV_SUPPORT_H +#define _SYS_XPV_SUPPORT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__ + +#if !defined(_ASM) + +#include <sys/types.h> +#include <sys/inttypes.h> +#include <sys/dditypes.h> + +typedef ulong_t mfn_t; +typedef uint64_t maddr_t; +#define mfn_to_ma(mfn) ((maddr_t)(mfn) << MMU_PAGESHIFT) +#define MFN_INVALID (-(mfn_t)1) + +#define IPL_DEBUG 15 /* domain debug interrupt */ +#define IPL_CONS 9 +#define IPL_VIF 6 +#define IPL_VBD 5 +#define IPL_EVTCHN 1 + +#define INVALID_EVTCHN 0 + +typedef uint_t (*ec_handler_fcn_t)(); + +extern int ec_init(dev_info_t *); +extern void ec_fini(); +extern void ec_bind_evtchn_to_handler(int, pri_t, ec_handler_fcn_t, void *); +extern void ec_unbind_evtchn(int); +extern void ec_notify_via_evtchn(uint_t); +extern void hypervisor_mask_event(uint_t); +extern void hypervisor_unmask_event(uint_t); + +extern int xen_bind_interdomain(int, int, int *); +extern int xen_alloc_unbound_evtchn(int, int *); +extern int xen_xlate_errcode(int error); +extern void *xen_alloc_pages(pgcnt_t cnt); +extern void kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level); + +/* + * Stub functions to allow the FE drivers to build without littering them + * with #ifdefs + */ +extern void balloon_drv_added(int64_t); +extern long balloon_free_pages(uint_t, mfn_t *, caddr_t, pfn_t *); +extern void xen_release_pfn(pfn_t, caddr_t); +extern void reassign_pfn(pfn_t, mfn_t); + +extern int xen_is_64bit; + +#define IN_XPV_PANIC() (__lintzero) + +#ifdef __cplusplus +} +#endif + +#endif /* !_ASM */ +#endif /* _SYS_XPV_SUPPORT_H */ diff --git a/usr/src/uts/i86pc/i86hvm/xdf/Makefile b/usr/src/uts/i86pc/i86hvm/xdf/Makefile new file mode 100644 index 0000000000..4b7bbe75d8 --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/xdf/Makefile @@ -0,0 +1,89 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common 
Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# uts/i86pc/xdf/Makefile +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# i86pc architecture dependent +# +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../../.. + +# +# Define the module and object file sets. +# +MODULE = xdf +OBJECTS = $(XDF_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(XDF_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# Overrides +CPPFLAGS += -DHVMPV_XDF_VERS=1 +LDFLAGS += -dy -Nmisc/cmlb -Ndrv/xpvd -Ndrv/xpv + +LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. 
+# +include $(UTSBASE)/i86pc/i86hvm/Makefile.targ diff --git a/usr/src/uts/i86pc/i86hvm/xnf/Makefile b/usr/src/uts/i86pc/i86hvm/xnf/Makefile new file mode 100644 index 0000000000..683572496f --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/xnf/Makefile @@ -0,0 +1,95 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# uts/i86pc/xnf/Makefile +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the xnf +# network driver kernel module. +# +# i86pc architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../../.. + +# +# Define the module and object file sets. +# +MODULE = xnf +OBJECTS = $(XNF_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(XNF_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE) + +# +# Include common rules. 
+# +include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Driver depends on MAC & IP +# +CPPFLAGS += -DHVMPV_XNF_VERS=1 +LDFLAGS += -dy -Nmisc/mac -Ndrv/ip -Ndrv/xpvd -Ndrv/xpv + +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.targ diff --git a/usr/src/uts/i86pc/i86hvm/xpv/Makefile b/usr/src/uts/i86pc/i86hvm/xpv/Makefile new file mode 100644 index 0000000000..a1ff318e68 --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/xpv/Makefile @@ -0,0 +1,98 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/i86pc/xpv/Makefile +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the xpv +# driver, which provides the necessary infrastructure for +# paravirtualized front-end drivers in HVM systems. +# +# i86pc implementation architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../../.. + +# +# Define the module and object file sets. +# +MODULE = xpv +OBJECTS = $(XPV_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(XPV_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/i86pc/i86hvm/io/xpv + +# +# Include common rules. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(CONFMOD) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +CPPFLAGS += -DHVMPV_XPV_VERS=1 +LDFLAGS += -dy -N mach/pcplusmp + +# +# The Xen header files do not lint cleanly. Since the troublesome +# structures form part of the externally defined interface to the +# hypervisor, we're stuck with the noise. +# +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.targ diff --git a/usr/src/uts/i86pc/i86hvm/xpvd/Makefile b/usr/src/uts/i86pc/i86hvm/xpvd/Makefile new file mode 100644 index 0000000000..283bd34e5e --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/xpvd/Makefile @@ -0,0 +1,90 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the xpvd nexus driver +# +# i86pc implementation architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../../.. + +# +# Define the module and object file sets. +# +MODULE = xpvd +OBJECTS = $(XPVD_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(XPVD_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/xen/io + +# +# Include common rules. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(CONFMOD) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +CPPFLAGS += -DHVMPV_XPVD_VERS=1 +LDFLAGS += -dy -Ndrv/xpv + +LINTTAGS += -erroff=E_STATIC_UNUSED +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.targ |
