author		mrj <none@none>		2007-12-21 14:13:23 -0800
committer	mrj <none@none>		2007-12-21 14:13:23 -0800
commit		551bc2a66868b5cb5be6b70ab9f55515e77a39a9 (patch)
tree		a01e761c9864ea9483c468ced858a0f67edcbf93
parent		71a79fe7afa36dcf0de6902c2c6ef432980534d3 (diff)
download	illumos-joyent-551bc2a66868b5cb5be6b70ab9f55515e77a39a9.tar.gz
PSARC 2007/664 Paravirtualized Drivers for Fully Virtualized xVM Domains
6525093 xnb/xnf should use hypervisor based copy for xnb->xnf data path
6608917 members of struct xnf and xnb need unique names
6609324 deadlock trying to own the HAT migrate lock
6609805 still missing XPV_DISALLOW_MIGRATE/XPV_ALLOW_MIGRATE bracketing in hat_i86.c
6616384 xnb's grant ref unmapping is inefficient
6619947 Solaris should provide a PV network driver for xVM HVM environments
6632774 panic setting up xen console
--HG--
rename : usr/src/uts/i86xpv/os/gnttab.c => usr/src/uts/common/xen/os/gnttab.c
rename : usr/src/uts/i86xpv/os/hypercall.c => usr/src/uts/common/xen/os/hypercall.c
rename : usr/src/uts/i86xpv/sys/gnttab.h => usr/src/uts/common/xen/sys/gnttab.h
rename : usr/src/uts/i86xpv/ml/hypersubr.s => usr/src/uts/intel/ia32/ml/hypersubr.s
rename : usr/src/uts/i86xpv/sys/hypervisor.h => usr/src/uts/intel/sys/hypervisor.h
rename : usr/src/uts/i86xpv/sys/xen_errno.h => usr/src/uts/intel/sys/xen_errno.h
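The headline change (6525093) switches the xnb->xnf data path from page-flipping (GNTTABOP_transfer) to a hypervisor-mediated copy. As a minimal sketch of one copy descriptor, mirroring the setup_gop() routine added in xnb.c below; the names s_off, d_off, len, rptr, rxreq and peer_domid stand in for the caller's state, and error handling is elided:

    /*
     * One GNTTABOP_copy descriptor, per the setup_gop() routine added
     * below.  The source is a local page (identified by MFN); the
     * destination is a page the peer has offered via a grant reference.
     */
    gnttab_copy_t gop;

    gop.source.offset = s_off;		/* byte offset into source page */
    gop.source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
    gop.source.domid = DOMID_SELF;	/* this domain owns the source */
    gop.len = (uint16_t)len;		/* must not cross a page boundary */
    gop.flags = GNTCOPY_dest_gref;	/* destination given as grant ref */
    gop.status = 0;
    gop.dest.u.ref = rxreq->gref;	/* grant ref from the peer's request */
    gop.dest.offset = d_off;
    gop.dest.domid = peer_domid;	/* the receiving domU */

    /* Many descriptors can be batched into a single hypercall. */
    if (HYPERVISOR_grant_table_op(GNTTABOP_copy, &gop, 1) != 0)
            cmn_err(CE_WARN, "copy operation failed");

Because one descriptor cannot cross a page boundary, xnb_copy_to_peer() below emits a separate descriptor for each page of each mblk.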
-rw-r--r--  usr/src/cmd/boot/filelist/i386/filelist.ramdisk  1
-rw-r--r--  usr/src/pkgdefs/Makefile  3
-rw-r--r--  usr/src/pkgdefs/SUNWhea/prototype_i386  4
-rw-r--r--  usr/src/pkgdefs/SUNWxvmpv/Makefile  40
-rw-r--r--  usr/src/pkgdefs/SUNWxvmpv/pkginfo.tmpl  47
-rw-r--r--  usr/src/pkgdefs/SUNWxvmpv/postinstall.tmpl  34
-rw-r--r--  usr/src/pkgdefs/SUNWxvmpv/preremove.tmpl  33
-rw-r--r--  usr/src/pkgdefs/SUNWxvmpv/prototype_i386  61
-rw-r--r--  usr/src/pkgdefs/etc/exception_list_i386  5
-rw-r--r--  usr/src/tools/scripts/bfu.sh  22
-rw-r--r--  usr/src/uts/common/sys/thread.h  1
-rw-r--r--  usr/src/uts/common/xen/io/xdb.c  25
-rw-r--r--  usr/src/uts/common/xen/io/xdb.h  13
-rw-r--r--  usr/src/uts/common/xen/io/xdf.c  58
-rw-r--r--  usr/src/uts/common/xen/io/xdf.h  25
-rw-r--r--  usr/src/uts/common/xen/io/xenbus_client.c  5
-rw-r--r--  usr/src/uts/common/xen/io/xenbus_comms.c  19
-rw-r--r--  usr/src/uts/common/xen/io/xenbus_dev.c  9
-rw-r--r--  usr/src/uts/common/xen/io/xenbus_probe.c  4
-rw-r--r--  usr/src/uts/common/xen/io/xenbus_xs.c  5
-rw-r--r--  usr/src/uts/common/xen/io/xencons.c  2
-rw-r--r--  usr/src/uts/common/xen/io/xnb.c  1066
-rw-r--r--  usr/src/uts/common/xen/io/xnb.h  124
-rw-r--r--  usr/src/uts/common/xen/io/xnbo.c  52
-rw-r--r--  usr/src/uts/common/xen/io/xnbu.c  66
-rw-r--r--  usr/src/uts/common/xen/io/xnf.c  1285
-rw-r--r--  usr/src/uts/common/xen/io/xnf.h  154
-rw-r--r--  usr/src/uts/common/xen/io/xpvd.c  96
-rw-r--r--  usr/src/uts/common/xen/io/xpvd.conf  28
-rw-r--r--  usr/src/uts/common/xen/os/gnttab.c (renamed from usr/src/uts/i86xpv/os/gnttab.c)  66
-rw-r--r--  usr/src/uts/common/xen/os/hypercall.c (renamed from usr/src/uts/i86xpv/os/hypercall.c)  5
-rw-r--r--  usr/src/uts/common/xen/os/xvdi.c  49
-rw-r--r--  usr/src/uts/common/xen/sys/gnttab.h (renamed from usr/src/uts/i86xpv/sys/gnttab.h)  0
-rw-r--r--  usr/src/uts/common/xen/sys/xendev.h  5
-rw-r--r--  usr/src/uts/i86pc/Makefile.files  5
-rw-r--r--  usr/src/uts/i86pc/Makefile.hvm  67
-rw-r--r--  usr/src/uts/i86pc/Makefile.i86pc.shared  3
-rw-r--r--  usr/src/uts/i86pc/Makefile.rules  31
-rw-r--r--  usr/src/uts/i86pc/io/xpv/evtchn.c  450
-rw-r--r--  usr/src/uts/i86pc/io/xpv/xpv.conf  28
-rw-r--r--  usr/src/uts/i86pc/io/xpv/xpv_support.c  541
-rw-r--r--  usr/src/uts/i86pc/os/cpuid.c  32
-rw-r--r--  usr/src/uts/i86pc/os/mlsetup.c  8
-rw-r--r--  usr/src/uts/i86pc/os/startup.c  38
-rw-r--r--  usr/src/uts/i86pc/sys/xpv_support.h  91
-rw-r--r--  usr/src/uts/i86pc/vm/hat_i86.c  12
-rw-r--r--  usr/src/uts/i86pc/xnf/Makefile  98
-rw-r--r--  usr/src/uts/i86pc/xpv/Makefile  101
-rw-r--r--  usr/src/uts/i86pc/xpvd/Makefile  92
-rw-r--r--  usr/src/uts/i86xpv/Makefile.files  3
-rw-r--r--  usr/src/uts/i86xpv/Makefile.rules  12
-rw-r--r--  usr/src/uts/i86xpv/os/xen_mmu.c  24
-rw-r--r--  usr/src/uts/i86xpv/sys/Makefile  2
-rw-r--r--  usr/src/uts/intel/ia32/ml/hypersubr.s (renamed from usr/src/uts/i86xpv/ml/hypersubr.s)  71
-rw-r--r--  usr/src/uts/intel/os/name_to_major  1
-rw-r--r--  usr/src/uts/intel/sys/Makefile  4
-rw-r--r--  usr/src/uts/intel/sys/hypervisor.h (renamed from usr/src/uts/i86xpv/sys/hypervisor.h)  15
-rw-r--r--  usr/src/uts/intel/sys/xen_errno.h (renamed from usr/src/uts/i86xpv/sys/xen_errno.h)  0
58 files changed, 4045 insertions, 1096 deletions
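Much of the mechanical churn in the driver sources below follows a single pattern: under XPV_HVM_DRIVER (a PV driver running inside a fully virtualized domain, 6619947) there is no i86xpv interrupt nexus, so the drivers bind and unbind their event channels directly instead of going through the DDI interrupt routines. A condensed sketch of the pattern as it appears in the xdf.c hunks; vdp stands in for the driver soft state and error paths are elided:

    /* Attach path: cache the event channel, then wire up the handler. */
    vdp->xdf_evtchn = xvdi_get_evtchn(dip);
    #ifdef XPV_HVM_DRIVER
            ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp);
    #else
            (void) ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp);
    #endif

    /* Detach/suspend path: undo whichever binding was made. */
    #ifdef XPV_HVM_DRIVER
            ec_unbind_evtchn(vdp->xdf_evtchn);
    #else
            (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
    #endif

Caching the channel number in the soft state (the new xdf_evtchn field) is what lets the unbind work after the DDI interrupt path is bypassed.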
diff --git a/usr/src/cmd/boot/filelist/i386/filelist.ramdisk b/usr/src/cmd/boot/filelist/i386/filelist.ramdisk
index 4029f595dc..7a7d78e457 100644
--- a/usr/src/cmd/boot/filelist/i386/filelist.ramdisk
+++ b/usr/src/cmd/boot/filelist/i386/filelist.ramdisk
@@ -16,6 +16,7 @@ etc/path_to_inst
etc/rtc_config
etc/system
kernel
+platform/i86hvm/kernel
platform/i86pc/kernel
platform/i86xpv/kernel
platform/i86pc/ucode/GenuineIntel
diff --git a/usr/src/pkgdefs/Makefile b/usr/src/pkgdefs/Makefile
index 6e09175617..60c94941d6 100644
--- a/usr/src/pkgdefs/Makefile
+++ b/usr/src/pkgdefs/Makefile
@@ -142,7 +142,8 @@ i386_SUBDIRS= \
SUNWsi3124 \
SUNWvia823x \
SUNWwpi \
- SUNWxsvc
+ SUNWxsvc \
+ SUNWxvmpv
i386_XMODS= \
BRCMbnx \
diff --git a/usr/src/pkgdefs/SUNWhea/prototype_i386 b/usr/src/pkgdefs/SUNWhea/prototype_i386
index ec0dc2ad4b..5ab4b464d1 100644
--- a/usr/src/pkgdefs/SUNWhea/prototype_i386
+++ b/usr/src/pkgdefs/SUNWhea/prototype_i386
@@ -87,6 +87,7 @@ f none usr/include/sys/dktp/dadkio.h 644 root bin
f none usr/include/sys/dktp/fdisk.h 644 root bin
f none usr/include/sys/dma_engine.h 644 root bin
f none usr/include/sys/fp.h 644 root bin
+f none usr/include/sys/hypervisor.h 644 root bin
f none usr/include/sys/i8272A.h 644 root bin
f none usr/include/sys/kd.h 644 root bin
f none usr/include/sys/mc.h 644 root bin
@@ -120,6 +121,7 @@ f none usr/include/sys/traptrace.h 644 root bin
f none usr/include/sys/tss.h 644 root bin
f none usr/include/sys/x86_archext.h 644 root bin
f none usr/include/sys/ucode.h 644 root bin
+f none usr/include/sys/xen_errno.h 644 root bin
d none usr/platform 755 root sys
d none usr/platform/i86pc 755 root sys
d none usr/platform/i86pc/include 755 root bin
@@ -158,9 +160,7 @@ d none usr/platform/i86xpv 755 root sys
d none usr/platform/i86xpv/include 755 root bin
d none usr/platform/i86xpv/include/sys 755 root bin
f none usr/platform/i86xpv/include/sys/balloon.h 644 root bin
-f none usr/platform/i86xpv/include/sys/hypervisor.h 644 root bin
f none usr/platform/i86xpv/include/sys/machprivregs.h 644 root bin
-f none usr/platform/i86xpv/include/sys/xen_errno.h 644 root bin
f none usr/platform/i86xpv/include/sys/xen_mmu.h 644 root bin
f none usr/platform/i86xpv/include/sys/xpv_impl.h 644 root bin
d none usr/platform/i86xpv/include/vm 755 root bin
diff --git a/usr/src/pkgdefs/SUNWxvmpv/Makefile b/usr/src/pkgdefs/SUNWxvmpv/Makefile
new file mode 100644
index 0000000000..a38f5dce9b
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWxvmpv/Makefile
@@ -0,0 +1,40 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+TMPLFILES += postinstall preremove
+DATAFILES += depend
+
+.KEEP_STATE:
+
+all: $(FILES)
+install: all pkg
+
+include ../Makefile.targ
+include ../Makefile.prtarg
diff --git a/usr/src/pkgdefs/SUNWxvmpv/pkginfo.tmpl b/usr/src/pkgdefs/SUNWxvmpv/pkginfo.tmpl
new file mode 100644
index 0000000000..29a5ae2c56
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWxvmpv/pkginfo.tmpl
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+PKG=SUNWxvmpv
+NAME=xVM Paravirtualized Drivers
+ARCH="i386"
+VERSION="ONVERS,REV=0.0.0"
+SUNW_PRODNAME="SunOS"
+SUNW_PRODVERS="RELEASE/VERSION"
+SUNW_PKGVERS="1.0"
+SUNW_PKGTYPE="root"
+MAXINST="1000"
+CATEGORY=system
+VENDOR="Sun Microsystems, Inc."
+DESC="xVM Paravirtualized Drivers"
+CLASSES="none preserve"
+HOTLINE="Please contact your local service provider"
+EMAIL=""
+BASEDIR=/
+SUNW_PKG_ALLZONES="true"
+SUNW_PKG_HOLLOW="true"
+SUNW_PKG_THISZONE="false"
diff --git a/usr/src/pkgdefs/SUNWxvmpv/postinstall.tmpl b/usr/src/pkgdefs/SUNWxvmpv/postinstall.tmpl
new file mode 100644
index 0000000000..826d451125
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWxvmpv/postinstall.tmpl
@@ -0,0 +1,34 @@
+#!/sbin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include drv_utils
+
+pkg_drvadd -i "pci5853,1" -b "$BASEDIR" xpv || exit 1
+pkg_drvadd xpvd || exit 1
+pkg_drvadd xnf || exit 1
diff --git a/usr/src/pkgdefs/SUNWxvmpv/preremove.tmpl b/usr/src/pkgdefs/SUNWxvmpv/preremove.tmpl
new file mode 100644
index 0000000000..006870f722
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWxvmpv/preremove.tmpl
@@ -0,0 +1,33 @@
+#!/sbin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include drv_utils
+pkg_drvrem xnf || exit 1
+pkg_drvrem xpvd || exit 1
+pkg_drvrem xpv || exit 1
diff --git a/usr/src/pkgdefs/SUNWxvmpv/prototype_i386 b/usr/src/pkgdefs/SUNWxvmpv/prototype_i386
new file mode 100644
index 0000000000..16bf5a6a30
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWxvmpv/prototype_i386
@@ -0,0 +1,61 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+#
+#
+i pkginfo
+i copyright
+i depend
+i postinstall
+i preremove
+
+# xVM PV drivers
+d none platform 0755 root sys
+d none platform/i86hvm 0755 root sys
+d none platform/i86hvm/kernel 0755 root sys
+d none platform/i86hvm/kernel/drv 0755 root sys
+d none platform/i86hvm/kernel/drv/amd64 0755 root sys
+f none platform/i86hvm/kernel/drv/amd64/xnf 0755 root sys
+f none platform/i86hvm/kernel/drv/amd64/xpv 0755 root sys
+f none platform/i86hvm/kernel/drv/amd64/xpvd 0755 root sys
+f none platform/i86hvm/kernel/drv/xnf 0755 root sys
+f none platform/i86hvm/kernel/drv/xpv 0755 root sys
+f none platform/i86hvm/kernel/drv/xpv.conf 0644 root sys
+f none platform/i86hvm/kernel/drv/xpvd 0755 root sys
+f none platform/i86hvm/kernel/drv/xpvd.conf 0644 root sys
diff --git a/usr/src/pkgdefs/etc/exception_list_i386 b/usr/src/pkgdefs/etc/exception_list_i386
index 12e706fc7d..8256e86212 100644
--- a/usr/src/pkgdefs/etc/exception_list_i386
+++ b/usr/src/pkgdefs/etc/exception_list_i386
@@ -1034,3 +1034,8 @@ usr/include/libvscan.h i386
#
usr/lib/vscan/llib-lvscan i386
usr/lib/vscan/llib-lvscan.ln i386
+#
+# i86hvm is not a full platform. It is just a home for paravirtualized
+# drivers. There is no usr/ component to this sub-platform, but the
+# directory is created in the proto area to keep other tools happy.
+usr/platform/i86hvm i386
diff --git a/usr/src/tools/scripts/bfu.sh b/usr/src/tools/scripts/bfu.sh
index f68fa09651..c86cb80626 100644
--- a/usr/src/tools/scripts/bfu.sh
+++ b/usr/src/tools/scripts/bfu.sh
@@ -2149,14 +2149,20 @@ if [ $diskless = no ]; then
chgrp sys $root/platform/sun4u-us3
fi
- if [ $target_isa = i386 -a $archive_type = xpv ]; then
- #
- # On i386, we want to apply the archives for both platforms
- # (i86pc and i86xpv) if they exist. We force the platform
- # to i86xpv so that both will be applied.
- #
- karch=i86pc
- plat=i86xpv
+ if [ $target_isa = i386 ]; then
+ if [ $archive_type = xpv ]; then
+ #
+ # On i386, we want to apply the archives for both
+ # platforms (i86pc and i86xpv) if they exist. We
+ # force the platform to i86xpv so that both will be
+ # applied.
+ #
+ karch=i86pc
+ plat=i86xpv
+ fi
+ if [ ! -d $root/platform/i86hvm ]; then
+ mkdir $root/platform/i86hvm
+ fi
fi
if [ $karch != $plat -a -f ${cpiodir}/${plat}.usr$ZFIX ]; then
diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h
index 74404edd3e..d545e093b3 100644
--- a/usr/src/uts/common/sys/thread.h
+++ b/usr/src/uts/common/sys/thread.h
@@ -292,6 +292,7 @@ typedef struct _kthread {
uint8_t t_unpark; /* modified holding t_delay_lock */
uint8_t t_release; /* lwp_release() waked up the thread */
uint8_t t_hatdepth; /* depth of recursive hat_memloads */
+ uint8_t t_xpvcntr; /* see xen_block_migrate() */
kcondvar_t t_joincv; /* cv used to wait for thread exit */
void *t_taskq; /* for threads belonging to taskq */
hrtime_t t_anttime; /* most recent time anticipatory load */
diff --git a/usr/src/uts/common/xen/io/xdb.c b/usr/src/uts/common/xen/io/xdb.c
index b640010c22..33a075ac3d 100644
--- a/usr/src/uts/common/xen/io/xdb.c
+++ b/usr/src/uts/common/xen/io/xdb.c
@@ -50,9 +50,30 @@
#pragma ident "%Z%%M% %I% %E% SMI"
-#include "xdb.h"
-#include <sys/lofi.h>
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/dditypes.h>
+#include <sys/sunddi.h>
+#include <sys/list.h>
+#include <sys/dkio.h>
+#include <sys/cmlb.h>
+#include <sys/vtoc.h>
+#include <sys/modctl.h>
+#include <sys/bootconf.h>
+#include <sys/promif.h>
+#include <sys/sysmacros.h>
+#include <public/io/xenbus.h>
+#include <xen/sys/xenbus_impl.h>
+#include <xen/sys/xendev.h>
+#include <sys/gnttab.h>
+#include <sys/scsi/generic/inquiry.h>
+#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
+#include <sys/gnttab.h>
+#include <sys/lofi.h>
+#include <io/xdf.h>
+#include <io/xdb.h>
static xdb_t *xdb_statep;
static int xdb_debug = 0;
diff --git a/usr/src/uts/common/xen/io/xdb.h b/usr/src/uts/common/xen/io/xdb.h
index 81f6b5d9c2..d4d744d2ac 100644
--- a/usr/src/uts/common/xen/io/xdb.h
+++ b/usr/src/uts/common/xen/io/xdb.h
@@ -34,19 +34,6 @@
extern "C" {
#endif
-#include <sys/types.h>
-#include <sys/conf.h>
-#include <sys/ddi.h>
-#include <sys/dditypes.h>
-#include <sys/sunddi.h>
-#include <sys/sunldi.h>
-#include <sys/modctl.h>
-#include <vm/seg_kmem.h>
-#include <sys/gnttab.h>
-#include <xen/sys/xenbus_impl.h>
-#include <xen/sys/xendev.h>
-#include "xdf.h"
-
#define XDB_DBG_ALL 0xf
#define XDB_DBG_IO 0x1
#define XDB_DBG_INFO 0x2
diff --git a/usr/src/uts/common/xen/io/xdf.c b/usr/src/uts/common/xen/io/xdf.c
index c820bb27c5..4d695ec992 100644
--- a/usr/src/uts/common/xen/io/xdf.c
+++ b/usr/src/uts/common/xen/io/xdf.c
@@ -33,7 +33,30 @@
#pragma ident "%Z%%M% %I% %E% SMI"
-#include "xdf.h"
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/dditypes.h>
+#include <sys/sunddi.h>
+#include <sys/list.h>
+#include <sys/cmlb.h>
+#include <sys/dkio.h>
+#include <sys/vtoc.h>
+#include <sys/modctl.h>
+#include <sys/bootconf.h>
+#include <sys/promif.h>
+#include <sys/sysmacros.h>
+#include <sys/kstat.h>
+#include <sys/mach_mmu.h>
+#ifdef XPV_HVM_DRIVER
+#include <sys/xpv_support.h>
+#endif
+#include <public/io/xenbus.h>
+#include <xen/sys/xenbus_impl.h>
+#include <xen/sys/xendev.h>
+#include <sys/gnttab.h>
+#include <sys/scsi/generic/inquiry.h>
+#include <io/xdf.h>
#define FLUSH_DISKCACHE 0x1
#define WRITE_BARRIER 0x2
@@ -302,6 +325,16 @@ xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
ddi_iblock_cookie_t ibc;
ddi_iblock_cookie_t softibc;
int instance;
+#if defined(XPV_HVM_DRIVER) && defined(__i386)
+ /* XXX: 6609126 32-bit xdf driver panics on a 64-bit dom0 */
+ extern int xen_is_64bit;
+
+ if (xen_is_64bit) {
+ cmn_err(CE_WARN, "xdf cannot be used in 32-bit domUs on a"
+ " 64-bit dom0.");
+ return (DDI_FAILURE);
+ }
+#endif
xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM,
"xdfdebug", 0);
@@ -534,7 +567,11 @@ xdf_suspend(dev_info_t *devi)
/* make sure no more I/O responses left in the ring buffer */
if ((st == XD_INIT) || (st == XD_READY)) {
+#ifdef XPV_HVM_DRIVER
+ ec_unbind_evtchn(vdp->xdf_evtchn);
+#else
(void) ddi_remove_intr(devi, 0, NULL);
+#endif
(void) xdf_drain_io(vdp);
/*
* no need to teardown the ring buffer here
@@ -1437,7 +1474,9 @@ xdf_drain_io(xdf_t *vdp)
if (!xvdi_ring_has_incomp_request(xbr))
goto out;
+#ifndef XPV_HVM_DRIVER
(void) HYPERVISOR_yield();
+#endif
/*
* file-backed devices can be slow
*/
@@ -1616,12 +1655,17 @@ xdf_start_connect(xdf_t *vdp)
ddi_get_name_addr(dip));
goto errout;
}
+ vdp->xdf_evtchn = xvdi_get_evtchn(dip);
+#ifdef XPV_HVM_DRIVER
+ ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp);
+#else
if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) !=
DDI_SUCCESS) {
cmn_err(CE_WARN, "xdf_start_connect: xdf@%s: "
"failed to add intr handler", ddi_get_name_addr(dip));
goto errout1;
}
+#endif
if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE,
sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) !=
@@ -1657,7 +1701,7 @@ trans_retry:
}
if (rv = xenbus_printf(xbt, xsnode, "event-channel", "%u",
- xvdi_get_evtchn(dip))) {
+ vdp->xdf_evtchn)) {
cmn_err(CE_WARN, "xdf@%s: failed to write event-channel",
ddi_get_name_addr(dip));
xvdi_fatal_error(dip, rv, "writing event-channel");
@@ -1694,7 +1738,11 @@ abort_trans:
fail_trans:
xvdi_free_ring(vdp->xdf_xb_ring);
errout2:
+#ifdef XPV_HVM_DRIVER
+ ec_unbind_evtchn(vdp->xdf_evtchn);
+#else
(void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
+#endif
errout1:
xvdi_free_evtchn(dip);
errout:
@@ -1786,7 +1834,7 @@ xdf_post_connect(xdf_t *vdp)
/*
* We've created all the minor nodes via cmlb_attach() using default
- * value in xdf_attach() to make it possbile to block in xdf_open(),
+ * value in xdf_attach() to make it possible to block in xdf_open(),
* in case there's anyone (say, booting thread) ever trying to open
* it before connected to backend. We will refresh all those minor
* nodes w/ latest info we've got now when we are almost connected.
@@ -1857,7 +1905,11 @@ xdf_post_connect(xdf_t *vdp)
static void
xdf_post_disconnect(xdf_t *vdp)
{
+#ifdef XPV_HVM_DRIVER
+ ec_unbind_evtchn(vdp->xdf_evtchn);
+#else
(void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
+#endif
xvdi_free_evtchn(vdp->xdf_dip);
xvdi_free_ring(vdp->xdf_xb_ring);
vdp->xdf_xb_ring = NULL;
diff --git a/usr/src/uts/common/xen/io/xdf.h b/usr/src/uts/common/xen/io/xdf.h
index c3992c62fc..ea796772dd 100644
--- a/usr/src/uts/common/xen/io/xdf.h
+++ b/usr/src/uts/common/xen/io/xdf.h
@@ -35,26 +35,6 @@ extern "C" {
#endif
-#include <sys/types.h>
-#include <sys/conf.h>
-#include <sys/ddi.h>
-#include <sys/dditypes.h>
-#include <sys/sunddi.h>
-#include <sys/list.h>
-#include <sys/dkio.h>
-#include <sys/vtoc.h>
-#include <sys/modctl.h>
-#include <sys/bootconf.h>
-#include <sys/promif.h>
-#include <sys/open.h>
-#include <sys/sysmacros.h>
-#include <sys/kstat.h>
-#include <sys/gnttab.h>
-#include <xen/sys/xenbus_impl.h>
-#include <xen/sys/xendev.h>
-#include <sys/cmlb.h>
-#include <sys/scsi/generic/inquiry.h>
-
#define BLKIF_RING_SIZE __RING_SIZE((blkif_sring_t *)NULL, PAGESIZE)
/*
@@ -108,7 +88,7 @@ enum xdf_state {
};
/*
- * 16 paritions + fdisk
+ * 16 partitions + fdisk
*/
#define XDF_PSHIFT 6
#define XDF_PMASK ((1 << XDF_PSHIFT) - 1)
@@ -176,7 +156,7 @@ typedef struct v_req {
* Status set and checked in vreq->v_status by vreq_setup()
*
* These flags will help us to continue the vreq setup work from last failure
- * point, instead of starting from scrath after each failure.
+ * point, instead of starting from scratch after each failure.
*/
#define VREQ_INIT 0x0
#define VREQ_INIT_DONE 0x1
@@ -218,6 +198,7 @@ typedef struct xdf {
int xdf_wce;
char *xdf_flush_mem;
char *xdf_cache_flush_block;
+ int xdf_evtchn;
#ifdef DEBUG
int xdf_dmacallback_num;
#endif
diff --git a/usr/src/uts/common/xen/io/xenbus_client.c b/usr/src/uts/common/xen/io/xenbus_client.c
index b0e2b5e520..b0cb441332 100644
--- a/usr/src/uts/common/xen/io/xenbus_client.c
+++ b/usr/src/uts/common/xen/io/xenbus_client.c
@@ -55,9 +55,14 @@
#pragma ident "%Z%%M% %I% %E% SMI"
+#ifdef XPV_HVM_DRIVER
+#include <sys/xpv_support.h>
+#include <sys/hypervisor.h>
+#else
#include <sys/hypervisor.h>
#include <sys/xen_mmu.h>
#include <sys/evtchn_impl.h>
+#endif
#include <sys/gnttab.h>
#include <xen/sys/xenbus_impl.h>
#include <sys/cmn_err.h>
diff --git a/usr/src/uts/common/xen/io/xenbus_comms.c b/usr/src/uts/common/xen/io/xenbus_comms.c
index ee4c162bf4..e7eb20f166 100644
--- a/usr/src/uts/common/xen/io/xenbus_comms.c
+++ b/usr/src/uts/common/xen/io/xenbus_comms.c
@@ -59,12 +59,18 @@
#include <sys/types.h>
#include <vm/hat.h>
#include <vm/as.h>
-#include <sys/bootinfo.h>
#include <sys/bootconf.h>
-#include <vm/kboot_mmu.h>
#include <vm/seg_kmem.h>
+#ifdef XPV_HVM_DRIVER
+#include <sys/pc_mmu.h>
+#include <sys/xpv_support.h>
+#include <sys/hypervisor.h>
+#else
+#include <vm/kboot_mmu.h>
+#include <sys/bootinfo.h>
#include <sys/hypervisor.h>
#include <sys/evtchn_impl.h>
+#endif
#include <sys/condvar.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
@@ -240,10 +246,19 @@ xb_suspend(void)
void
xb_setup_intr(void)
{
+#ifdef XPV_HVM_DRIVER
+ ec_bind_evtchn_to_handler(xen_info->store_evtchn, IPL_XENBUS,
+ xenbus_intr, NULL);
+#else
xenbus_irq = ec_bind_evtchn_to_irq(xen_info->store_evtchn);
+ if (xenbus_irq < 0) {
+ cmn_err(CE_WARN, "Couldn't bind xenbus event channel");
+ return;
+ }
if (!add_avintr(NULL, IPL_XENBUS, (avfunc)xenbus_intr, "xenbus",
xenbus_irq, NULL, NULL, NULL, NULL))
cmn_err(CE_WARN, "XENBUS add intr failed\n");
+#endif
}
/*
diff --git a/usr/src/uts/common/xen/io/xenbus_dev.c b/usr/src/uts/common/xen/io/xenbus_dev.c
index 57c57d886f..0eb82322b0 100644
--- a/usr/src/uts/common/xen/io/xenbus_dev.c
+++ b/usr/src/uts/common/xen/io/xenbus_dev.c
@@ -71,10 +71,15 @@
#include <sys/condvar.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
+#ifdef XPV_HVM_DRIVER
+#include <public/io/xenbus.h>
+#include <public/io/xs_wire.h>
+#include <sys/xpv_support.h>
+#endif
#include <sys/hypervisor.h>
+#include <xen/sys/xenbus.h>
#include <xen/sys/xenbus_comms.h>
#include <xen/sys/xenbus_impl.h>
-#include <xen/sys/xenbus.h>
#include <xen/public/io/xs_wire.h>
#ifdef DEBUG
@@ -287,8 +292,10 @@ xenbusdrv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
xenbusdrv_dip = dip;
ddi_report_dev(dip);
+#ifndef XPV_HVM_DRIVER
if (DOMAIN_IS_INITDOMAIN(xen_info))
xs_dom0_init();
+#endif
return (DDI_SUCCESS);
diff --git a/usr/src/uts/common/xen/io/xenbus_probe.c b/usr/src/uts/common/xen/io/xenbus_probe.c
index 18d1e7a7d7..ebf3a12a3e 100644
--- a/usr/src/uts/common/xen/io/xenbus_probe.c
+++ b/usr/src/uts/common/xen/io/xenbus_probe.c
@@ -55,8 +55,10 @@
#pragma ident "%Z%%M% %I% %E% SMI"
+#ifdef XPV_HVM_DRIVER
+#include <sys/xpv_support.h>
+#endif
#include <sys/hypervisor.h>
-#include <sys/evtchn_impl.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xenbus_comms.h>
#include <xen/public/io/xs_wire.h>
diff --git a/usr/src/uts/common/xen/io/xenbus_xs.c b/usr/src/uts/common/xen/io/xenbus_xs.c
index 04ac2988e3..39f41ecd60 100644
--- a/usr/src/uts/common/xen/io/xenbus_xs.c
+++ b/usr/src/uts/common/xen/io/xenbus_xs.c
@@ -78,10 +78,13 @@
#include <sys/sunddi.h>
#include <sys/avintr.h>
#include <sys/cmn_err.h>
+#include <sys/mach_mmu.h>
#include <util/sscanf.h>
#define _XSD_ERRORS_DEFINED
+#ifdef XPV_HVM_DRIVER
+#include <sys/xpv_support.h>
+#endif
#include <sys/hypervisor.h>
-#include <sys/mach_mmu.h>
#include <sys/taskq.h>
#include <sys/sdt.h>
#include <xen/sys/xenbus_impl.h>
diff --git a/usr/src/uts/common/xen/io/xencons.c b/usr/src/uts/common/xen/io/xencons.c
index d6eb84dc91..891b2f18e5 100644
--- a/usr/src/uts/common/xen/io/xencons.c
+++ b/usr/src/uts/common/xen/io/xencons.c
@@ -290,9 +290,9 @@ xenconssetup(struct xencons *xcp)
mutex_exit(&xcp->excl);
} else {
(void) xvdi_alloc_evtchn(xcp->dip);
+ xcp->evtchn = xvdi_get_evtchn(xcp->dip);
(void) ddi_add_intr(xcp->dip, 0, NULL, NULL, xenconsintr,
(caddr_t)xcp);
- xcp->evtchn = xvdi_get_evtchn(xcp->dip);
}
}
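The two-line xencons.c change above is the fix for 6632774 (panic setting up xen console): the event channel must be cached in the soft state before the interrupt handler is registered, since the handler can fire as soon as ddi_add_intr() returns. A condensed view of the corrected ordering; the rationale is inferred from the hunk, not quoted from the bug report:

    (void) xvdi_alloc_evtchn(xcp->dip);
    xcp->evtchn = xvdi_get_evtchn(xcp->dip);	/* cache the channel first... */
    (void) ddi_add_intr(xcp->dip, 0, NULL, NULL, xenconsintr,
        (caddr_t)xcp);				/* ...then arm the interrupt */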
diff --git a/usr/src/uts/common/xen/io/xnb.c b/usr/src/uts/common/xen/io/xnb.c
index b13a9354c2..7202754860 100644
--- a/usr/src/uts/common/xen/io/xnb.c
+++ b/usr/src/uts/common/xen/io/xnb.c
@@ -40,6 +40,7 @@
#include <sys/dlpi.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
+#include <sys/types.h>
#include <sys/pattr.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
@@ -101,8 +102,17 @@ static void xnb_rxbuf_put(xnb_t *, xnb_rxbuf_t *);
static void xnb_rx_notify_peer(xnb_t *);
static void xnb_rx_complete(xnb_rxbuf_t *);
static void xnb_rx_mark_complete(xnb_t *, RING_IDX, int16_t);
-static void xnb_rx_schedule_unmop(xnb_t *, gnttab_map_grant_ref_t *);
+static void xnb_rx_schedule_unmop(xnb_t *, gnttab_map_grant_ref_t *,
+ xnb_rxbuf_t *);
static void xnb_rx_perform_pending_unmop(xnb_t *);
+mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *);
+
+int xnb_unmop_lowwat = NET_TX_RING_SIZE >> 2;
+int xnb_unmop_hiwat = NET_TX_RING_SIZE - (NET_TX_RING_SIZE >> 2);
+
+
+boolean_t xnb_hv_copy = B_TRUE;
+boolean_t xnb_explicit_pageflip_set = B_FALSE;
#ifdef XNB_DEBUG
#define NR_GRANT_ENTRIES \
@@ -129,12 +139,17 @@ static char *aux_statistics[] = {
"tx_too_early",
"rx_too_early",
"rx_allocb_failed",
+ "tx_allocb_failed",
+ "tx_foreign_page",
"mac_full",
"spurious_intr",
"allocation_success",
"allocation_failure",
"small_allocation_success",
"small_allocation_failure",
+ "other_allocation_failure",
+ "tx_pageboundary_crossed",
+ "tx_cpoparea_grown",
"csum_hardware",
"csum_software",
};
@@ -155,23 +170,28 @@ xnb_ks_aux_update(kstat_t *ksp, int flag)
* Assignment order should match that of the names in
* aux_statistics.
*/
- (knp++)->value.ui64 = xnbp->x_stat_tx_cksum_deferred;
- (knp++)->value.ui64 = xnbp->x_stat_rx_cksum_no_need;
- (knp++)->value.ui64 = xnbp->x_stat_tx_notify_deferred;
- (knp++)->value.ui64 = xnbp->x_stat_tx_notify_sent;
- (knp++)->value.ui64 = xnbp->x_stat_rx_notify_deferred;
- (knp++)->value.ui64 = xnbp->x_stat_rx_notify_sent;
- (knp++)->value.ui64 = xnbp->x_stat_tx_too_early;
- (knp++)->value.ui64 = xnbp->x_stat_rx_too_early;
- (knp++)->value.ui64 = xnbp->x_stat_rx_allocb_failed;
- (knp++)->value.ui64 = xnbp->x_stat_mac_full;
- (knp++)->value.ui64 = xnbp->x_stat_spurious_intr;
- (knp++)->value.ui64 = xnbp->x_stat_allocation_success;
- (knp++)->value.ui64 = xnbp->x_stat_allocation_failure;
- (knp++)->value.ui64 = xnbp->x_stat_small_allocation_success;
- (knp++)->value.ui64 = xnbp->x_stat_small_allocation_failure;
- (knp++)->value.ui64 = xnbp->x_stat_csum_hardware;
- (knp++)->value.ui64 = xnbp->x_stat_csum_software;
+ (knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_deferred;
+ (knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_no_need;
+ (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
+ (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
+ (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
+ (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
+ (knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
+ (knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
+ (knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
+ (knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
+ (knp++)->value.ui64 = xnbp->xnb_stat_tx_foreign_page;
+ (knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
+ (knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
+ (knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
+ (knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
+ (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
+ (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
+ (knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
+ (knp++)->value.ui64 = xnbp->xnb_stat_tx_pagebndry_crossed;
+ (knp++)->value.ui64 = xnbp->xnb_stat_tx_cpoparea_grown;
+ (knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
+ (knp++)->value.ui64 = xnbp->xnb_stat_csum_software;
return (0);
}
@@ -187,16 +207,16 @@ xnb_ks_init(xnb_t *xnbp)
/*
* Create and initialise kstats.
*/
- xnbp->x_kstat_aux = kstat_create(ddi_driver_name(xnbp->x_devinfo),
- ddi_get_instance(xnbp->x_devinfo), "aux_statistics", "net",
+ xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
+ ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
KSTAT_TYPE_NAMED, nstat, 0);
- if (xnbp->x_kstat_aux == NULL)
+ if (xnbp->xnb_kstat_aux == NULL)
return (B_FALSE);
- xnbp->x_kstat_aux->ks_private = xnbp;
- xnbp->x_kstat_aux->ks_update = xnb_ks_aux_update;
+ xnbp->xnb_kstat_aux->ks_private = xnbp;
+ xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;
- knp = xnbp->x_kstat_aux->ks_data;
+ knp = xnbp->xnb_kstat_aux->ks_data;
while (nstat > 0) {
kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
@@ -205,7 +225,7 @@ xnb_ks_init(xnb_t *xnbp)
nstat--;
}
- kstat_install(xnbp->x_kstat_aux);
+ kstat_install(xnbp->xnb_kstat_aux);
return (B_TRUE);
}
@@ -213,7 +233,7 @@ xnb_ks_init(xnb_t *xnbp)
static void
xnb_ks_free(xnb_t *xnbp)
{
- kstat_delete(xnbp->x_kstat_aux);
+ kstat_delete(xnbp->xnb_kstat_aux);
}
/*
@@ -301,7 +321,7 @@ xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
0, 0, 0, 0,
HCK_FULLCKSUM, KM_NOSLEEP);
- xnbp->x_stat_csum_hardware++;
+ xnbp->xnb_stat_csum_hardware++;
return (mp);
}
@@ -323,7 +343,7 @@ software:
* We are not able to use any offload so do the whole thing in
* software.
*/
- xnbp->x_stat_csum_software++;
+ xnbp->xnb_stat_csum_software++;
return (xnb_software_csum(xnbp, mp));
}
@@ -336,38 +356,46 @@ xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);
- xnbp->x_flavour = flavour;
- xnbp->x_flavour_data = flavour_data;
- xnbp->x_devinfo = dip;
- xnbp->x_evtchn = INVALID_EVTCHN;
- xnbp->x_irq = B_FALSE;
- xnbp->x_tx_ring_handle = INVALID_GRANT_HANDLE;
- xnbp->x_rx_ring_handle = INVALID_GRANT_HANDLE;
- xnbp->x_cksum_offload = xnb_cksum_offload;
- xnbp->x_connected = B_FALSE;
- xnbp->x_hotplugged = B_FALSE;
- xnbp->x_detachable = B_FALSE;
- xnbp->x_peer = xvdi_get_oeid(dip);
- xnbp->x_rx_pages_writable = B_FALSE;
-
- xnbp->x_rx_buf_count = 0;
- xnbp->x_rx_unmop_count = 0;
-
- xnbp->x_tx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
- ASSERT(xnbp->x_tx_va != NULL);
-
- if (ddi_get_iblock_cookie(dip, 0, &xnbp->x_icookie)
+ xnbp->xnb_flavour = flavour;
+ xnbp->xnb_flavour_data = flavour_data;
+ xnbp->xnb_devinfo = dip;
+ xnbp->xnb_evtchn = INVALID_EVTCHN;
+ xnbp->xnb_irq = B_FALSE;
+ xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
+ xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
+ xnbp->xnb_cksum_offload = xnb_cksum_offload;
+ xnbp->xnb_connected = B_FALSE;
+ xnbp->xnb_hotplugged = B_FALSE;
+ xnbp->xnb_detachable = B_FALSE;
+ xnbp->xnb_peer = xvdi_get_oeid(dip);
+ xnbp->xnb_rx_pages_writable = B_FALSE;
+
+ xnbp->xnb_rx_buf_count = 0;
+ xnbp->xnb_rx_unmop_count = 0;
+
+ xnbp->xnb_hv_copy = B_FALSE;
+
+ xnbp->xnb_tx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
+ ASSERT(xnbp->xnb_tx_va != NULL);
+
+ if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
!= DDI_SUCCESS)
goto failure;
- mutex_init(&xnbp->x_tx_lock, NULL, MUTEX_DRIVER, xnbp->x_icookie);
- mutex_init(&xnbp->x_rx_lock, NULL, MUTEX_DRIVER, xnbp->x_icookie);
+ /* allocated on demand, when/if we enter xnb_copy_to_peer() */
+ xnbp->xnb_tx_cpop = NULL;
+ xnbp->xnb_cpop_sz = 0;
+
+ mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
+ xnbp->xnb_icookie);
+ mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
+ xnbp->xnb_icookie);
/* set driver private pointer now */
ddi_set_driver_private(dip, xnbp);
if (!xnb_ks_init(xnbp))
- goto late_failure;
+ goto failure_1;
/*
* Receive notification of changes in the state of the
@@ -375,35 +403,52 @@ xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
*/
if (xvdi_add_event_handler(dip, XS_OE_STATE,
xnb_oe_state_change) != DDI_SUCCESS)
- goto very_late_failure;
+ goto failure_2;
/*
* Receive notification of hotplug events.
*/
if (xvdi_add_event_handler(dip, XS_HP_STATE,
xnb_hp_state_change) != DDI_SUCCESS)
- goto very_late_failure;
+ goto failure_2;
xsname = xvdi_get_xsname(dip);
if (xenbus_printf(XBT_NULL, xsname,
"feature-no-csum-offload", "%d",
- xnbp->x_cksum_offload ? 0 : 1) != 0)
- goto very_very_late_failure;
+ xnbp->xnb_cksum_offload ? 0 : 1) != 0)
+ goto failure_3;
+
+ /*
+ * Use global xnb_hv_copy to export this feature. This means that
+ * we have to decide what to do before starting up a guest domain
+ */
+ if (xenbus_printf(XBT_NULL, xsname,
+ "feature-rx-copy", "%d", xnb_hv_copy ? 1 : 0) != 0)
+ goto failure_3;
+ /*
+ * Linux domUs seem to depend on "feature-rx-flip" being 0
+ * in addition to "feature-rx-copy" being 1. It seems strange
+ * to use four possible states to describe a binary decision,
+ * but we might as well play nice.
+ */
+ if (xenbus_printf(XBT_NULL, xsname,
+ "feature-rx-flip", "%d", xnb_explicit_pageflip_set ? 1 : 0) != 0)
+ goto failure_3;
if (xenbus_scanf(XBT_NULL, xsname,
"mac", "%s", mac) != 0) {
cmn_err(CE_WARN, "xnb_attach: "
"cannot read mac address from %s",
xsname);
- goto very_very_late_failure;
+ goto failure_3;
}
- if (ether_aton(mac, xnbp->x_mac_addr) != ETHERADDRL) {
+ if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
cmn_err(CE_WARN,
"xnb_attach: cannot parse mac address %s",
mac);
- goto very_very_late_failure;
+ goto failure_3;
}
(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
@@ -411,18 +456,18 @@ xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
return (DDI_SUCCESS);
-very_very_late_failure: /* not that the naming is getting silly or anything */
+failure_3:
xvdi_remove_event_handler(dip, NULL);
-very_late_failure:
+failure_2:
xnb_ks_free(xnbp);
-late_failure:
- mutex_destroy(&xnbp->x_rx_lock);
- mutex_destroy(&xnbp->x_tx_lock);
+failure_1:
+ mutex_destroy(&xnbp->xnb_rx_lock);
+ mutex_destroy(&xnbp->xnb_tx_lock);
failure:
- vmem_free(heap_arena, xnbp->x_tx_va, PAGESIZE);
+ vmem_free(heap_arena, xnbp->xnb_tx_va, PAGESIZE);
kmem_free(xnbp, sizeof (*xnbp));
return (DDI_FAILURE);
}
@@ -434,8 +479,8 @@ xnb_detach(dev_info_t *dip)
xnb_t *xnbp = ddi_get_driver_private(dip);
ASSERT(xnbp != NULL);
- ASSERT(!xnbp->x_connected);
- ASSERT(xnbp->x_rx_buf_count == 0);
+ ASSERT(!xnbp->xnb_connected);
+ ASSERT(xnbp->xnb_rx_buf_count == 0);
xnb_disconnect_rings(dip);
@@ -445,11 +490,15 @@ xnb_detach(dev_info_t *dip)
ddi_set_driver_private(dip, NULL);
- mutex_destroy(&xnbp->x_tx_lock);
- mutex_destroy(&xnbp->x_rx_lock);
+ mutex_destroy(&xnbp->xnb_tx_lock);
+ mutex_destroy(&xnbp->xnb_rx_lock);
+
+ if (xnbp->xnb_cpop_sz > 0)
+ kmem_free(xnbp->xnb_tx_cpop, sizeof (*xnbp->xnb_tx_cpop)
+ * xnbp->xnb_cpop_sz);
- ASSERT(xnbp->x_tx_va != NULL);
- vmem_free(heap_arena, xnbp->x_tx_va, PAGESIZE);
+ ASSERT(xnbp->xnb_tx_va != NULL);
+ vmem_free(heap_arena, xnbp->xnb_tx_va, PAGESIZE);
kmem_free(xnbp, sizeof (*xnbp));
}
@@ -467,29 +516,27 @@ xnb_alloc_page(xnb_t *xnbp)
mutex_enter(&xnb_alloc_page_lock);
if (nth == BATCH_SIZE) {
if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
- xnbp->x_stat_allocation_failure++;
+ xnbp->xnb_stat_allocation_failure++;
mutex_exit(&xnb_alloc_page_lock);
/*
* Try for a single page in low memory situations.
*/
if (balloon_alloc_pages(1, &mfn) != 1) {
- xnbp->x_stat_small_allocation_failure++;
- if ((xnbp->x_stat_small_allocation_failure
- % WARNING_RATE_LIMIT) == 0) {
+ if ((xnbp->xnb_stat_small_allocation_failure++
+ % WARNING_RATE_LIMIT) == 0)
cmn_err(CE_WARN, "xnb_alloc_page: "
"Cannot allocate memory to "
"transfer packets to peer.");
- }
return (0);
} else {
- xnbp->x_stat_small_allocation_success++;
+ xnbp->xnb_stat_small_allocation_success++;
return (mfn);
}
}
nth = 0;
- xnbp->x_stat_allocation_success++;
+ xnbp->xnb_stat_allocation_success++;
}
mfn = mfns[nth++];
@@ -524,6 +571,16 @@ xnb_free_page(xnb_t *xnbp, mfn_t mfn)
}
}
+/*
+ * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but
+ * using local variables.
+ */
+#define XNB_RING_HAS_UNCONSUMED_REQUESTS(_r) \
+ ((((_r)->sring->req_prod - loop) < \
+ (RING_SIZE(_r) - (loop - prod))) ? \
+ ((_r)->sring->req_prod - loop) : \
+ (RING_SIZE(_r) - (loop - prod)))
+
mblk_t *
xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
{
@@ -549,35 +606,26 @@ xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
* to transfer them.
*/
- mutex_enter(&xnbp->x_tx_lock);
+ mutex_enter(&xnbp->xnb_tx_lock);
/*
* If we are not connected to the peer or have not yet
* finished hotplug it is too early to pass packets to the
* peer.
*/
- if (!(xnbp->x_connected && xnbp->x_hotplugged)) {
- mutex_exit(&xnbp->x_tx_lock);
- xnbp->x_stat_tx_too_early++;
+ if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
+ mutex_exit(&xnbp->xnb_tx_lock);
+ DTRACE_PROBE(flip_tx_too_early);
+ xnbp->xnb_stat_tx_too_early++;
return (mp);
}
- loop = xnbp->x_rx_ring.req_cons;
- prod = xnbp->x_rx_ring.rsp_prod_pvt;
- gop = xnbp->x_tx_top;
-
- /*
- * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->x_rx_ring) but
- * using local variables.
- */
-#define XNB_RING_HAS_UNCONSUMED_REQUESTS(_r) \
- ((((_r)->sring->req_prod - loop) < \
- (RING_SIZE(_r) - (loop - prod))) ? \
- ((_r)->sring->req_prod - loop) : \
- (RING_SIZE(_r) - (loop - prod)))
+ loop = xnbp->xnb_rx_ring.req_cons;
+ prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
+ gop = xnbp->xnb_tx_top;
while ((mp != NULL) &&
- XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->x_rx_ring)) {
+ XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
mfn_t mfn;
pfn_t pfn;
@@ -590,12 +638,12 @@ xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
/* 1 */
if ((mfn = xnb_alloc_page(xnbp)) == 0) {
- xnbp->x_stat_xmit_defer++;
+ xnbp->xnb_stat_xmit_defer++;
break;
}
/* 2 */
- rxreq = RING_GET_REQUEST(&xnbp->x_rx_ring, loop);
+ rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);
#ifdef XNB_DEBUG
if (!(rxreq->id < NET_RX_RING_SIZE))
@@ -610,14 +658,14 @@ xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
/* Assign a pfn and map the new page at the allocated va. */
pfn = xen_assign_pfn(mfn);
- hat_devload(kas.a_hat, xnbp->x_tx_va, PAGESIZE,
+ hat_devload(kas.a_hat, xnbp->xnb_tx_va, PAGESIZE,
pfn, PROT_READ | PROT_WRITE, HAT_LOAD);
offset = TX_BUFFER_HEADROOM;
/* 3 */
len = 0;
- valoop = xnbp->x_tx_va + offset;
+ valoop = xnbp->xnb_tx_va + offset;
for (ml = mp; ml != NULL; ml = ml->b_cont) {
size_t chunk = ml->b_wptr - ml->b_rptr;
@@ -629,26 +677,26 @@ xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
ASSERT(len + offset < PAGESIZE);
/* Release the pfn. */
- hat_unload(kas.a_hat, xnbp->x_tx_va, PAGESIZE,
+ hat_unload(kas.a_hat, xnbp->xnb_tx_va, PAGESIZE,
HAT_UNLOAD_UNMAP);
xen_release_pfn(pfn);
/* 4 */
gop->mfn = mfn;
- gop->domid = xnbp->x_peer;
+ gop->domid = xnbp->xnb_peer;
gop->ref = rxreq->gref;
/* 5.1 */
- rxresp = RING_GET_RESPONSE(&xnbp->x_rx_ring, prod);
+ rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
rxresp->offset = offset;
rxresp->flags = 0;
- cksum_flags = xnbp->x_flavour->xf_cksum_to_peer(xnbp, mp);
+ cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
if (cksum_flags != 0)
- xnbp->x_stat_tx_cksum_deferred++;
+ xnbp->xnb_stat_tx_cksum_deferred++;
rxresp->flags |= cksum_flags;
- rxresp->id = RING_GET_REQUEST(&xnbp->x_rx_ring, prod)->id;
+ rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
rxresp->status = len;
loop++;
@@ -661,8 +709,8 @@ xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
/*
* Did we actually do anything?
*/
- if (loop == xnbp->x_rx_ring.req_cons) {
- mutex_exit(&xnbp->x_tx_lock);
+ if (loop == xnbp->xnb_rx_ring.req_cons) {
+ mutex_exit(&xnbp->xnb_tx_lock);
return (mp);
}
@@ -674,14 +722,14 @@ xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
ASSERT(prev != NULL);
prev->b_next = NULL;
- if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->x_tx_top,
- loop - xnbp->x_rx_ring.req_cons) != 0) {
+ if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_tx_top,
+ loop - xnbp->xnb_rx_ring.req_cons) != 0) {
cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
}
- loop = xnbp->x_rx_ring.req_cons;
- prod = xnbp->x_rx_ring.rsp_prod_pvt;
- gop = xnbp->x_tx_top;
+ loop = xnbp->xnb_rx_ring.req_cons;
+ prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
+ gop = xnbp->xnb_tx_top;
while (loop < end) {
int16_t status = NETIF_RSP_OKAY;
@@ -716,11 +764,11 @@ xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
/* 5.2 */
if (status != NETIF_RSP_OKAY) {
- RING_GET_RESPONSE(&xnbp->x_rx_ring, prod)->status =
+ RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
status;
} else {
- xnbp->x_stat_opackets++;
- xnbp->x_stat_obytes += len;
+ xnbp->xnb_stat_opackets++;
+ xnbp->xnb_stat_obytes += len;
}
loop++;
@@ -728,23 +776,23 @@ xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
gop++;
}
- xnbp->x_rx_ring.req_cons = loop;
- xnbp->x_rx_ring.rsp_prod_pvt = prod;
+ xnbp->xnb_rx_ring.req_cons = loop;
+ xnbp->xnb_rx_ring.rsp_prod_pvt = prod;
/* 6 */
- /*LINTED: constant in conditional context*/
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->x_rx_ring, notify);
+ /* LINTED: constant in conditional context */
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
if (notify) {
- ec_notify_via_evtchn(xnbp->x_evtchn);
- xnbp->x_stat_tx_notify_sent++;
+ ec_notify_via_evtchn(xnbp->xnb_evtchn);
+ xnbp->xnb_stat_tx_notify_sent++;
} else {
- xnbp->x_stat_tx_notify_deferred++;
+ xnbp->xnb_stat_tx_notify_deferred++;
}
if (mp != NULL)
- xnbp->x_stat_xmit_defer++;
+ xnbp->xnb_stat_xmit_defer++;
- mutex_exit(&xnbp->x_tx_lock);
+ mutex_exit(&xnbp->xnb_tx_lock);
/* Free mblk_t's that we consumed. */
freemsgchain(free);
@@ -752,6 +800,387 @@ xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
return (mp);
}
+/* helper functions for xnb_copy_to_peer */
+
+/*
+ * Grow the array of copy operation descriptors.
+ * Returns a pointer to the next available entry.
+ */
+gnttab_copy_t *
+grow_cpop_area(xnb_t *xnbp, gnttab_copy_t *o_cpop)
+{
+ /*
+ * o_cpop (arg.1) is a ptr to the area we would like to copy
+ * something into but cannot, because we haven't alloc'ed it
+ * yet, or NULL.
+ * old_cpop and new_cpop (local) are pointers to old/new
+ * versions of xnbp->xnb_tx_cpop.
+ */
+ gnttab_copy_t *new_cpop, *old_cpop, *ret_cpop;
+ size_t newcount;
+
+ ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
+
+ old_cpop = xnbp->xnb_tx_cpop;
+ /*
+ * o_cpop is a pointer into the array pointed to by old_cpop;
+ * it would be an error for exactly one of these pointers to be NULL.
+ * We shouldn't call this function if xnb_tx_cpop has already
+ * been allocated, but we're starting to fill it from the beginning
+ * again.
+ */
+ ASSERT((o_cpop == NULL && old_cpop == NULL) ||
+ (o_cpop != NULL && old_cpop != NULL && o_cpop != old_cpop));
+
+ newcount = xnbp->xnb_cpop_sz + CPOP_DEFCNT;
+
+ new_cpop = kmem_alloc(sizeof (*new_cpop) * newcount, KM_NOSLEEP);
+ if (new_cpop == NULL) {
+ xnbp->xnb_stat_other_allocation_failure++;
+ return (NULL);
+ }
+
+ if (o_cpop != NULL) {
+ size_t offset = (o_cpop - old_cpop);
+
+ /* we only need to move the parts in use ... */
+ (void) memmove(new_cpop, old_cpop, xnbp->xnb_cpop_sz *
+ (sizeof (*old_cpop)));
+
+ kmem_free(old_cpop, xnbp->xnb_cpop_sz * sizeof (*old_cpop));
+
+ ret_cpop = new_cpop + offset;
+ } else {
+ ret_cpop = new_cpop;
+ }
+
+ xnbp->xnb_tx_cpop = new_cpop;
+ xnbp->xnb_cpop_sz = newcount;
+
+ xnbp->xnb_stat_tx_cpoparea_grown++;
+
+ return (ret_cpop);
+}
+
+/*
+ * Check whether an address is on a page that's foreign to this domain.
+ */
+static boolean_t
+is_foreign(void *addr)
+{
+ pfn_t pfn = hat_getpfnum(kas.a_hat, addr);
+
+ return (pfn & PFN_IS_FOREIGN_MFN ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Insert a newly allocated mblk into a chain, replacing the old one.
+ */
+static mblk_t *
+replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
+{
+ uint32_t start, stuff, end, value, flags;
+ mblk_t *new_mp;
+
+ new_mp = copyb(mp);
+ if (new_mp == NULL)
+	cmn_err(CE_PANIC, "replace_msg: cannot alloc new message "
+ "for %p, len %lu", (void *) mp, len);
+
+ hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
+ (void) hcksum_assoc(new_mp, NULL, NULL, start, stuff, end, value,
+ flags, KM_NOSLEEP);
+
+ new_mp->b_next = mp->b_next;
+ new_mp->b_prev = mp->b_prev;
+ new_mp->b_cont = mp->b_cont;
+
+ /* Make sure we only overwrite pointers to the mblk being replaced. */
+ if (mp_prev != NULL && mp_prev->b_next == mp)
+ mp_prev->b_next = new_mp;
+
+ if (ml_prev != NULL && ml_prev->b_cont == mp)
+ ml_prev->b_cont = new_mp;
+
+ mp->b_next = mp->b_prev = mp->b_cont = NULL;
+ freemsg(mp);
+
+ return (new_mp);
+}
+
+/*
+ * Set all the fields in a gnttab_copy_t.
+ */
+static void
+setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
+ size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
+{
+ ASSERT(xnbp != NULL && gp != NULL);
+
+ gp->source.offset = s_off;
+ gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
+ gp->source.domid = DOMID_SELF;
+
+ gp->len = (uint16_t)len;
+ gp->flags = GNTCOPY_dest_gref;
+ gp->status = 0;
+
+ gp->dest.u.ref = d_ref;
+ gp->dest.offset = d_off;
+ gp->dest.domid = xnbp->xnb_peer;
+}
+
+mblk_t *
+xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
+{
+ mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp;
+ mblk_t *ml, *ml_prev;
+ gnttab_copy_t *gop_cp;
+ boolean_t notify;
+ RING_IDX loop, prod;
+ int i;
+
+ if (!xnbp->xnb_hv_copy)
+ return (xnb_to_peer(xnbp, mp));
+
+ /*
+ * For each packet the sequence of operations is:
+ *
+ * 1. get a request slot from the ring.
+ * 2. set up data for hypercall (see NOTE below)
+	 * 3. have the hypervisor copy the data
+ * 4. update the request slot.
+ * 5. kick the peer.
+ *
+ * NOTE ad 2.
+ * In order to reduce the number of hypercalls, we prepare
+ * several packets (mp->b_cont != NULL) for the peer and
+ * perform a single hypercall to transfer them.
+	 * We also have to set up a separate copy operation for
+ * every page.
+ *
+ * If we have more than one message (mp->b_next != NULL),
+ * we do this whole dance repeatedly.
+ */
+
+ mutex_enter(&xnbp->xnb_tx_lock);
+
+ if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
+ mutex_exit(&xnbp->xnb_tx_lock);
+ DTRACE_PROBE(copy_tx_too_early);
+ xnbp->xnb_stat_tx_too_early++;
+ return (mp);
+ }
+
+ loop = xnbp->xnb_rx_ring.req_cons;
+ prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
+
+ while ((mp != NULL) &&
+ XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
+ netif_rx_request_t *rxreq;
+ netif_rx_response_t *rxresp;
+ size_t offset, d_offset;
+ size_t len;
+ uint16_t cksum_flags;
+ int16_t status = NETIF_RSP_OKAY;
+ int item_count;
+
+ /* 1 */
+ rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);
+
+#ifdef XNB_DEBUG
+ if (!(rxreq->id < NET_RX_RING_SIZE))
+ cmn_err(CE_PANIC, "xnb_copy_to_peer: "
+ "id %d out of range in request 0x%p",
+ rxreq->id, (void *)rxreq);
+ if (rxreq->gref >= NR_GRANT_ENTRIES)
+ cmn_err(CE_PANIC, "xnb_copy_to_peer: "
+ "grant ref %d out of range in request 0x%p",
+ rxreq->gref, (void *)rxreq);
+#endif /* XNB_DEBUG */
+
+ /* 2 */
+ d_offset = offset = TX_BUFFER_HEADROOM;
+ len = 0;
+ item_count = 0;
+
+ gop_cp = xnbp->xnb_tx_cpop;
+
+ /*
+ * We walk the b_cont pointers and set up a gop_cp
+ * structure for every page in every data block we have.
+ */
+ /* 2a */
+ for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
+ size_t chunk = ml->b_wptr - ml->b_rptr;
+ uchar_t *r_tmp, *rpt_align;
+ size_t r_offset;
+
+ /*
+ * If we get an mblk on a page that doesn't belong to
+ * this domain, get a new mblk to replace the old one.
+ */
+ if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
+ mblk_t *ml_new = replace_msg(ml, chunk,
+ mp_prev, ml_prev);
+
+ /* We can still use old ml, but not *ml! */
+ if (free == ml)
+ free = ml_new;
+ if (mp == ml)
+ mp = ml_new;
+ ml = ml_new;
+
+ xnbp->xnb_stat_tx_foreign_page++;
+ }
+
+ rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
+ r_offset = (uint16_t)(ml->b_rptr - rpt_align);
+ r_tmp = ml->b_rptr;
+
+ if (d_offset + chunk > PAGESIZE)
+ cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
+ "(svd: %p), ml %p,rpt_alg. %p, d_offset "
+ "(%lu) + chunk (%lu) > PAGESIZE %d!",
+ (void *)mp, (void *)saved_mp, (void *)ml,
+ (void *)rpt_align,
+ d_offset, chunk, (int)PAGESIZE);
+
+ while (chunk > 0) {
+ size_t part_len;
+
+ item_count++;
+ if (item_count > xnbp->xnb_cpop_sz) {
+ gop_cp = grow_cpop_area(xnbp, gop_cp);
+ if (gop_cp == NULL)
+ goto failure;
+ }
+ /*
+ * If our mblk crosses a page boundary, we need
+ * to do a seperate copy for every page.
+ */
+ if (r_offset + chunk > PAGESIZE) {
+ part_len = PAGESIZE - r_offset;
+
+ DTRACE_PROBE3(mblk_page_crossed,
+ (mblk_t *), ml, int, chunk, int,
+ (int)r_offset);
+
+ xnbp->xnb_stat_tx_pagebndry_crossed++;
+ } else {
+ part_len = chunk;
+ }
+
+ setup_gop(xnbp, gop_cp, r_tmp, r_offset,
+ d_offset, part_len, rxreq->gref);
+
+ chunk -= part_len;
+
+ len += part_len;
+ d_offset += part_len;
+ r_tmp += part_len;
+ /*
+ * The 2nd, 3rd ... last copies will always
+ * start at r_tmp, therefore r_offset is 0.
+ */
+ r_offset = 0;
+ gop_cp++;
+ }
+ ml_prev = ml;
+ DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
+ chunk, int, len, int, item_count);
+ }
+ /* 3 */
+ if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_tx_cpop,
+ item_count) != 0) {
+ cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
+ DTRACE_PROBE(HV_granttableopfailed);
+ }
+
+ /* 4 */
+ rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
+ rxresp->offset = offset;
+
+ rxresp->flags = 0;
+
+ DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
+ (int)rxresp->offset, int, (int)rxresp->flags, int,
+ (int)rxresp->status);
+
+ cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
+ if (cksum_flags != 0)
+ xnbp->xnb_stat_tx_cksum_deferred++;
+ rxresp->flags |= cksum_flags;
+
+ rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
+ rxresp->status = len;
+
+ DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
+ (int)rxresp->offset, int, (int)rxresp->flags, int,
+ (int)rxresp->status);
+
+ for (i = 0; i < item_count; i++) {
+ if (xnbp->xnb_tx_cpop[i].status != 0) {
+ DTRACE_PROBE2(cpop__status__nonnull, int,
+ (int)xnbp->xnb_tx_cpop[i].status,
+ int, i);
+ status = NETIF_RSP_ERROR;
+ }
+ }
+
+ /* 5.2 */
+ if (status != NETIF_RSP_OKAY) {
+ RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
+ status;
+ } else {
+ xnbp->xnb_stat_opackets++;
+ xnbp->xnb_stat_obytes += len;
+ }
+
+ loop++;
+ prod++;
+ mp_prev = mp;
+ mp = mp->b_next;
+ }
+failure:
+ /*
+ * Did we actually do anything?
+ */
+ if (loop == xnbp->xnb_rx_ring.req_cons) {
+ mutex_exit(&xnbp->xnb_tx_lock);
+ return (mp);
+ }
+
+ /*
+ * Unlink the end of the 'done' list from the remainder.
+ */
+ ASSERT(mp_prev != NULL);
+ mp_prev->b_next = NULL;
+
+ xnbp->xnb_rx_ring.req_cons = loop;
+ xnbp->xnb_rx_ring.rsp_prod_pvt = prod;
+
+ /* 6 */
+ /* LINTED: constant in conditional context */
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
+ if (notify) {
+ ec_notify_via_evtchn(xnbp->xnb_evtchn);
+ xnbp->xnb_stat_tx_notify_sent++;
+ } else {
+ xnbp->xnb_stat_tx_notify_deferred++;
+ }
+
+ if (mp != NULL)
+ xnbp->xnb_stat_xmit_defer++;
+
+ mutex_exit(&xnbp->xnb_tx_lock);
+
+ /* Free mblk_t structs we have consumed. */
+ freemsgchain(free);
+
+ return (mp);
+}
+
/*ARGSUSED*/
static int
xnb_rxbuf_constructor(void *buf, void *arg, int kmflag)
@@ -803,15 +1232,15 @@ xnb_rx_notify_peer(xnb_t *xnbp)
{
boolean_t notify;
- ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));
+ ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
- /*LINTED: constant in conditional context*/
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->x_tx_ring, notify);
+ /* LINTED: constant in conditional context */
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
if (notify) {
- ec_notify_via_evtchn(xnbp->x_evtchn);
- xnbp->x_stat_rx_notify_sent++;
+ ec_notify_via_evtchn(xnbp->xnb_evtchn);
+ xnbp->xnb_stat_rx_notify_sent++;
} else {
- xnbp->x_stat_rx_notify_deferred++;
+ xnbp->xnb_stat_rx_notify_deferred++;
}
}
@@ -822,19 +1251,9 @@ xnb_rx_complete(xnb_rxbuf_t *rxp)
ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == XNB_RXBUF_INUSE);
- mutex_enter(&xnbp->x_rx_lock);
-
- xnb_rx_schedule_unmop(xnbp, &rxp->xr_mop);
- xnb_rx_perform_pending_unmop(xnbp);
-
- if (xnbp->x_connected) {
- xnb_rx_mark_complete(xnbp, rxp->xr_id, rxp->xr_status);
- xnb_rx_notify_peer(xnbp);
- }
-
- xnb_rxbuf_put(xnbp, rxp);
-
- mutex_exit(&xnbp->x_rx_lock);
+ mutex_enter(&xnbp->xnb_rx_lock);
+ xnb_rx_schedule_unmop(xnbp, &rxp->xr_mop, rxp);
+ mutex_exit(&xnbp->xnb_rx_lock);
}
static void
@@ -843,15 +1262,15 @@ xnb_rx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
RING_IDX i;
netif_tx_response_t *txresp;
- ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));
+ ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
- i = xnbp->x_tx_ring.rsp_prod_pvt;
+ i = xnbp->xnb_tx_ring.rsp_prod_pvt;
- txresp = RING_GET_RESPONSE(&xnbp->x_tx_ring, i);
+ txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
txresp->id = id;
txresp->status = status;
- xnbp->x_tx_ring.rsp_prod_pvt = i + 1;
+ xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;
/*
* Note that we don't push the change to the peer here - that
@@ -859,61 +1278,75 @@ xnb_rx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
*/
}
-/*
- * XXPV dme: currently pending unmap operations are stored on a
- * per-instance basis. Should they be per-driver? The locking would
- * have to change (obviously), but there might be an improvement from
- * batching more together. Right now they are all 'done' either at
- * the tail of each receive operation (copy case) or on each
- * completion (non-copy case). Should that be changed to some
- * interval (watermark?) to improve the chance of batching?
- */
static void
-xnb_rx_schedule_unmop(xnb_t *xnbp, gnttab_map_grant_ref_t *mop)
+xnb_rx_schedule_unmop(xnb_t *xnbp, gnttab_map_grant_ref_t *mop,
+ xnb_rxbuf_t *rxp)
{
- gnttab_unmap_grant_ref_t *unmop;
+ gnttab_unmap_grant_ref_t *unmop;
+ int u_count;
+ int reqs_on_ring;
- ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));
- ASSERT(xnbp->x_rx_unmop_count <= NET_TX_RING_SIZE);
+ ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
+ ASSERT(xnbp->xnb_rx_unmop_count < NET_TX_RING_SIZE);
- unmop = &xnbp->x_rx_unmop[xnbp->x_rx_unmop_count];
- xnbp->x_rx_unmop_count++;
+ u_count = xnbp->xnb_rx_unmop_count++;
+	/* Cache the data we will need when we eventually unmap the grant refs */
+ xnbp->xnb_rx_unmop_rxp[u_count] = rxp;
+
+ unmop = &xnbp->xnb_rx_unmop[u_count];
unmop->host_addr = mop->host_addr;
unmop->dev_bus_addr = mop->dev_bus_addr;
unmop->handle = mop->handle;
-#ifdef XNB_DEBUG
- if (xnbp->x_rx_unmop_count <= NET_TX_RING_SIZE)
- ASSERT(xnbp->x_rx_unmop[xnbp->x_rx_unmop_count].host_addr
- == NULL);
-#endif /* XNB_DEBUG */
+ /*
+ * We cannot check the ring once we're disconnected from it. Batching
+ * doesn't seem to be a useful optimisation in this case either,
+ * so we directly call into the actual unmap function.
+ */
+ if (xnbp->xnb_connected) {
+ reqs_on_ring = RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring);
+ /*
+ * By tuning xnb_unmop_hiwat to N, we can emulate "N per batch"
+ * or (with N == 1) "immediate unmop" behaviour.
+ * The "> xnb_unmop_lowwat" is a guard against ring exhaustion.
+ */
+ if (xnbp->xnb_rx_unmop_count < xnb_unmop_hiwat &&
+ reqs_on_ring > xnb_unmop_lowwat)
+ return;
+ }
+
+ xnb_rx_perform_pending_unmop(xnbp);
}
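+
+/*
+ * For example, with xnb_unmop_hiwat == 1 every call performs its
+ * unmap immediately, while a larger value lets up to xnb_unmop_hiwat
+ * operations be batched into a single GNTTABOP_unmap_grant_ref
+ * hypercall, provided the ring still holds more than xnb_unmop_lowwat
+ * unconsumed requests (the guard against ring exhaustion noted above).
+ */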
+/*
+ * Here we perform the actual unmapping of the data that was
+ * accumulated in xnb_rx_schedule_unmop().
+ * Note that it is the caller's responsibility to make sure that
+ * there's actually something there to unmop.
+ */
static void
xnb_rx_perform_pending_unmop(xnb_t *xnbp)
{
-#ifdef XNB_DEBUG
RING_IDX loop;
+#ifdef XNB_DEBUG
gnttab_unmap_grant_ref_t *unmop;
#endif /* XNB_DEBUG */
- ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));
-
- if (xnbp->x_rx_unmop_count == 0)
- return;
+ ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
+ ASSERT(xnbp->xnb_rx_unmop_count > 0);
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
- xnbp->x_rx_unmop, xnbp->x_rx_unmop_count) < 0) {
+ xnbp->xnb_rx_unmop, xnbp->xnb_rx_unmop_count) < 0) {
cmn_err(CE_WARN, "xnb_rx_perform_pending_unmop: "
"unmap grant operation failed, "
- "%d pages lost", xnbp->x_rx_unmop_count);
+ "%d pages lost", xnbp->xnb_rx_unmop_count);
}
#ifdef XNB_DEBUG
- for (loop = 0, unmop = xnbp->x_rx_unmop;
- loop < xnbp->x_rx_unmop_count;
+ for (loop = 0, unmop = xnbp->xnb_rx_unmop;
+ loop < xnbp->xnb_rx_unmop_count;
loop++, unmop++) {
if (unmop->status != 0) {
cmn_err(CE_WARN, "xnb_rx_perform_pending_unmop: "
@@ -923,10 +1356,27 @@ xnb_rx_perform_pending_unmop(xnb_t *xnbp)
}
#endif /* XNB_DEBUG */
- xnbp->x_rx_unmop_count = 0;
+ for (loop = 0; loop < xnbp->xnb_rx_unmop_count; loop++) {
+ xnb_rxbuf_t *rxp = xnbp->xnb_rx_unmop_rxp[loop];
+
+ if (rxp == NULL)
+ cmn_err(CE_PANIC,
+ "xnb_rx_perform_pending_unmop: "
+ "unexpected NULL rxp (loop %d; count %d)!",
+ loop, xnbp->xnb_rx_unmop_count);
+
+ if (xnbp->xnb_connected)
+ xnb_rx_mark_complete(xnbp, rxp->xr_id, rxp->xr_status);
+ xnb_rxbuf_put(xnbp, rxp);
+ }
+ if (xnbp->xnb_connected)
+ xnb_rx_notify_peer(xnbp);
+
+ xnbp->xnb_rx_unmop_count = 0;
#ifdef XNB_DEBUG
- bzero(xnbp->x_rx_unmop, sizeof (xnbp->x_rx_unmop));
+ bzero(xnbp->xnb_rx_unmop, sizeof (xnbp->xnb_rx_unmop));
+ bzero(xnbp->xnb_rx_unmop_rxp, sizeof (xnbp->xnb_rx_unmop_rxp));
#endif /* XNB_DEBUG */
}
@@ -935,7 +1385,7 @@ xnb_rxbuf_get(xnb_t *xnbp, int flags)
{
xnb_rxbuf_t *rxp;
- ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));
+ ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
rxp = kmem_cache_alloc(xnb_rxbuf_cachep, flags);
if (rxp != NULL) {
@@ -943,13 +1393,13 @@ xnb_rxbuf_get(xnb_t *xnbp, int flags)
rxp->xr_flags |= XNB_RXBUF_INUSE;
rxp->xr_xnbp = xnbp;
- rxp->xr_mop.dom = xnbp->x_peer;
+ rxp->xr_mop.dom = xnbp->xnb_peer;
rxp->xr_mop.flags = GNTMAP_host_map;
- if (!xnbp->x_rx_pages_writable)
+ if (!xnbp->xnb_rx_pages_writable)
rxp->xr_mop.flags |= GNTMAP_readonly;
- xnbp->x_rx_buf_count++;
+ xnbp->xnb_rx_buf_count++;
}
return (rxp);
@@ -958,11 +1408,11 @@ xnb_rxbuf_get(xnb_t *xnbp, int flags)
static void
xnb_rxbuf_put(xnb_t *xnbp, xnb_rxbuf_t *rxp)
{
- ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));
+ ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == XNB_RXBUF_INUSE);
rxp->xr_flags &= ~XNB_RXBUF_INUSE;
- xnbp->x_rx_buf_count--;
+ xnbp->xnb_rx_buf_count--;
kmem_cache_free(xnb_rxbuf_cachep, rxp);
}
@@ -982,7 +1432,7 @@ xnb_recv(xnb_t *xnbp)
* packet be destined for this host) will modify the packet
* 'in place'.
*/
- boolean_t copy = !xnbp->x_rx_pages_writable;
+ boolean_t copy = !xnbp->xnb_rx_pages_writable;
/*
* For each individual request, the sequence of actions is:
@@ -1001,21 +1451,19 @@ xnb_recv(xnb_t *xnbp)
head = tail = NULL;
around:
- ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));
+ ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
- /*LINTED: constant in conditional context*/
- RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->x_tx_ring, work_to_do);
+ /* LINTED: constant in conditional context */
+ RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
if (!work_to_do) {
finished:
- xnb_rx_notify_peer(xnbp);
-
return (head);
}
- start = xnbp->x_tx_ring.req_cons;
- end = xnbp->x_tx_ring.sring->req_prod;
+ start = xnbp->xnb_tx_ring.req_cons;
+ end = xnbp->xnb_tx_ring.sring->req_prod;
- for (loop = start, mop = xnbp->x_rx_mop, rxpp = xnbp->x_rx_bufp;
+ for (loop = start, mop = xnbp->xnb_rx_mop, rxpp = xnbp->xnb_rx_bufp;
loop != end;
loop++, mop++, rxpp++) {
xnb_rxbuf_t *rxp;
@@ -1024,12 +1472,12 @@ finished:
if (rxp == NULL)
break;
- ASSERT(xnbp->x_rx_pages_writable ||
+ ASSERT(xnbp->xnb_rx_pages_writable ||
((rxp->xr_mop.flags & GNTMAP_readonly)
== GNTMAP_readonly));
rxp->xr_mop.ref =
- RING_GET_REQUEST(&xnbp->x_tx_ring, loop)->gref;
+ RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop)->gref;
ASSERT(rxp->xr_mop.ref < NR_GRANT_ENTRIES);
@@ -1043,12 +1491,12 @@ finished:
end = loop;
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
- xnbp->x_rx_mop, end - start) != 0) {
+ xnbp->xnb_rx_mop, end - start) != 0) {
cmn_err(CE_WARN, "xnb_recv: map grant operation failed");
loop = start;
- rxpp = xnbp->x_rx_bufp;
+ rxpp = xnbp->xnb_rx_bufp;
while (loop != end) {
xnb_rxbuf_put(xnbp, *rxpp);
@@ -1060,7 +1508,7 @@ finished:
goto finished;
}
- for (loop = start, mop = xnbp->x_rx_mop, rxpp = xnbp->x_rx_bufp;
+ for (loop = start, mop = xnbp->xnb_rx_mop, rxpp = xnbp->xnb_rx_bufp;
loop != end;
loop++, mop++, rxpp++) {
mblk_t *mp = NULL;
@@ -1074,14 +1522,14 @@ finished:
status = NETIF_RSP_ERROR;
}
- txreq = RING_GET_REQUEST(&xnbp->x_tx_ring, loop);
+ txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);
if (status == NETIF_RSP_OKAY) {
if (copy) {
mp = allocb(txreq->size, BPRI_MED);
if (mp == NULL) {
status = NETIF_RSP_ERROR;
- xnbp->x_stat_rx_allocb_failed++;
+ xnbp->xnb_stat_rx_allocb_failed++;
} else {
bcopy((caddr_t)(uintptr_t)
mop->host_addr + txreq->offset,
@@ -1089,12 +1537,12 @@ finished:
mp->b_wptr += txreq->size;
}
} else {
- mp = desballoc((unsigned char *)(uintptr_t)
+ mp = desballoc((uchar_t *)(uintptr_t)
mop->host_addr + txreq->offset,
txreq->size, 0, &rxp->xr_free_rtn);
if (mp == NULL) {
status = NETIF_RSP_ERROR;
- xnbp->x_stat_rx_allocb_failed++;
+ xnbp->xnb_stat_rx_allocb_failed++;
} else {
rxp->xr_id = txreq->id;
rxp->xr_status = status;
@@ -1112,20 +1560,21 @@ finished:
((txreq->flags &
(NETTXF_csum_blank | NETTXF_data_validated))
!= 0)) {
- mp = xnbp->x_flavour->xf_cksum_from_peer(xnbp,
+ mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
mp, txreq->flags);
- xnbp->x_stat_rx_cksum_no_need++;
+ xnbp->xnb_stat_rx_cksum_no_need++;
}
}
if (copy || (mp == NULL)) {
- xnb_rx_mark_complete(xnbp, txreq->id, status);
- xnb_rx_schedule_unmop(xnbp, mop);
+ rxp->xr_status = status;
+ rxp->xr_id = txreq->id;
+ xnb_rx_schedule_unmop(xnbp, mop, rxp);
}
if (mp != NULL) {
- xnbp->x_stat_ipackets++;
- xnbp->x_stat_rbytes += txreq->size;
+ xnbp->xnb_stat_ipackets++;
+ xnbp->xnb_stat_rbytes += txreq->size;
mp->b_next = NULL;
if (head == NULL) {
@@ -1139,23 +1588,7 @@ finished:
}
}
- /*
- * This has to be here rather than in the 'finished' code
- * because we can only handle NET_TX_RING_SIZE pending unmap
- * operations, which may be exceeded by multiple trips around
- * the receive loop during heavy load (one trip around the
- * loop cannot generate more than NET_TX_RING_SIZE unmap
- * operations).
- */
- xnb_rx_perform_pending_unmop(xnbp);
- if (copy) {
- for (loop = start, rxpp = xnbp->x_rx_bufp;
- loop != end;
- loop++, rxpp++)
- xnb_rxbuf_put(xnbp, *rxpp);
- }
-
- xnbp->x_tx_ring.req_cons = loop;
+ xnbp->xnb_tx_ring.req_cons = loop;
goto around;
/* NOTREACHED */
@@ -1170,26 +1603,26 @@ xnb_intr(caddr_t arg)
xnb_t *xnbp = (xnb_t *)arg;
mblk_t *mp;
- xnbp->x_stat_intr++;
+ xnbp->xnb_stat_intr++;
- mutex_enter(&xnbp->x_rx_lock);
+ mutex_enter(&xnbp->xnb_rx_lock);
- ASSERT(xnbp->x_connected);
+ ASSERT(xnbp->xnb_connected);
mp = xnb_recv(xnbp);
- mutex_exit(&xnbp->x_rx_lock);
+ mutex_exit(&xnbp->xnb_rx_lock);
- if (!xnbp->x_hotplugged) {
- xnbp->x_stat_rx_too_early++;
+ if (!xnbp->xnb_hotplugged) {
+ xnbp->xnb_stat_rx_too_early++;
goto fail;
}
if (mp == NULL) {
- xnbp->x_stat_spurious_intr++;
+ xnbp->xnb_stat_spurious_intr++;
goto fail;
}
- xnbp->x_flavour->xf_recv(xnbp, mp);
+ xnbp->xnb_flavour->xf_recv(xnbp, mp);
return (DDI_INTR_CLAIMED);
@@ -1210,14 +1643,14 @@ xnb_connect_rings(dev_info_t *dip)
/*
* Cannot attempt to connect the rings if already connected.
*/
- ASSERT(!xnbp->x_connected);
+ ASSERT(!xnbp->xnb_connected);
oename = xvdi_get_oename(dip);
if (xenbus_gather(XBT_NULL, oename,
"event-channel", "%u", &evtchn,
- "tx-ring-ref", "%lu", &xnbp->x_tx_ring_ref,
- "rx-ring-ref", "%lu", &xnbp->x_rx_ring_ref,
+ "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
+ "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
NULL) != 0) {
cmn_err(CE_WARN, "xnb_connect_rings: "
"cannot read other-end details from %s",
@@ -1229,13 +1662,20 @@ xnb_connect_rings(dev_info_t *dip)
"feature-tx-writable", "%d", &i) != 0)
i = 0;
if (i != 0)
- xnbp->x_rx_pages_writable = B_TRUE;
+ xnbp->xnb_rx_pages_writable = B_TRUE;
if (xenbus_scanf(XBT_NULL, oename,
"feature-no-csum-offload", "%d", &i) != 0)
i = 0;
- if ((i == 1) || !xnbp->x_cksum_offload)
- xnbp->x_cksum_offload = B_FALSE;
+ if ((i == 1) || !xnbp->xnb_cksum_offload)
+ xnbp->xnb_cksum_offload = B_FALSE;
+
+	/* Check whether our peer knows about, and requests, hypervisor copy */
+ if (xenbus_scanf(XBT_NULL, oename, "request-rx-copy", "%d", &i)
+ != 0)
+ i = 0;
+ if (i != 0)
+ xnbp->xnb_hv_copy = B_TRUE;
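+
+	/*
+	 * For the Solaris frontend, the other half of this handshake is
+	 * xnf_be_connect() writing "request-rx-copy" to its xenstore
+	 * directory; absent that key we stay with the older page-flip
+	 * (transfer) protocol.
+	 */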
/*
* 1. allocate a vaddr for the tx page, one for the rx page.
@@ -1249,57 +1689,57 @@ xnb_connect_rings(dev_info_t *dip)
*/
/* 1.tx */
- xnbp->x_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
+ xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
0, 0, 0, 0, VM_SLEEP);
- ASSERT(xnbp->x_tx_ring_addr != NULL);
+ ASSERT(xnbp->xnb_tx_ring_addr != NULL);
/* 2.tx */
- map_op.host_addr = (uint64_t)((long)xnbp->x_tx_ring_addr);
+ map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
map_op.flags = GNTMAP_host_map;
- map_op.ref = xnbp->x_tx_ring_ref;
- map_op.dom = xnbp->x_peer;
- hat_prepare_mapping(kas.a_hat, xnbp->x_tx_ring_addr);
+ map_op.ref = xnbp->xnb_tx_ring_ref;
+ map_op.dom = xnbp->xnb_peer;
+ hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
&map_op, 1) != 0 || map_op.status != 0) {
cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
goto fail;
}
- xnbp->x_tx_ring_handle = map_op.handle;
+ xnbp->xnb_tx_ring_handle = map_op.handle;
- /*LINTED: constant in conditional context*/
- BACK_RING_INIT(&xnbp->x_tx_ring,
- (netif_tx_sring_t *)xnbp->x_tx_ring_addr, PAGESIZE);
+ /* LINTED: constant in conditional context */
+ BACK_RING_INIT(&xnbp->xnb_tx_ring,
+ (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);
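+
+	/*
+	 * BACK_RING_INIT points our private back ring at the shared ring
+	 * page just mapped from the peer: req_cons and rsp_prod_pvt start
+	 * at zero and advance locally until explicitly pushed, so the
+	 * frontend never sees partially built responses.
+	 */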
/* 1.rx */
- xnbp->x_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
+ xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
0, 0, 0, 0, VM_SLEEP);
- ASSERT(xnbp->x_rx_ring_addr != NULL);
+ ASSERT(xnbp->xnb_rx_ring_addr != NULL);
/* 2.rx */
- map_op.host_addr = (uint64_t)((long)xnbp->x_rx_ring_addr);
+ map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
map_op.flags = GNTMAP_host_map;
- map_op.ref = xnbp->x_rx_ring_ref;
- map_op.dom = xnbp->x_peer;
- hat_prepare_mapping(kas.a_hat, xnbp->x_rx_ring_addr);
+ map_op.ref = xnbp->xnb_rx_ring_ref;
+ map_op.dom = xnbp->xnb_peer;
+ hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
&map_op, 1) != 0 || map_op.status != 0) {
cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
goto fail;
}
- xnbp->x_rx_ring_handle = map_op.handle;
+ xnbp->xnb_rx_ring_handle = map_op.handle;
- /*LINTED: constant in conditional context*/
- BACK_RING_INIT(&xnbp->x_rx_ring,
- (netif_rx_sring_t *)xnbp->x_rx_ring_addr, PAGESIZE);
+ /* LINTED: constant in conditional context */
+ BACK_RING_INIT(&xnbp->xnb_rx_ring,
+ (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);
/* 3 */
if (xvdi_bind_evtchn(dip, evtchn) != DDI_SUCCESS) {
cmn_err(CE_WARN, "xnb_connect_rings: "
- "cannot bind event channel %d", xnbp->x_evtchn);
- xnbp->x_evtchn = INVALID_EVTCHN;
+ "cannot bind event channel %d", xnbp->xnb_evtchn);
+ xnbp->xnb_evtchn = INVALID_EVTCHN;
goto fail;
}
- xnbp->x_evtchn = xvdi_get_evtchn(dip);
+ xnbp->xnb_evtchn = xvdi_get_evtchn(dip);
/*
* It would be good to set the state to XenbusStateConnected
@@ -1307,14 +1747,14 @@ xnb_connect_rings(dev_info_t *dip)
* Changing the state in the store will be noticed by the peer
* and cannot be "taken back".
*/
- mutex_enter(&xnbp->x_tx_lock);
- mutex_enter(&xnbp->x_rx_lock);
+ mutex_enter(&xnbp->xnb_tx_lock);
+ mutex_enter(&xnbp->xnb_rx_lock);
/* 5.1 */
- xnbp->x_connected = B_TRUE;
+ xnbp->xnb_connected = B_TRUE;
- mutex_exit(&xnbp->x_rx_lock);
- mutex_exit(&xnbp->x_tx_lock);
+ mutex_exit(&xnbp->xnb_rx_lock);
+ mutex_exit(&xnbp->xnb_tx_lock);
/* 4, 6 */
if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
@@ -1322,7 +1762,7 @@ xnb_connect_rings(dev_info_t *dip)
cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
goto fail;
}
- xnbp->x_irq = B_TRUE;
+ xnbp->xnb_irq = B_TRUE;
/* 5.2 */
(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
@@ -1330,13 +1770,12 @@ xnb_connect_rings(dev_info_t *dip)
return (B_TRUE);
fail:
- mutex_enter(&xnbp->x_tx_lock);
- mutex_enter(&xnbp->x_rx_lock);
-
- xnbp->x_connected = B_FALSE;
+ mutex_enter(&xnbp->xnb_tx_lock);
+ mutex_enter(&xnbp->xnb_rx_lock);
- mutex_exit(&xnbp->x_rx_lock);
- mutex_exit(&xnbp->x_tx_lock);
+ xnbp->xnb_connected = B_FALSE;
+ mutex_exit(&xnbp->xnb_rx_lock);
+ mutex_exit(&xnbp->xnb_tx_lock);
return (B_FALSE);
}
@@ -1346,56 +1785,61 @@ xnb_disconnect_rings(dev_info_t *dip)
{
xnb_t *xnbp = ddi_get_driver_private(dip);
- if (xnbp->x_irq) {
+ if (xnbp->xnb_irq) {
ddi_remove_intr(dip, 0, NULL);
- xnbp->x_irq = B_FALSE;
+ xnbp->xnb_irq = B_FALSE;
}
- if (xnbp->x_evtchn != INVALID_EVTCHN) {
+ if (xnbp->xnb_rx_unmop_count > 0)
+ xnb_rx_perform_pending_unmop(xnbp);
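+
+	/*
+	 * This flush must happen before the event channel and ring pages
+	 * are torn down below, since xnb_rx_perform_pending_unmop() may
+	 * still mark completions and notify the peer while xnb_connected
+	 * remains set.
+	 */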
+
+ if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
xvdi_free_evtchn(dip);
- xnbp->x_evtchn = INVALID_EVTCHN;
+ xnbp->xnb_evtchn = INVALID_EVTCHN;
}
- if (xnbp->x_rx_ring_handle != INVALID_GRANT_HANDLE) {
+ if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
struct gnttab_unmap_grant_ref unmap_op;
- unmap_op.host_addr = (uint64_t)(uintptr_t)xnbp->x_rx_ring_addr;
+ unmap_op.host_addr = (uint64_t)(uintptr_t)
+ xnbp->xnb_rx_ring_addr;
unmap_op.dev_bus_addr = 0;
- unmap_op.handle = xnbp->x_rx_ring_handle;
+ unmap_op.handle = xnbp->xnb_rx_ring_handle;
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
&unmap_op, 1) != 0)
cmn_err(CE_WARN, "xnb_disconnect_rings: "
"cannot unmap rx-ring page (%d)",
unmap_op.status);
- xnbp->x_rx_ring_handle = INVALID_GRANT_HANDLE;
+ xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
}
- if (xnbp->x_rx_ring_addr != NULL) {
- hat_release_mapping(kas.a_hat, xnbp->x_rx_ring_addr);
- vmem_free(heap_arena, xnbp->x_rx_ring_addr, PAGESIZE);
- xnbp->x_rx_ring_addr = NULL;
+ if (xnbp->xnb_rx_ring_addr != NULL) {
+ hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
+ vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
+ xnbp->xnb_rx_ring_addr = NULL;
}
- if (xnbp->x_tx_ring_handle != INVALID_GRANT_HANDLE) {
+ if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
struct gnttab_unmap_grant_ref unmap_op;
- unmap_op.host_addr = (uint64_t)(uintptr_t)xnbp->x_tx_ring_addr;
+ unmap_op.host_addr = (uint64_t)(uintptr_t)
+ xnbp->xnb_tx_ring_addr;
unmap_op.dev_bus_addr = 0;
- unmap_op.handle = xnbp->x_tx_ring_handle;
+ unmap_op.handle = xnbp->xnb_tx_ring_handle;
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
&unmap_op, 1) != 0)
cmn_err(CE_WARN, "xnb_disconnect_rings: "
"cannot unmap tx-ring page (%d)",
unmap_op.status);
- xnbp->x_tx_ring_handle = INVALID_GRANT_HANDLE;
+ xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
}
- if (xnbp->x_tx_ring_addr != NULL) {
- hat_release_mapping(kas.a_hat, xnbp->x_tx_ring_addr);
- vmem_free(heap_arena, xnbp->x_tx_ring_addr, PAGESIZE);
- xnbp->x_tx_ring_addr = NULL;
+ if (xnbp->xnb_tx_ring_addr != NULL) {
+ hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
+ vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
+ xnbp->xnb_tx_ring_addr = NULL;
}
}
@@ -1412,9 +1856,9 @@ xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
switch (new_state) {
case XenbusStateConnected:
if (xnb_connect_rings(dip)) {
- xnbp->x_flavour->xf_peer_connected(xnbp);
+ xnbp->xnb_flavour->xf_peer_connected(xnbp);
} else {
- xnbp->x_flavour->xf_peer_disconnected(xnbp);
+ xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
xnb_disconnect_rings(dip);
(void) xvdi_switch_state(dip, XBT_NULL,
XenbusStateClosed);
@@ -1425,7 +1869,7 @@ xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
* Now that we've attempted to connect, it's reasonable
* to allow an attempt to detach.
*/
- xnbp->x_detachable = B_TRUE;
+ xnbp->xnb_detachable = B_TRUE;
break;
@@ -1435,16 +1879,16 @@ xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
break;
case XenbusStateClosed:
- xnbp->x_flavour->xf_peer_disconnected(xnbp);
+ xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
- mutex_enter(&xnbp->x_tx_lock);
- mutex_enter(&xnbp->x_rx_lock);
+ mutex_enter(&xnbp->xnb_tx_lock);
+ mutex_enter(&xnbp->xnb_rx_lock);
xnb_disconnect_rings(dip);
- xnbp->x_connected = B_FALSE;
+ xnbp->xnb_connected = B_FALSE;
- mutex_exit(&xnbp->x_rx_lock);
- mutex_exit(&xnbp->x_tx_lock);
+ mutex_exit(&xnbp->xnb_rx_lock);
+ mutex_exit(&xnbp->xnb_tx_lock);
(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
(void) xvdi_post_event(dip, XEN_HP_REMOVE);
@@ -1455,7 +1899,7 @@ xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
* having been through the case above, so we set it to
* be sure.
*/
- xnbp->x_detachable = B_TRUE;
+ xnbp->xnb_detachable = B_TRUE;
break;
@@ -1478,15 +1922,15 @@ xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
switch (state) {
case Connected:
- success = xnbp->x_flavour->xf_hotplug_connected(xnbp);
+ success = xnbp->xnb_flavour->xf_hotplug_connected(xnbp);
- mutex_enter(&xnbp->x_tx_lock);
- mutex_enter(&xnbp->x_rx_lock);
+ mutex_enter(&xnbp->xnb_tx_lock);
+ mutex_enter(&xnbp->xnb_rx_lock);
- xnbp->x_hotplugged = success;
+ xnbp->xnb_hotplugged = success;
- mutex_exit(&xnbp->x_rx_lock);
- mutex_exit(&xnbp->x_tx_lock);
+ mutex_exit(&xnbp->xnb_rx_lock);
+ mutex_exit(&xnbp->xnb_tx_lock);
break;
default:
diff --git a/usr/src/uts/common/xen/io/xnb.h b/usr/src/uts/common/xen/io/xnb.h
index 16ba897727..8da45c82ea 100644
--- a/usr/src/uts/common/xen/io/xnb.h
+++ b/usr/src/uts/common/xen/io/xnb.h
@@ -94,84 +94,98 @@ typedef struct xnb_rxbuf {
/* Per network-interface-controller driver private structure */
struct xnb {
/* most interesting stuff first to assist debugging */
- dev_info_t *x_devinfo; /* System per-device info. */
+ dev_info_t *xnb_devinfo; /* System per-device info. */
- xnb_flavour_t *x_flavour;
- void *x_flavour_data;
+ xnb_flavour_t *xnb_flavour;
+ void *xnb_flavour_data;
- boolean_t x_irq;
- unsigned char x_mac_addr[ETHERADDRL];
+ boolean_t xnb_irq;
+ unsigned char xnb_mac_addr[ETHERADDRL];
- uint64_t x_stat_ipackets;
- uint64_t x_stat_opackets;
- uint64_t x_stat_rbytes;
- uint64_t x_stat_obytes;
+ uint64_t xnb_stat_ipackets;
+ uint64_t xnb_stat_opackets;
+ uint64_t xnb_stat_rbytes;
+ uint64_t xnb_stat_obytes;
- uint64_t x_stat_intr;
- uint64_t x_stat_xmit_defer;
+ uint64_t xnb_stat_intr;
+ uint64_t xnb_stat_xmit_defer;
- uint64_t x_stat_tx_cksum_deferred;
- uint64_t x_stat_rx_cksum_no_need;
+ uint64_t xnb_stat_tx_cksum_deferred;
+ uint64_t xnb_stat_rx_cksum_no_need;
- uint64_t x_stat_tx_notify_sent;
- uint64_t x_stat_tx_notify_deferred;
+ uint64_t xnb_stat_tx_notify_sent;
+ uint64_t xnb_stat_tx_notify_deferred;
- uint64_t x_stat_rx_notify_sent;
- uint64_t x_stat_rx_notify_deferred;
+ uint64_t xnb_stat_rx_notify_sent;
+ uint64_t xnb_stat_rx_notify_deferred;
- uint64_t x_stat_tx_too_early;
- uint64_t x_stat_rx_too_early;
- uint64_t x_stat_rx_allocb_failed;
- uint64_t x_stat_mac_full;
- uint64_t x_stat_spurious_intr;
- uint64_t x_stat_allocation_success;
- uint64_t x_stat_allocation_failure;
- uint64_t x_stat_small_allocation_success;
- uint64_t x_stat_small_allocation_failure;
+ uint64_t xnb_stat_tx_too_early;
+ uint64_t xnb_stat_rx_too_early;
+ uint64_t xnb_stat_rx_allocb_failed;
+ uint64_t xnb_stat_tx_allocb_failed;
+ uint64_t xnb_stat_tx_foreign_page;
+ uint64_t xnb_stat_mac_full;
+ uint64_t xnb_stat_spurious_intr;
+ uint64_t xnb_stat_allocation_success;
+ uint64_t xnb_stat_allocation_failure;
+ uint64_t xnb_stat_small_allocation_success;
+ uint64_t xnb_stat_small_allocation_failure;
+ uint64_t xnb_stat_other_allocation_failure;
- uint64_t x_stat_csum_hardware;
- uint64_t x_stat_csum_software;
+ uint64_t xnb_stat_tx_pagebndry_crossed;
+ uint64_t xnb_stat_tx_cpoparea_grown;
- kstat_t *x_kstat_aux;
+ uint64_t xnb_stat_csum_hardware;
+ uint64_t xnb_stat_csum_software;
- boolean_t x_cksum_offload;
+ kstat_t *xnb_kstat_aux;
- ddi_iblock_cookie_t x_icookie;
+ boolean_t xnb_cksum_offload;
- kmutex_t x_rx_lock;
- kmutex_t x_tx_lock;
+ ddi_iblock_cookie_t xnb_icookie;
- int x_rx_unmop_count;
- int x_rx_buf_count;
- boolean_t x_rx_pages_writable;
+ kmutex_t xnb_rx_lock;
+ kmutex_t xnb_tx_lock;
- netif_rx_back_ring_t x_rx_ring; /* rx interface struct ptr */
- void *x_rx_ring_addr;
- grant_ref_t x_rx_ring_ref;
- grant_handle_t x_rx_ring_handle;
+ int xnb_rx_unmop_count;
+ int xnb_rx_buf_count;
+ boolean_t xnb_rx_pages_writable;
- netif_tx_back_ring_t x_tx_ring; /* tx interface struct ptr */
- void *x_tx_ring_addr;
- grant_ref_t x_tx_ring_ref;
- grant_handle_t x_tx_ring_handle;
+ netif_rx_back_ring_t xnb_rx_ring; /* rx interface struct ptr */
+ void *xnb_rx_ring_addr;
+ grant_ref_t xnb_rx_ring_ref;
+ grant_handle_t xnb_rx_ring_handle;
- boolean_t x_connected;
- boolean_t x_hotplugged;
- boolean_t x_detachable;
- int x_evtchn; /* channel to front end */
- domid_t x_peer;
+ netif_tx_back_ring_t xnb_tx_ring; /* tx interface struct ptr */
+ void *xnb_tx_ring_addr;
+ grant_ref_t xnb_tx_ring_ref;
+ grant_handle_t xnb_tx_ring_handle;
- xnb_rxbuf_t *x_rx_bufp[NET_TX_RING_SIZE];
- gnttab_map_grant_ref_t x_rx_mop[NET_TX_RING_SIZE];
- gnttab_unmap_grant_ref_t x_rx_unmop[NET_TX_RING_SIZE];
+ boolean_t xnb_connected;
+ boolean_t xnb_hotplugged;
+ boolean_t xnb_detachable;
+ int xnb_evtchn; /* channel to front end */
+ domid_t xnb_peer;
- caddr_t x_tx_va;
- gnttab_transfer_t x_tx_top[NET_RX_RING_SIZE];
+ xnb_rxbuf_t *xnb_rx_bufp[NET_TX_RING_SIZE];
+ gnttab_map_grant_ref_t xnb_rx_mop[NET_TX_RING_SIZE];
+ gnttab_unmap_grant_ref_t xnb_rx_unmop[NET_TX_RING_SIZE];
+
+ /* store information for unmop */
+ xnb_rxbuf_t *xnb_rx_unmop_rxp[NET_TX_RING_SIZE];
+
+ caddr_t xnb_tx_va;
+ gnttab_transfer_t xnb_tx_top[NET_RX_RING_SIZE];
+
+ boolean_t xnb_hv_copy; /* do we do hypervisor copy? */
+ gnttab_copy_t *xnb_tx_cpop;
+#define CPOP_DEFCNT 8
+ size_t xnb_cpop_sz; /* in elements, not bytes */
};
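
/*
 * xnb_tx_cpop above is the scratch array of gnttab_copy_t descriptors
 * used on the hypervisor-copy transmit path. It starts at CPOP_DEFCNT
 * entries and, as the xnb_stat_tx_cpoparea_grown counter suggests, is
 * grown on demand when a frame maps to more copy fragments than
 * xnb_cpop_sz currently allows.
 */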
extern int xnb_attach(dev_info_t *, xnb_flavour_t *, void *);
extern void xnb_detach(dev_info_t *);
-extern mblk_t *xnb_to_peer(xnb_t *, mblk_t *);
+extern mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *);
extern mblk_t *xnb_process_cksum_flags(xnb_t *, mblk_t *, uint32_t);
#ifdef __cplusplus
diff --git a/usr/src/uts/common/xen/io/xnbo.c b/usr/src/uts/common/xen/io/xnbo.c
index a7d2190cda..723d650c55 100644
--- a/usr/src/uts/common/xen/io/xnbo.c
+++ b/usr/src/uts/common/xen/io/xnbo.c
@@ -63,19 +63,19 @@ static void xnbo_close_mac(xnbo_t *);
static void
xnbo_to_mac(xnb_t *xnbp, mblk_t *mp)
{
- xnbo_t *xnbop = xnbp->x_flavour_data;
+ xnbo_t *xnbop = xnbp->xnb_flavour_data;
ASSERT(mp != NULL);
if (!xnbop->o_running) {
- xnbp->x_stat_rx_too_early++;
+ xnbp->xnb_stat_rx_too_early++;
goto fail;
}
mp = xnbop->o_mtx->mt_fn(xnbop->o_mtx->mt_arg, mp);
if (mp != NULL) {
- xnbp->x_stat_mac_full++;
+ xnbp->xnb_stat_mac_full++;
goto fail;
}
@@ -88,13 +88,13 @@ fail:
static mblk_t *
xnbo_cksum_from_peer(xnb_t *xnbp, mblk_t *mp, uint16_t flags)
{
- xnbo_t *xnbop = xnbp->x_flavour_data;
+ xnbo_t *xnbop = xnbp->xnb_flavour_data;
ASSERT(mp->b_next == NULL);
if ((flags & NETTXF_csum_blank) != 0) {
/*
- * It would be nice to ASSERT that xnbp->x_cksum_offload
+ * It would be nice to ASSERT that xnbp->xnb_cksum_offload
* is TRUE here, but some peers insist on assuming
* that it is available even when they have been told
* otherwise.
@@ -128,7 +128,7 @@ xnbo_cksum_to_peer(xnb_t *xnbp, mblk_t *mp)
* caller must use HCK_PARTIALCKSUM.
*/
- if (xnbp->x_cksum_offload) {
+ if (xnbp->xnb_cksum_offload) {
uint32_t pflags, csum;
/*
@@ -162,7 +162,7 @@ xnbo_from_mac(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
{
xnb_t *xnbp = arg;
- mp = xnb_to_peer(xnbp, mp);
+ mp = xnb_copy_to_peer(xnbp, mp);
if (mp != NULL)
freemsgchain(mp);
@@ -178,7 +178,7 @@ static void
xnbo_from_mac_filter(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
{
xnb_t *xnbp = arg;
- xnbo_t *xnbop = xnbp->x_flavour_data;
+ xnbo_t *xnbop = xnbp->xnb_flavour_data;
mblk_t *next, *keep, *keep_head, *free, *free_head;
keep = keep_head = free = free_head = NULL;
@@ -207,8 +207,8 @@ xnbo_from_mac_filter(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
continue;
}
- if (bcmp(hdr_info.mhi_daddr, xnbp->x_mac_addr,
- sizeof (xnbp->x_mac_addr)) == 0) {
+ if (bcmp(hdr_info.mhi_daddr, xnbp->xnb_mac_addr,
+ sizeof (xnbp->xnb_mac_addr)) == 0) {
ADD(keep, mp);
continue;
}
@@ -228,7 +228,7 @@ static void
xnbo_notify(void *arg, mac_notify_type_t type)
{
xnb_t *xnbp = arg;
- xnbo_t *xnbop = xnbp->x_flavour_data;
+ xnbo_t *xnbop = xnbp->xnb_flavour_data;
switch (type) {
case MAC_NOTE_PROMISC:
@@ -240,13 +240,13 @@ xnbo_notify(void *arg, mac_notify_type_t type)
static boolean_t
xnbo_open_mac(xnb_t *xnbp, char *mac)
{
- xnbo_t *xnbop = xnbp->x_flavour_data;
+ xnbo_t *xnbop = xnbp->xnb_flavour_data;
int err, need_rx_filter, need_setphysaddr, need_promiscuous;
const mac_info_t *mi;
char *xsname;
void (*rx_fn)(void *, mac_resource_handle_t, mblk_t *);
- xsname = xvdi_get_xsname(xnbp->x_devinfo);
+ xsname = xvdi_get_xsname(xnbp->xnb_devinfo);
if ((err = mac_open(mac, &xnbop->o_mh)) != 0) {
cmn_err(CE_WARN, "xnbo_open_mac: "
@@ -313,10 +313,10 @@ xnbo_open_mac(xnb_t *xnbp, char *mac)
if (need_setphysaddr > 0) {
struct ether_addr ea;
- err = mac_unicst_set(xnbop->o_mh, xnbp->x_mac_addr);
+ err = mac_unicst_set(xnbop->o_mh, xnbp->xnb_mac_addr);
/* Warn, but continue on. */
if (err != 0) {
- bcopy(xnbp->x_mac_addr, ea.ether_addr_octet,
+ bcopy(xnbp->xnb_mac_addr, ea.ether_addr_octet,
ETHERADDRL);
cmn_err(CE_WARN, "xnbo_open_mac: "
"cannot set MAC address of %s to "
@@ -367,7 +367,7 @@ xnbo_hotplug(xnb_t *xnbp)
char *xsname;
char mac[LIFNAMSIZ];
- xsname = xvdi_get_xsname(xnbp->x_devinfo);
+ xsname = xvdi_get_xsname(xnbp->xnb_devinfo);
if (xenbus_scanf(XBT_NULL, xsname, "nic", "%s", mac) != 0) {
cmn_err(CE_WARN, "xnbo_hotplug: "
"cannot read nic name from %s", xsname);
@@ -428,7 +428,7 @@ xnbo_connected(xnb_t *xnbp)
static void
xnbo_disconnected(xnb_t *xnbp)
{
- xnbo_close_mac(xnbp->x_flavour_data);
+ xnbo_close_mac(xnbp->xnb_flavour_data);
}
static int
@@ -469,7 +469,7 @@ static int
xnbo_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
xnb_t *xnbp = ddi_get_driver_private(dip);
- xnbo_t *xnbop = xnbp->x_flavour_data;
+ xnbo_t *xnbop = xnbp->xnb_flavour_data;
switch (cmd) {
case DDI_DETACH:
@@ -480,19 +480,19 @@ xnbo_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
return (DDI_FAILURE);
}
- mutex_enter(&xnbp->x_tx_lock);
- mutex_enter(&xnbp->x_rx_lock);
+ mutex_enter(&xnbp->xnb_tx_lock);
+ mutex_enter(&xnbp->xnb_rx_lock);
- if (!xnbp->x_detachable || xnbp->x_connected ||
- (xnbp->x_rx_buf_count > 0)) {
- mutex_exit(&xnbp->x_rx_lock);
- mutex_exit(&xnbp->x_tx_lock);
+ if (!xnbp->xnb_detachable || xnbp->xnb_connected ||
+ (xnbp->xnb_rx_buf_count > 0)) {
+ mutex_exit(&xnbp->xnb_rx_lock);
+ mutex_exit(&xnbp->xnb_tx_lock);
return (DDI_FAILURE);
}
- mutex_exit(&xnbp->x_rx_lock);
- mutex_exit(&xnbp->x_tx_lock);
+ mutex_exit(&xnbp->xnb_rx_lock);
+ mutex_exit(&xnbp->xnb_tx_lock);
xnbo_close_mac(xnbop);
kmem_free(xnbop, sizeof (*xnbop));
diff --git a/usr/src/uts/common/xen/io/xnbu.c b/usr/src/uts/common/xen/io/xnbu.c
index 1ed6067af0..fa9604194b 100644
--- a/usr/src/uts/common/xen/io/xnbu.c
+++ b/usr/src/uts/common/xen/io/xnbu.c
@@ -81,14 +81,14 @@ static mac_callbacks_t xnb_callbacks = {
static void
xnbu_to_host(xnb_t *xnbp, mblk_t *mp)
{
- xnbu_t *xnbup = xnbp->x_flavour_data;
+ xnbu_t *xnbup = xnbp->xnb_flavour_data;
boolean_t sched = B_FALSE;
ASSERT(mp != NULL);
mac_rx(xnbup->u_mh, xnbup->u_rx_handle, mp);
- mutex_enter(&xnbp->x_tx_lock);
+ mutex_enter(&xnbp->xnb_tx_lock);
/*
* If a transmit attempt failed because we ran out of ring
@@ -96,12 +96,12 @@ xnbu_to_host(xnb_t *xnbp, mblk_t *mp)
* path.
*/
if (xnbup->u_need_sched &&
- RING_HAS_UNCONSUMED_REQUESTS(&xnbp->x_rx_ring)) {
+ RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
sched = B_TRUE;
xnbup->u_need_sched = B_FALSE;
}
- mutex_exit(&xnbp->x_tx_lock);
+ mutex_exit(&xnbp->xnb_tx_lock);
if (sched)
mac_tx_update(xnbup->u_mh);
@@ -155,7 +155,7 @@ xnbu_cksum_to_peer(xnb_t *xnbp, mblk_t *mp)
{
uint16_t r = 0;
- if (xnbp->x_cksum_offload) {
+ if (xnbp->xnb_cksum_offload) {
uint32_t pflags;
hcksum_retrieve(mp, NULL, NULL, NULL, NULL,
@@ -176,7 +176,7 @@ xnbu_cksum_to_peer(xnb_t *xnbp, mblk_t *mp)
static void
xnbu_connected(xnb_t *xnbp)
{
- xnbu_t *xnbup = xnbp->x_flavour_data;
+ xnbu_t *xnbup = xnbp->xnb_flavour_data;
mac_link_update(xnbup->u_mh, LINK_STATE_UP);
/*
@@ -188,7 +188,7 @@ xnbu_connected(xnb_t *xnbp)
static void
xnbu_disconnected(xnb_t *xnbp)
{
- xnbu_t *xnbup = xnbp->x_flavour_data;
+ xnbu_t *xnbup = xnbp->xnb_flavour_data;
mac_link_update(xnbup->u_mh, LINK_STATE_DOWN);
}
@@ -204,9 +204,9 @@ static mblk_t *
xnbu_m_send(void *arg, mblk_t *mp)
{
xnb_t *xnbp = arg;
- xnbu_t *xnbup = xnbp->x_flavour_data;
+ xnbu_t *xnbup = xnbp->xnb_flavour_data;
- mp = xnb_to_peer(arg, mp);
+ mp = xnb_copy_to_peer(arg, mp);
/* XXPV dme: playing with need_sched without txlock? */
@@ -239,10 +239,10 @@ static int
xnbu_m_set_mac_addr(void *arg, const uint8_t *macaddr)
{
xnb_t *xnbp = arg;
- xnbu_t *xnbup = xnbp->x_flavour_data;
+ xnbu_t *xnbup = xnbp->xnb_flavour_data;
- bcopy(macaddr, xnbp->x_mac_addr, ETHERADDRL);
- mac_unicst_update(xnbup->u_mh, xnbp->x_mac_addr);
+ bcopy(macaddr, xnbp->xnb_mac_addr, ETHERADDRL);
+ mac_unicst_update(xnbup->u_mh, xnbp->xnb_mac_addr);
return (0);
}
@@ -300,12 +300,12 @@ xnbu_m_stat(void *arg, uint_t stat, uint64_t *val)
{
xnb_t *xnbp = arg;
- mutex_enter(&xnbp->x_tx_lock);
- mutex_enter(&xnbp->x_rx_lock);
+ mutex_enter(&xnbp->xnb_tx_lock);
+ mutex_enter(&xnbp->xnb_rx_lock);
#define map_stat(q, r) \
case (MAC_STAT_##q): \
- *val = xnbp->x_stat_##r; \
+ *val = xnbp->xnb_stat_##r; \
break
switch (stat) {
@@ -316,16 +316,16 @@ xnbu_m_stat(void *arg, uint_t stat, uint64_t *val)
map_stat(OBYTES, obytes);
default:
- mutex_exit(&xnbp->x_rx_lock);
- mutex_exit(&xnbp->x_tx_lock);
+ mutex_exit(&xnbp->xnb_rx_lock);
+ mutex_exit(&xnbp->xnb_tx_lock);
return (ENOTSUP);
}
#undef map_stat
- mutex_exit(&xnbp->x_rx_lock);
- mutex_exit(&xnbp->x_tx_lock);
+ mutex_exit(&xnbp->xnb_rx_lock);
+ mutex_exit(&xnbp->xnb_tx_lock);
return (0);
}
@@ -343,7 +343,7 @@ static void
xnbu_m_resources(void *arg)
{
xnb_t *xnbp = arg;
- xnbu_t *xnbup = xnbp->x_flavour_data;
+ xnbu_t *xnbup = xnbp->xnb_flavour_data;
mac_rx_fifo_t mrf;
mrf.mrf_type = MAC_RX_FIFO;
@@ -365,7 +365,7 @@ xnbu_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
case MAC_CAPAB_HCKSUM: {
uint32_t *capab = cap_data;
- if (xnbp->x_cksum_offload)
+ if (xnbp->xnb_cksum_offload)
*capab = HCKSUM_INET_PARTIAL;
else
*capab = 0;
@@ -428,13 +428,13 @@ xnbu_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
* used by the generic layer.
*/
mr->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
- mr->m_src_addr = xnbp->x_mac_addr;
+ mr->m_src_addr = xnbp->xnb_mac_addr;
mr->m_callbacks = &xnb_callbacks;
mr->m_min_sdu = 0;
mr->m_max_sdu = XNBMAXPKT;
- (void) memset(xnbp->x_mac_addr, 0xff, ETHERADDRL);
- xnbp->x_mac_addr[0] &= 0xfe;
+ (void) memset(xnbp->xnb_mac_addr, 0xff, ETHERADDRL);
+ xnbp->xnb_mac_addr[0] &= 0xfe;
xnbup->u_need_sched = B_FALSE;
/*
@@ -458,7 +458,7 @@ int
xnbu_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
xnb_t *xnbp = ddi_get_driver_private(dip);
- xnbu_t *xnbup = xnbp->x_flavour_data;
+ xnbu_t *xnbup = xnbp->xnb_flavour_data;
switch (cmd) {
case DDI_DETACH:
@@ -472,19 +472,19 @@ xnbu_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
ASSERT(xnbp != NULL);
ASSERT(xnbup != NULL);
- mutex_enter(&xnbp->x_tx_lock);
- mutex_enter(&xnbp->x_rx_lock);
+ mutex_enter(&xnbp->xnb_tx_lock);
+ mutex_enter(&xnbp->xnb_rx_lock);
- if (!xnbp->x_detachable || xnbp->x_connected ||
- (xnbp->x_rx_buf_count > 0)) {
- mutex_exit(&xnbp->x_rx_lock);
- mutex_exit(&xnbp->x_tx_lock);
+ if (!xnbp->xnb_detachable || xnbp->xnb_connected ||
+ (xnbp->xnb_rx_buf_count > 0)) {
+ mutex_exit(&xnbp->xnb_rx_lock);
+ mutex_exit(&xnbp->xnb_tx_lock);
return (DDI_FAILURE);
}
- mutex_exit(&xnbp->x_rx_lock);
- mutex_exit(&xnbp->x_tx_lock);
+ mutex_exit(&xnbp->xnb_rx_lock);
+ mutex_exit(&xnbp->xnb_tx_lock);
/*
* Attempt to unregister the mac.
diff --git a/usr/src/uts/common/xen/io/xnf.c b/usr/src/uts/common/xen/io/xnf.c
index 4f457edf00..89a12e4d03 100644
--- a/usr/src/uts/common/xen/io/xnf.c
+++ b/usr/src/uts/common/xen/io/xnf.c
@@ -63,49 +63,42 @@
*/
#include <sys/types.h>
-#include <sys/hypervisor.h>
-#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
-#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
-#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/devops.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
-#include <sys/ksynch.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <sys/strsun.h>
#include <sys/pattr.h>
-#include <inet/common.h>
#include <inet/ip.h>
-#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
-#include <sys/atomic.h>
-#include <sys/errno.h>
-#include <sys/machsystm.h>
-#include <sys/bootconf.h>
-#include <sys/bootsvcs.h>
#include <sys/bootinfo.h>
-#include <sys/promif.h>
-#include <sys/archsystm.h>
-#include <sys/gnttab.h>
#include <sys/mach_mmu.h>
-#include <xen/public/memory.h>
-
-#include "xnf.h"
-
+#ifdef XPV_HVM_DRIVER
+#include <sys/xpv_support.h>
+#include <sys/hypervisor.h>
+#else
+#include <sys/hypervisor.h>
#include <sys/evtchn_impl.h>
#include <sys/balloon_impl.h>
+#endif
+#include <xen/public/io/netif.h>
+#include <sys/gnttab.h>
#include <xen/sys/xendev.h>
+#include <sys/sdt.h>
+
+#include <io/xnf.h>
+
/*
* Declarations and Module Linkage
@@ -127,6 +120,10 @@ int xnfdebug = 0;
#define xnf_btop(addr) ((addr) >> PAGESHIFT)
boolean_t xnf_cksum_offload = B_TRUE;
+
+/* Default value for hypervisor-based copy operations */
+boolean_t xnf_rx_hvcopy = B_TRUE;
+
/*
* Should pages used for transmit be readonly for the peer?
*/
@@ -164,17 +161,20 @@ static void xnf_release_dma_resources(xnf_t *);
static mblk_t *xnf_process_recv(xnf_t *);
static void xnf_rcv_complete(struct xnf_buffer_desc *);
static void xnf_release_mblks(xnf_t *);
-static struct xnf_buffer_desc *xnf_alloc_xmit_buffer(xnf_t *);
+static struct xnf_buffer_desc *xnf_alloc_tx_buffer(xnf_t *);
static struct xnf_buffer_desc *xnf_alloc_buffer(xnf_t *);
-static struct xnf_buffer_desc *xnf_get_xmit_buffer(xnf_t *);
+static struct xnf_buffer_desc *xnf_get_tx_buffer(xnf_t *);
static struct xnf_buffer_desc *xnf_get_buffer(xnf_t *);
static void xnf_free_buffer(struct xnf_buffer_desc *);
-static void xnf_free_xmit_buffer(struct xnf_buffer_desc *);
+static void xnf_free_tx_buffer(struct xnf_buffer_desc *);
void xnf_send_driver_status(int, int);
static void rx_buffer_hang(xnf_t *, struct xnf_buffer_desc *);
static int xnf_clean_tx_ring(xnf_t *);
static void oe_state_change(dev_info_t *, ddi_eventcookie_t,
void *, void *);
+static mblk_t *xnf_process_hvcopy_recv(xnf_t *xnfp);
+static boolean_t xnf_hvcopy_peer_status(dev_info_t *devinfo);
+static boolean_t xnf_kstat_init(xnf_t *xnfp);
/*
* XXPV dme: remove MC_IOCTL?
@@ -194,8 +194,8 @@ static mac_callbacks_t xnf_callbacks = {
};
#define GRANT_INVALID_REF 0
-int xnf_recv_bufs_lowat = 4 * NET_RX_RING_SIZE;
-int xnf_recv_bufs_hiwat = 8 * NET_RX_RING_SIZE; /* default max */
+const int xnf_rx_bufs_lowat = 4 * NET_RX_RING_SIZE;
+const int xnf_rx_bufs_hiwat = 8 * NET_RX_RING_SIZE; /* default max */
/* DMA attributes for network ring buffer */
static ddi_dma_attr_t ringbuf_dma_attr = {
@@ -300,134 +300,54 @@ _info(struct modinfo *modinfop)
return (mod_info(&modlinkage, modinfop));
}
-/*
- * Statistics.
- */
-/* XXPV: most of these names need re-"nice"ing */
-static char *xnf_aux_statistics[] = {
- "tx_cksum_deferred",
- "rx_cksum_no_need",
- "intr",
- "xmit_pullup",
- "xmit_pagebndry",
- "xmit_attempt",
- "rx_no_ringbuf",
- "mac_rcv_error",
- "runt",
-};
-
-static int
-xnf_kstat_aux_update(kstat_t *ksp, int flag)
-{
- xnf_t *xnfp;
- kstat_named_t *knp;
-
- if (flag != KSTAT_READ)
- return (EACCES);
-
- xnfp = ksp->ks_private;
- knp = ksp->ks_data;
-
- /*
- * Assignment order should match that of the names in
- * xnf_aux_statistics.
- */
- (knp++)->value.ui64 = xnfp->stat_tx_cksum_deferred;
- (knp++)->value.ui64 = xnfp->stat_rx_cksum_no_need;
-
- (knp++)->value.ui64 = xnfp->stat_intr;
- (knp++)->value.ui64 = xnfp->stat_xmit_pullup;
- (knp++)->value.ui64 = xnfp->stat_xmit_pagebndry;
- (knp++)->value.ui64 = xnfp->stat_xmit_attempt;
- (knp++)->value.ui64 = xnfp->stat_rx_no_ringbuf;
- (knp++)->value.ui64 = xnfp->stat_mac_rcv_error;
- (knp++)->value.ui64 = xnfp->stat_runt;
-
- return (0);
-}
-
-static boolean_t
-xnf_kstat_init(xnf_t *xnfp)
-{
- int nstat = sizeof (xnf_aux_statistics) /
- sizeof (xnf_aux_statistics[0]);
- char **cp = xnf_aux_statistics;
- kstat_named_t *knp;
-
- /*
- * Create and initialise kstats.
- */
- if ((xnfp->kstat_aux = kstat_create("xnf",
- ddi_get_instance(xnfp->devinfo),
- "aux_statistics", "net", KSTAT_TYPE_NAMED,
- nstat, 0)) == NULL)
- return (B_FALSE);
-
- xnfp->kstat_aux->ks_private = xnfp;
- xnfp->kstat_aux->ks_update = xnf_kstat_aux_update;
-
- knp = xnfp->kstat_aux->ks_data;
- while (nstat > 0) {
- kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
-
- knp++;
- cp++;
- nstat--;
- }
-
- kstat_install(xnfp->kstat_aux);
-
- return (B_TRUE);
-}
-
static int
xnf_setup_rings(xnf_t *xnfp)
{
int ix, err;
RING_IDX i;
- struct xnf_buffer_desc *bdesc, *rbp;
- struct xenbus_device *xsd;
- domid_t oeid;
+ struct xnf_buffer_desc *bdesc, *rbp;
+ struct xenbus_device *xsd;
+ domid_t oeid;
- oeid = xvdi_get_oeid(xnfp->devinfo);
- xsd = xvdi_get_xsd(xnfp->devinfo);
+ oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
+ xsd = xvdi_get_xsd(xnfp->xnf_devinfo);
- if (xnfp->tx_ring_ref != GRANT_INVALID_REF)
- gnttab_end_foreign_access(xnfp->tx_ring_ref, 0, 0);
+ if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF)
+ gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0);
err = gnttab_grant_foreign_access(oeid,
- xnf_btop(pa_to_ma(xnfp->tx_ring_phys_addr)), 0);
+ xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0);
if (err <= 0) {
err = -err;
xenbus_dev_error(xsd, err, "granting access to tx ring page");
goto out;
}
- xnfp->tx_ring_ref = (grant_ref_t)err;
+ xnfp->xnf_tx_ring_ref = (grant_ref_t)err;
- if (xnfp->rx_ring_ref != GRANT_INVALID_REF)
- gnttab_end_foreign_access(xnfp->rx_ring_ref, 0, 0);
+ if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF)
+ gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0);
err = gnttab_grant_foreign_access(oeid,
- xnf_btop(pa_to_ma(xnfp->rx_ring_phys_addr)), 0);
+ xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0);
if (err <= 0) {
err = -err;
xenbus_dev_error(xsd, err, "granting access to rx ring page");
goto out;
}
- xnfp->rx_ring_ref = (grant_ref_t)err;
+ xnfp->xnf_rx_ring_ref = (grant_ref_t)err;
- mutex_enter(&xnfp->intrlock);
+ mutex_enter(&xnfp->xnf_intrlock);
/*
* Clean up the TX ring. We just clean up any valid tx_pktinfo structs
* and reset the ring. Note that this can lose packets after a resume,
* but we expect to stagger on.
*/
- mutex_enter(&xnfp->txlock);
+ mutex_enter(&xnfp->xnf_txlock);
- for (i = 0; i < xnfp->n_xmits; i++) {
- struct tx_pktinfo *txp = &xnfp->tx_pkt_info[i];
+ for (i = 0; i < xnfp->xnf_n_tx; i++) {
+ struct tx_pktinfo *txp = &xnfp->xnf_tx_pkt_info[i];
txp->id = i + 1;
@@ -446,83 +366,105 @@ xnf_setup_rings(xnf_t *xnfp)
(void) ddi_dma_unbind_handle(txp->dma_handle);
if (txp->bdesc != NULL) {
- xnf_free_xmit_buffer(txp->bdesc);
+ xnf_free_tx_buffer(txp->bdesc);
txp->bdesc = NULL;
}
(void) gnttab_end_foreign_access_ref(txp->grant_ref,
- xnfp->tx_pages_readonly);
- gnttab_release_grant_reference(&xnfp->gref_tx_head,
+ xnfp->xnf_tx_pages_readonly);
+ gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head,
txp->grant_ref);
txp->grant_ref = GRANT_INVALID_REF;
}
- xnfp->tx_pkt_id_list = 0;
- xnfp->tx_ring.rsp_cons = 0;
- xnfp->tx_ring.sring->req_prod = 0;
- xnfp->tx_ring.sring->rsp_prod = 0;
- xnfp->tx_ring.sring->rsp_event = 1;
+ xnfp->xnf_tx_pkt_id_list = 0;
+ xnfp->xnf_tx_ring.rsp_cons = 0;
+ xnfp->xnf_tx_ring.sring->req_prod = 0;
+ xnfp->xnf_tx_ring.sring->rsp_prod = 0;
+ xnfp->xnf_tx_ring.sring->rsp_event = 1;
- mutex_exit(&xnfp->txlock);
+ mutex_exit(&xnfp->xnf_txlock);
/*
* Rebuild the RX ring. We have to rebuild the RX ring because some of
- * our pages are currently flipped out so we can't just free the RX
- * buffers. Reclaim any unprocessed recv buffers, they won't be
+ * our pages are currently flipped out/granted so we can't just free
+ * the RX buffers. Reclaim any unprocessed recv buffers; they won't be
* usable anyway, since the mfns they refer to are no longer valid.
* Grant the backend domain access to each hung rx buffer.
*/
- i = xnfp->rx_ring.rsp_cons;
- while (i++ != xnfp->rx_ring.sring->req_prod) {
+ i = xnfp->xnf_rx_ring.rsp_cons;
+ while (i++ != xnfp->xnf_rx_ring.sring->req_prod) {
volatile netif_rx_request_t *rxrp;
- rxrp = RING_GET_REQUEST(&xnfp->rx_ring, i);
- ix = rxrp - RING_GET_REQUEST(&xnfp->rx_ring, 0);
- rbp = xnfp->rxpkt_bufptr[ix];
+ rxrp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, i);
+ ix = rxrp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0);
+ rbp = xnfp->xnf_rxpkt_bufptr[ix];
if (rbp != NULL) {
- ASSERT(rbp->grant_ref != GRANT_INVALID_REF);
- gnttab_grant_foreign_transfer_ref(rbp->grant_ref,
- oeid);
+ grant_ref_t ref = rbp->grant_ref;
+
+ ASSERT(ref != GRANT_INVALID_REF);
+ if (xnfp->xnf_rx_hvcopy) {
+ pfn_t pfn = xnf_btop(rbp->buf_phys);
+ mfn_t mfn = pfn_to_mfn(pfn);
+
+ gnttab_grant_foreign_access_ref(ref, oeid,
+ mfn, 0);
+ } else {
+ gnttab_grant_foreign_transfer_ref(ref, oeid);
+ }
rxrp->id = ix;
- rxrp->gref = rbp->grant_ref;
+ rxrp->gref = ref;
}
}
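
/*
 * The asymmetry above matters: with xnf_rx_hvcopy the buffer remains
 * owned by this domain and we simply re-grant the backend access to
 * its MFN so it can copy data in, whereas the older protocol re-offers
 * the slot as a transfer ref and the backend pushes page ownership to
 * us with each received frame.
 */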
+
/*
* Reset the ring pointers to initial state.
* Hang buffers for any empty ring slots.
*/
- xnfp->rx_ring.rsp_cons = 0;
- xnfp->rx_ring.sring->req_prod = 0;
- xnfp->rx_ring.sring->rsp_prod = 0;
- xnfp->rx_ring.sring->rsp_event = 1;
+ xnfp->xnf_rx_ring.rsp_cons = 0;
+ xnfp->xnf_rx_ring.sring->req_prod = 0;
+ xnfp->xnf_rx_ring.sring->rsp_prod = 0;
+ xnfp->xnf_rx_ring.sring->rsp_event = 1;
for (i = 0; i < NET_RX_RING_SIZE; i++) {
- xnfp->rx_ring.req_prod_pvt = i;
- if (xnfp->rxpkt_bufptr[i] != NULL)
+ xnfp->xnf_rx_ring.req_prod_pvt = i;
+ if (xnfp->xnf_rxpkt_bufptr[i] != NULL)
continue;
if ((bdesc = xnf_get_buffer(xnfp)) == NULL)
break;
rx_buffer_hang(xnfp, bdesc);
}
- xnfp->rx_ring.req_prod_pvt = i;
+ xnfp->xnf_rx_ring.req_prod_pvt = i;
/* LINTED: constant in conditional context */
- RING_PUSH_REQUESTS(&xnfp->rx_ring);
+ RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring);
- mutex_exit(&xnfp->intrlock);
+ mutex_exit(&xnfp->xnf_intrlock);
return (0);
out:
- if (xnfp->tx_ring_ref != GRANT_INVALID_REF)
- gnttab_end_foreign_access(xnfp->tx_ring_ref, 0, 0);
- xnfp->tx_ring_ref = GRANT_INVALID_REF;
+ if (xnfp->xnf_tx_ring_ref != GRANT_INVALID_REF)
+ gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0);
+ xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF;
- if (xnfp->rx_ring_ref != GRANT_INVALID_REF)
- gnttab_end_foreign_access(xnfp->rx_ring_ref, 0, 0);
- xnfp->rx_ring_ref = GRANT_INVALID_REF;
+ if (xnfp->xnf_rx_ring_ref != GRANT_INVALID_REF)
+ gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0);
+ xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF;
return (err);
}
+
+/* Called when the upper layers free a message we passed upstream */
+static void
+xnf_copy_rcv_complete(struct xnf_buffer_desc *bdesc)
+{
+ (void) ddi_dma_unbind_handle(bdesc->dma_handle);
+ ddi_dma_mem_free(&bdesc->acc_handle);
+ ddi_dma_free_handle(&bdesc->dma_handle);
+ kmem_free(bdesc, sizeof (*bdesc));
+}
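+
+/*
+ * A sketch of how this free routine is expected to be wired up on the
+ * copy receive path, assuming the buffer descriptor embeds an frtn_t
+ * (as xnb's xr_free_rtn does):
+ *
+ *	bdesc->free_rtn.free_func = xnf_copy_rcv_complete;
+ *	bdesc->free_rtn.free_arg = (char *)bdesc;
+ *	mp = desballoc((uchar_t *)bdesc->buf, len, 0, &bdesc->free_rtn);
+ *
+ * STREAMS calls free_func(free_arg) once the last reference to the
+ * loaned-up mblk is released, at which point the DMA resources are
+ * torn down as above.
+ */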
+
+
/*
* Connect driver to back end, called to set up communication with
* back end driver both initially and on resume after restore/migrate.
@@ -533,16 +475,16 @@ xnf_be_connect(xnf_t *xnfp)
char mac[ETHERADDRL * 3];
const char *message;
xenbus_transaction_t xbt;
- struct xenbus_device *xsd;
+ struct xenbus_device *xsd;
char *xsname;
int err, be_no_cksum_offload;
- ASSERT(!xnfp->connected);
+ ASSERT(!xnfp->xnf_connected);
- xsd = xvdi_get_xsd(xnfp->devinfo);
- xsname = xvdi_get_xsname(xnfp->devinfo);
+ xsd = xvdi_get_xsd(xnfp->xnf_devinfo);
+ xsname = xvdi_get_xsname(xnfp->xnf_devinfo);
- err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->devinfo), "mac",
+ err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo), "mac",
"%s", (char *)&mac[0]);
if (err != 0) {
/*
@@ -550,12 +492,12 @@ xnf_be_connect(xnf_t *xnfp)
* addr. at this point
*/
cmn_err(CE_WARN, "%s%d: no mac address",
- ddi_driver_name(xnfp->devinfo),
- ddi_get_instance(xnfp->devinfo));
+ ddi_driver_name(xnfp->xnf_devinfo),
+ ddi_get_instance(xnfp->xnf_devinfo));
return;
}
- if (ether_aton(mac, xnfp->mac_addr) != ETHERADDRL) {
+ if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) {
err = ENOENT;
xenbus_dev_error(xsd, ENOENT, "parsing %s/mac", xsname);
return;
@@ -568,7 +510,7 @@ xnf_be_connect(xnf_t *xnfp)
return;
}
- err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->devinfo),
+ err = xenbus_scanf(XBT_NULL, xvdi_get_oename(xnfp->xnf_devinfo),
"feature-no-csum-offload", "%d", &be_no_cksum_offload);
/*
* If we fail to read the store we assume that the key is
@@ -581,8 +523,8 @@ xnf_be_connect(xnf_t *xnfp)
* If the far end cannot do checksum offload or we do not wish
* to do it, disable it.
*/
- if ((be_no_cksum_offload == 1) || !xnfp->cksum_offload)
- xnfp->cksum_offload = B_FALSE;
+ if ((be_no_cksum_offload == 1) || !xnfp->xnf_cksum_offload)
+ xnfp->xnf_cksum_offload = B_FALSE;
again:
err = xenbus_transaction_start(&xbt);
@@ -592,20 +534,21 @@ again:
}
err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u",
- xnfp->tx_ring_ref);
+ xnfp->xnf_tx_ring_ref);
if (err != 0) {
message = "writing tx ring-ref";
goto abort_transaction;
}
err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u",
- xnfp->rx_ring_ref);
+ xnfp->xnf_rx_ring_ref);
if (err != 0) {
message = "writing rx ring-ref";
goto abort_transaction;
}
- err = xenbus_printf(xbt, xsname, "event-channel", "%u", xnfp->evtchn);
+ err = xenbus_printf(xbt, xsname, "event-channel", "%u",
+ xnfp->xnf_evtchn);
if (err != 0) {
message = "writing event-channel";
goto abort_transaction;
@@ -617,7 +560,7 @@ again:
goto abort_transaction;
}
- if (!xnfp->tx_pages_readonly) {
+ if (!xnfp->xnf_tx_pages_readonly) {
err = xenbus_printf(xbt, xsname, "feature-tx-writable",
"%d", 1);
if (err != 0) {
@@ -627,11 +570,17 @@ again:
}
err = xenbus_printf(xbt, xsname, "feature-no-csum-offload", "%d",
- xnfp->cksum_offload ? 0 : 1);
+ xnfp->xnf_cksum_offload ? 0 : 1);
if (err != 0) {
message = "writing feature-no-csum-offload";
goto abort_transaction;
}
+ err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d",
+ xnfp->xnf_rx_hvcopy ? 1 : 0);
+ if (err != 0) {
+ message = "writing request-rx-copy";
+ goto abort_transaction;
+ }
err = xenbus_printf(xbt, xsname, "state", "%d", XenbusStateConnected);
if (err != 0) {
@@ -677,20 +626,24 @@ xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
(void) xvdi_resume(devinfo);
(void) xvdi_alloc_evtchn(devinfo);
+ xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo);
+#ifdef XPV_HVM_DRIVER
+ ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr,
+ xnfp);
+#else
(void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr,
(caddr_t)xnfp);
- xnfp->evtchn = xvdi_get_evtchn(devinfo);
+#endif
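+
+		/*
+		 * Under XPV_HVM_DRIVER there is no PV interrupt nexus
+		 * through which ddi_add_intr() could route the event
+		 * channel, so the handler is bound directly with
+		 * ec_bind_evtchn_to_handler() at IPL_VIF instead.
+		 */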
xnf_be_connect(xnfp);
/*
- * Our MAC address didn't necessarily change, but
- * given that we may be resuming this OS instance
- * on a different machine (or on the same one and got a
- * different MAC address because we didn't specify one of
- * our own), it's useful to claim that
- * it changed in order that IP send out a
- * gratuitous ARP.
+ * Our MAC address may have changed if we're resuming:
+ * - on a different host
+	 * - on the same one, having been given a different MAC address
+	 *   because we didn't specify one of our own,
+ * so it's useful to claim that it changed in order that
+ * IP send out a gratuitous ARP.
*/
- mac_unicst_update(xnfp->mh, xnfp->mac_addr);
+ mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr);
return (DDI_SUCCESS);
case DDI_ATTACH:
@@ -710,23 +663,32 @@ xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
macp->m_dip = devinfo;
macp->m_driver = xnfp;
- xnfp->devinfo = devinfo;
+ xnfp->xnf_devinfo = devinfo;
macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
- macp->m_src_addr = xnfp->mac_addr;
+ macp->m_src_addr = xnfp->xnf_mac_addr;
macp->m_callbacks = &xnf_callbacks;
macp->m_min_sdu = 0;
macp->m_max_sdu = XNF_MAXPKT;
- xnfp->running = B_FALSE;
- xnfp->connected = B_FALSE;
- xnfp->cksum_offload = xnf_cksum_offload;
- xnfp->tx_pages_readonly = xnf_tx_pages_readonly;
+ xnfp->xnf_running = B_FALSE;
+ xnfp->xnf_connected = B_FALSE;
+ xnfp->xnf_cksum_offload = xnf_cksum_offload;
+ xnfp->xnf_tx_pages_readonly = xnf_tx_pages_readonly;
+
+ xnfp->xnf_rx_hvcopy = xnf_hvcopy_peer_status(devinfo) && xnf_rx_hvcopy;
+#ifdef XPV_HVM_DRIVER
+ if (!xnfp->xnf_rx_hvcopy) {
+ cmn_err(CE_WARN, "The xnf driver requires a dom0 that "
+ "supports 'feature-rx-copy'");
+ goto failure;
+ }
+#endif
/*
* Get the iblock cookie with which to initialize the mutexes.
*/
- if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->icookie)
+ if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie)
!= DDI_SUCCESS)
goto failure;
/*
@@ -736,84 +698,94 @@ xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
* affect the operation of any other part of the driver,
* it needs to acquire the txlock mutex.
*/
- mutex_init(&xnfp->tx_buf_mutex,
- NULL, MUTEX_DRIVER, xnfp->icookie);
- mutex_init(&xnfp->rx_buf_mutex,
- NULL, MUTEX_DRIVER, xnfp->icookie);
- mutex_init(&xnfp->txlock,
- NULL, MUTEX_DRIVER, xnfp->icookie);
- mutex_init(&xnfp->intrlock,
- NULL, MUTEX_DRIVER, xnfp->icookie);
- cv_init(&xnfp->cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&xnfp->xnf_tx_buf_mutex,
+ NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
+ mutex_init(&xnfp->xnf_rx_buf_mutex,
+ NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
+ mutex_init(&xnfp->xnf_txlock,
+ NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
+ mutex_init(&xnfp->xnf_intrlock,
+ NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
+ cv_init(&xnfp->xnf_cv, NULL, CV_DEFAULT, NULL);
if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
- &xnfp->gref_tx_head) < 0) {
+ &xnfp->xnf_gref_tx_head) < 0) {
cmn_err(CE_WARN, "xnf%d: can't alloc tx grant refs",
- ddi_get_instance(xnfp->devinfo));
- goto late_failure;
+ ddi_get_instance(xnfp->xnf_devinfo));
+ goto failure_1;
}
if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
- &xnfp->gref_rx_head) < 0) {
+ &xnfp->xnf_gref_rx_head) < 0) {
cmn_err(CE_WARN, "xnf%d: can't alloc rx grant refs",
- ddi_get_instance(xnfp->devinfo));
- goto late_failure;
+ ddi_get_instance(xnfp->xnf_devinfo));
+ goto failure_1;
}
if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) {
cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize "
- "driver data structures", ddi_get_instance(xnfp->devinfo));
- goto late_failure;
+ "driver data structures",
+ ddi_get_instance(xnfp->xnf_devinfo));
+ goto failure_1;
}
- xnfp->rx_ring.sring->rsp_event = xnfp->tx_ring.sring->rsp_event = 1;
+ xnfp->xnf_rx_ring.sring->rsp_event =
+ xnfp->xnf_tx_ring.sring->rsp_event = 1;
- xnfp->tx_ring_ref = GRANT_INVALID_REF;
- xnfp->rx_ring_ref = GRANT_INVALID_REF;
+ xnfp->xnf_tx_ring_ref = GRANT_INVALID_REF;
+ xnfp->xnf_rx_ring_ref = GRANT_INVALID_REF;
/* set driver private pointer now */
ddi_set_driver_private(devinfo, xnfp);
if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change)
!= DDI_SUCCESS)
- goto late_failure;
+ goto failure_1;
if (!xnf_kstat_init(xnfp))
- goto very_late_failure;
+ goto failure_2;
/*
* Allocate an event channel, add the interrupt handler and
* bind it to the event channel.
*/
(void) xvdi_alloc_evtchn(devinfo);
+ xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo);
+#ifdef XPV_HVM_DRIVER
+ ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp);
+#else
(void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp);
- xnfp->evtchn = xvdi_get_evtchn(devinfo);
+#endif
/*
* connect to the backend
*/
xnf_be_connect(xnfp);
- err = mac_register(macp, &xnfp->mh);
+ err = mac_register(macp, &xnfp->xnf_mh);
mac_free(macp);
macp = NULL;
if (err != 0)
- goto very_very_late_failure;
+ goto failure_3;
return (DDI_SUCCESS);
-very_very_late_failure:
- kstat_delete(xnfp->kstat_aux);
+failure_3:
+ kstat_delete(xnfp->xnf_kstat_aux);
-very_late_failure:
+failure_2:
xvdi_remove_event_handler(devinfo, XS_OE_STATE);
- ddi_remove_intr(devinfo, 0, xnfp->icookie);
- xnfp->evtchn = INVALID_EVTCHN;
+#ifdef XPV_HVM_DRIVER
+ ec_unbind_evtchn(xnfp->xnf_evtchn);
+#else
+ ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
+#endif
+ xnfp->xnf_evtchn = INVALID_EVTCHN;
-late_failure:
+failure_1:
xnf_release_dma_resources(xnfp);
- cv_destroy(&xnfp->cv);
- mutex_destroy(&xnfp->rx_buf_mutex);
- mutex_destroy(&xnfp->txlock);
- mutex_destroy(&xnfp->intrlock);
+ cv_destroy(&xnfp->xnf_cv);
+ mutex_destroy(&xnfp->xnf_rx_buf_mutex);
+ mutex_destroy(&xnfp->xnf_txlock);
+ mutex_destroy(&xnfp->xnf_intrlock);
failure:
kmem_free(xnfp, sizeof (*xnfp));
@@ -839,17 +811,21 @@ xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
switch (cmd) {
case DDI_SUSPEND:
- ddi_remove_intr(devinfo, 0, xnfp->icookie);
+#ifdef XPV_HVM_DRIVER
+ ec_unbind_evtchn(xnfp->xnf_evtchn);
+#else
+ ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
+#endif
xvdi_suspend(devinfo);
- mutex_enter(&xnfp->intrlock);
- mutex_enter(&xnfp->txlock);
+ mutex_enter(&xnfp->xnf_intrlock);
+ mutex_enter(&xnfp->xnf_txlock);
- xnfp->evtchn = INVALID_EVTCHN;
- xnfp->connected = B_FALSE;
- mutex_exit(&xnfp->txlock);
- mutex_exit(&xnfp->intrlock);
+ xnfp->xnf_evtchn = INVALID_EVTCHN;
+ xnfp->xnf_connected = B_FALSE;
+ mutex_exit(&xnfp->xnf_txlock);
+ mutex_exit(&xnfp->xnf_intrlock);
return (DDI_SUCCESS);
case DDI_DETACH:
@@ -859,32 +835,32 @@ xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
return (DDI_FAILURE);
}
- if (xnfp->connected)
+ if (xnfp->xnf_connected)
return (DDI_FAILURE);
/* Wait for receive buffers to be returned; give up after 5 seconds */
i = 50;
- mutex_enter(&xnfp->rx_buf_mutex);
- while (xnfp->rx_bufs_outstanding > 0) {
- mutex_exit(&xnfp->rx_buf_mutex);
+ mutex_enter(&xnfp->xnf_rx_buf_mutex);
+ while (xnfp->xnf_rx_bufs_outstanding > 0) {
+ mutex_exit(&xnfp->xnf_rx_buf_mutex);
delay(drv_usectohz(100000));
if (--i == 0) {
cmn_err(CE_WARN,
"xnf%d: never reclaimed all the "
"receive buffers. Still have %d "
"buffers outstanding.",
- ddi_get_instance(xnfp->devinfo),
- xnfp->rx_bufs_outstanding);
+ ddi_get_instance(xnfp->xnf_devinfo),
+ xnfp->xnf_rx_bufs_outstanding);
return (DDI_FAILURE);
}
- mutex_enter(&xnfp->rx_buf_mutex);
+ mutex_enter(&xnfp->xnf_rx_buf_mutex);
}
- mutex_exit(&xnfp->rx_buf_mutex);
+ mutex_exit(&xnfp->xnf_rx_buf_mutex);
- kstat_delete(xnfp->kstat_aux);
+ kstat_delete(xnfp->xnf_kstat_aux);
- if (mac_unregister(xnfp->mh) != 0)
+ if (mac_unregister(xnfp->xnf_mh) != 0)
return (DDI_FAILURE);
/* Stop the receiver */
@@ -893,7 +869,11 @@ xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
xvdi_remove_event_handler(devinfo, XS_OE_STATE);
/* Remove the interrupt */
- ddi_remove_intr(devinfo, 0, xnfp->icookie);
+#ifdef XPV_HVM_DRIVER
+ ec_unbind_evtchn(xnfp->xnf_evtchn);
+#else
+ ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
+#endif
/* Release any pending xmit mblks */
xnf_release_mblks(xnfp);
@@ -901,10 +881,10 @@ xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
/* Release all DMA resources */
xnf_release_dma_resources(xnfp);
- cv_destroy(&xnfp->cv);
- mutex_destroy(&xnfp->rx_buf_mutex);
- mutex_destroy(&xnfp->txlock);
- mutex_destroy(&xnfp->intrlock);
+ cv_destroy(&xnfp->xnf_cv);
+ mutex_destroy(&xnfp->xnf_rx_buf_mutex);
+ mutex_destroy(&xnfp->xnf_txlock);
+ mutex_destroy(&xnfp->xnf_intrlock);
kmem_free(xnfp, sizeof (*xnfp));
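
The DDI_DETACH branch above reclaims loaned receive buffers with a bounded
poll: 50 passes with a 100000-microsecond delay is the five-second budget
the comment advertises. A self-contained sketch of the same shape, where
the outstanding counter is a stand-in for xnf_rx_bufs_outstanding and is
decremented locally in place of the upper layers returning buffers:

#include <stdbool.h>
#include <unistd.h>

static int outstanding_bufs = 3;	/* pretend three buffers are loaned */

static bool
wait_for_drain(void)
{
	int tries = 50;

	while (outstanding_bufs > 0) {
		if (--tries == 0)
			return (false);	/* caller fails the detach */
		usleep(100000);		/* drv_usectohz(100000) analogue */
		outstanding_bufs--;	/* stand-in for a buffer returning */
	}
	return (true);
}

int
main(void)
{
	return (wait_for_drain() ? 0 : 1);
}
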
@@ -924,7 +904,7 @@ xnf_set_mac_addr(void *arg, const uint8_t *macaddr)
if (xnfdebug & XNF_DEBUG_TRACE)
printf("xnf%d: set_mac_addr(0x%p): "
"%02x:%02x:%02x:%02x:%02x:%02x\n",
- ddi_get_instance(xnfp->devinfo),
+ ddi_get_instance(xnfp->xnf_devinfo),
(void *)xnfp, macaddr[0], macaddr[1], macaddr[2],
macaddr[3], macaddr[4], macaddr[5]);
#endif
@@ -952,7 +932,7 @@ xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca)
if (xnfdebug & XNF_DEBUG_TRACE)
printf("xnf%d set_multicast(0x%p): "
"%02x:%02x:%02x:%02x:%02x:%02x\n",
- ddi_get_instance(xnfp->devinfo),
+ ddi_get_instance(xnfp->xnf_devinfo),
(void *)xnfp, mca[0], mca[1], mca[2],
mca[3], mca[4], mca[5]);
#endif
@@ -983,7 +963,7 @@ xnf_set_promiscuous(void *arg, boolean_t on)
#ifdef XNF_DEBUG
if (xnfdebug & XNF_DEBUG_TRACE)
printf("xnf%d set_promiscuous(0x%p, %x)\n",
- ddi_get_instance(xnfp->devinfo),
+ ddi_get_instance(xnfp->xnf_devinfo),
(void *)xnfp, on);
#endif
/*
@@ -1004,45 +984,46 @@ xnf_clean_tx_ring(xnf_t *xnfp)
int id;
grant_ref_t ref;
- ASSERT(MUTEX_HELD(&xnfp->txlock));
+ ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
do {
/*
* index of next transmission ack
*/
- next_resp = xnfp->tx_ring.sring->rsp_prod;
+ next_resp = xnfp->xnf_tx_ring.sring->rsp_prod;
membar_consumer();
/*
* Clean tx packets from ring that we have responses for
*/
- for (i = xnfp->tx_ring.rsp_cons; i != next_resp; i++) {
- id = RING_GET_RESPONSE(&xnfp->tx_ring, i)->id;
- reap = &xnfp->tx_pkt_info[id];
+ for (i = xnfp->xnf_tx_ring.rsp_cons; i != next_resp; i++) {
+ id = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i)->id;
+ reap = &xnfp->xnf_tx_pkt_info[id];
ref = reap->grant_ref;
/*
* Return id to free list
*/
- reap->id = xnfp->tx_pkt_id_list;
- xnfp->tx_pkt_id_list = id;
+ reap->id = xnfp->xnf_tx_pkt_id_list;
+ xnfp->xnf_tx_pkt_id_list = id;
if (gnttab_query_foreign_access(ref) != 0)
- panic("tx grant still in use"
+ panic("tx grant still in use "
"by backend domain");
(void) ddi_dma_unbind_handle(reap->dma_handle);
(void) gnttab_end_foreign_access_ref(ref,
- xnfp->tx_pages_readonly);
- gnttab_release_grant_reference(&xnfp->gref_tx_head,
+ xnfp->xnf_tx_pages_readonly);
+ gnttab_release_grant_reference(&xnfp->xnf_gref_tx_head,
ref);
freemsg(reap->mp);
reap->mp = NULL;
reap->grant_ref = GRANT_INVALID_REF;
if (reap->bdesc != NULL)
- xnf_free_xmit_buffer(reap->bdesc);
+ xnf_free_tx_buffer(reap->bdesc);
reap->bdesc = NULL;
}
- xnfp->tx_ring.rsp_cons = next_resp;
+ xnfp->xnf_tx_ring.rsp_cons = next_resp;
membar_enter();
- } while (next_resp != xnfp->tx_ring.sring->rsp_prod);
- return (NET_TX_RING_SIZE - (xnfp->tx_ring.sring->req_prod - next_resp));
+ } while (next_resp != xnfp->xnf_tx_ring.sring->rsp_prod);
+ return (NET_TX_RING_SIZE - (xnfp->xnf_tx_ring.sring->req_prod -
+ next_resp));
}
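
xnf_clean_tx_ring() reports free space as NET_TX_RING_SIZE - (req_prod -
next_resp). The shared-ring indices are free-running unsigned counters, so
the subtraction stays correct across wraparound. A demonstration with a
32-bit index and an illustrative ring size:

#include <stdio.h>
#include <stdint.h>

#define	RING_SIZE	256	/* illustrative; xnf uses NET_TX_RING_SIZE */

int
main(void)
{
	/* Free-running producer/consumer indices, as in the shared ring. */
	uint32_t req_prod = UINT32_MAX - 2;	/* about to wrap */
	uint32_t rsp_cons = UINT32_MAX - 10;	/* 8 requests in flight */

	req_prod += 5;				/* wraps past zero */

	/* The in-flight count is still correct modulo 2^32. */
	uint32_t inflight = req_prod - rsp_cons;
	printf("in flight: %u, free slots: %u\n",
	    (unsigned)inflight, (unsigned)(RING_SIZE - inflight));
	return (0);	/* prints: in flight: 13, free slots: 243 */
}
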
/*
@@ -1062,15 +1043,15 @@ xnf_pullupmsg(xnf_t *xnfp, mblk_t *mp)
/*
* get a xmit buffer from the xmit buffer pool
*/
- mutex_enter(&xnfp->rx_buf_mutex);
- bdesc = xnf_get_xmit_buffer(xnfp);
- mutex_exit(&xnfp->rx_buf_mutex);
+ mutex_enter(&xnfp->xnf_rx_buf_mutex);
+ bdesc = xnf_get_tx_buffer(xnfp);
+ mutex_exit(&xnfp->xnf_rx_buf_mutex);
if (bdesc == NULL)
return (bdesc);
/*
* Copy the data into the buffer
*/
- xnfp->stat_xmit_pullup++;
+ xnfp->xnf_stat_tx_pullup++;
bp = bdesc->buf;
for (mptr = mp; mptr != NULL; mptr = mptr->b_cont) {
len = mptr->b_wptr - mptr->b_rptr;
@@ -1112,28 +1093,28 @@ xnf_send_one(xnf_t *xnfp, mblk_t *mp)
#ifdef XNF_DEBUG
if (xnfdebug & XNF_DEBUG_SEND)
printf("xnf%d send(0x%p, 0x%p)\n",
- ddi_get_instance(xnfp->devinfo),
+ ddi_get_instance(xnfp->xnf_devinfo),
(void *)xnfp, (void *)mp);
#endif
ASSERT(mp != NULL);
ASSERT(mp->b_next == NULL);
- ASSERT(MUTEX_HELD(&xnfp->txlock));
+ ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
tx_ring_freespace = xnf_clean_tx_ring(xnfp);
ASSERT(tx_ring_freespace >= 0);
- oeid = xvdi_get_oeid(xnfp->devinfo);
- xnfp->stat_xmit_attempt++;
+ oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
+ xnfp->xnf_stat_tx_attempt++;
/*
* If there are no xmit ring slots available, return.
*/
if (tx_ring_freespace == 0) {
- xnfp->stat_xmit_defer++;
+ xnfp->xnf_stat_tx_defer++;
return (B_FALSE); /* Send should be retried */
}
- slot = xnfp->tx_ring.sring->req_prod;
+ slot = xnfp->xnf_tx_ring.sring->req_prod;
/* Count the number of mblks in message and compute packet size */
for (i = 0, mptr = mp; mptr != NULL; mptr = mptr->b_cont, i++)
pktlen += (mptr->b_wptr - mptr->b_rptr);
@@ -1141,7 +1122,7 @@ xnf_send_one(xnf_t *xnfp, mblk_t *mp)
/* Make sure packet isn't too large */
if (pktlen > XNF_FRAMESIZE) {
cmn_err(CE_WARN, "xnf%d: large packet %d bytes",
- ddi_get_instance(xnfp->devinfo), pktlen);
+ ddi_get_instance(xnfp->xnf_devinfo), pktlen);
freemsg(mp);
return (B_FALSE);
}
@@ -1159,14 +1140,14 @@ xnf_send_one(xnf_t *xnfp, mblk_t *mp)
*/
if (i > xnf_max_tx_frags || page_oops) {
if (page_oops)
- xnfp->stat_xmit_pagebndry++;
+ xnfp->xnf_stat_tx_pagebndry++;
if ((xmitbuf = xnf_pullupmsg(xnfp, mp)) == NULL) {
/* could not allocate resources? */
#ifdef XNF_DEBUG
cmn_err(CE_WARN, "xnf%d: pullupmsg failed",
- ddi_get_instance(xnfp->devinfo));
+ ddi_get_instance(xnfp->xnf_devinfo));
#endif
- xnfp->stat_xmit_defer++;
+ xnfp->xnf_stat_tx_defer++;
return (B_FALSE); /* Retry send */
}
bufaddr = xmitbuf->buf;
@@ -1181,10 +1162,10 @@ xnf_send_one(xnf_t *xnfp, mblk_t *mp)
/*
* Get packet id from free list
*/
- tx_id = xnfp->tx_pkt_id_list;
+ tx_id = xnfp->xnf_tx_pkt_id_list;
ASSERT(tx_id < NET_TX_RING_SIZE);
- txp_info = &xnfp->tx_pkt_info[tx_id];
- xnfp->tx_pkt_id_list = txp_info->id;
+ txp_info = &xnfp->xnf_tx_pkt_info[tx_id];
+ xnfp->xnf_tx_pkt_id_list = txp_info->id;
txp_info->id = tx_id;
/* Prepare for DMA mapping of tx buffer(s) */
@@ -1197,27 +1178,27 @@ xnf_send_one(xnf_t *xnfp, mblk_t *mp)
/*
* Return id to free list
*/
- txp_info->id = xnfp->tx_pkt_id_list;
- xnfp->tx_pkt_id_list = tx_id;
+ txp_info->id = xnfp->xnf_tx_pkt_id_list;
+ xnfp->xnf_tx_pkt_id_list = tx_id;
if (rc == DDI_DMA_NORESOURCES) {
- xnfp->stat_xmit_defer++;
+ xnfp->xnf_stat_tx_defer++;
return (B_FALSE); /* Retry later */
}
#ifdef XNF_DEBUG
cmn_err(CE_WARN, "xnf%d: bind_handle failed (%x)",
- ddi_get_instance(xnfp->devinfo), rc);
+ ddi_get_instance(xnfp->xnf_devinfo), rc);
#endif
return (B_FALSE);
}
ASSERT(ncookies == 1);
- ref = gnttab_claim_grant_reference(&xnfp->gref_tx_head);
+ ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_tx_head);
ASSERT((signed short)ref >= 0);
mfn = xnf_btop(pa_to_ma((paddr_t)dma_cookie.dmac_laddress));
gnttab_grant_foreign_access_ref(ref, oeid, mfn,
- xnfp->tx_pages_readonly);
+ xnfp->xnf_tx_pages_readonly);
txp_info->grant_ref = ref;
- txrp = RING_GET_REQUEST(&xnfp->tx_ring, slot);
+ txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot);
txrp->gref = ref;
txrp->size = dma_cookie.dmac_size;
txrp->offset = (uintptr_t)bufaddr & PAGEOFFSET;
@@ -1225,7 +1206,7 @@ xnf_send_one(xnf_t *xnfp, mblk_t *mp)
txrp->flags = 0;
hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags);
if (pflags != 0) {
- ASSERT(xnfp->cksum_offload);
+ ASSERT(xnfp->xnf_cksum_offload);
/*
* If the local protocol stack requests checksum
* offload we set the 'checksum blank' flag,
@@ -1236,27 +1217,28 @@ xnf_send_one(xnf_t *xnfp, mblk_t *mp)
* validated that the data and the checksum match.
*/
txrp->flags |= NETTXF_csum_blank;
- xnfp->stat_tx_cksum_deferred++;
+ xnfp->xnf_stat_tx_cksum_deferred++;
}
membar_producer();
- xnfp->tx_ring.sring->req_prod = slot + 1;
+ xnfp->xnf_tx_ring.sring->req_prod = slot + 1;
txp_info->mp = mp;
txp_info->bdesc = xmitbuf;
- txs_out = xnfp->tx_ring.sring->req_prod - xnfp->tx_ring.sring->rsp_prod;
- if (xnfp->tx_ring.sring->req_prod - xnfp->tx_ring.rsp_cons <
+ txs_out = xnfp->xnf_tx_ring.sring->req_prod -
+ xnfp->xnf_tx_ring.sring->rsp_prod;
+ if (xnfp->xnf_tx_ring.sring->req_prod - xnfp->xnf_tx_ring.rsp_cons <
XNF_TX_FREE_THRESH) {
/*
* The ring is getting full; set up this packet
* to cause an interrupt.
*/
- xnfp->tx_ring.sring->rsp_event =
- xnfp->tx_ring.sring->rsp_prod + txs_out;
+ xnfp->xnf_tx_ring.sring->rsp_event =
+ xnfp->xnf_tx_ring.sring->rsp_prod + txs_out;
}
- xnfp->stat_opackets++;
- xnfp->stat_obytes += pktlen;
+ xnfp->xnf_stat_opackets++;
+ xnfp->xnf_stat_obytes += pktlen;
return (B_TRUE); /* successful transmit attempt */
}
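
When the ring is filling, the code above arms rsp_event = rsp_prod +
txs_out, requesting an interrupt only once every currently outstanding
transmit has been acknowledged. A sketch of the notify test this arms,
modelled on the check made by Xen's RING_PUSH_RESPONSES_AND_CHECK_NOTIFY
macro (ring_idx_t and the values are illustrative):

#include <stdio.h>
#include <stdint.h>

typedef uint32_t ring_idx_t;

/*
 * Notify only if rsp_event falls inside the window of responses just
 * produced; unsigned arithmetic keeps this correct across wraparound.
 */
static int
need_notify(ring_idx_t old_prod, ring_idx_t new_prod, ring_idx_t rsp_event)
{
	return ((ring_idx_t)(new_prod - rsp_event) <
	    (ring_idx_t)(new_prod - old_prod));
}

int
main(void)
{
	/* Frontend asked for an event after 4 more responses. */
	ring_idx_t rsp_event = 100 + 4;

	printf("%d\n", need_notify(100, 102, rsp_event));	/* 0: too early */
	printf("%d\n", need_notify(102, 104, rsp_event));	/* 1: crossed */
	return (0);
}
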
@@ -1268,19 +1250,19 @@ xnf_send(void *arg, mblk_t *mp)
mblk_t *next;
boolean_t sent_something = B_FALSE;
- mutex_enter(&xnfp->txlock);
+ mutex_enter(&xnfp->xnf_txlock);
/*
* Transmission attempts should be impossible without having
* previously called xnf_start().
*/
- ASSERT(xnfp->running);
+ ASSERT(xnfp->xnf_running);
/*
* Wait for getting connected to the backend
*/
- while (!xnfp->connected) {
- cv_wait(&xnfp->cv, &xnfp->txlock);
+ while (!xnfp->xnf_connected) {
+ cv_wait(&xnfp->xnf_cv, &xnfp->xnf_txlock);
}
while (mp != NULL) {
@@ -1297,9 +1279,9 @@ xnf_send(void *arg, mblk_t *mp)
}
if (sent_something)
- ec_notify_via_evtchn(xnfp->evtchn);
+ ec_notify_via_evtchn(xnfp->xnf_evtchn);
- mutex_exit(&xnfp->txlock);
+ mutex_exit(&xnfp->xnf_txlock);
return (mp);
}
@@ -1313,27 +1295,33 @@ xnf_intr(caddr_t arg)
xnf_t *xnfp = (xnf_t *)arg;
int tx_ring_space;
- mutex_enter(&xnfp->intrlock);
+ mutex_enter(&xnfp->xnf_intrlock);
/*
* If not connected to the peer or not started by the upper
* layers we cannot usefully handle interrupts.
*/
- if (!(xnfp->connected && xnfp->running)) {
- mutex_exit(&xnfp->intrlock);
+ if (!(xnfp->xnf_connected && xnfp->xnf_running)) {
+ mutex_exit(&xnfp->xnf_intrlock);
+ xnfp->xnf_stat_unclaimed_interrupts++;
return (DDI_INTR_UNCLAIMED);
}
#ifdef XNF_DEBUG
if (xnfdebug & XNF_DEBUG_INT)
printf("xnf%d intr(0x%p)\n",
- ddi_get_instance(xnfp->devinfo), (void *)xnfp);
+ ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp);
#endif
- if (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->rx_ring)) {
+ if (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) {
mblk_t *mp;
- if ((mp = xnf_process_recv(xnfp)) != NULL)
- mac_rx(xnfp->mh, xnfp->rx_handle, mp);
+ if (xnfp->xnf_rx_hvcopy)
+ mp = xnf_process_hvcopy_recv(xnfp);
+ else
+ mp = xnf_process_recv(xnfp);
+
+ if (mp != NULL)
+ mac_rx(xnfp->xnf_mh, xnfp->xnf_rx_handle, mp);
}
/*
@@ -1341,32 +1329,33 @@ xnf_intr(caddr_t arg)
*/
#define inuse(r) ((r).sring->req_prod - (r).rsp_cons)
- if ((NET_TX_RING_SIZE - inuse(xnfp->tx_ring)) < XNF_TX_FREE_THRESH) {
+ if ((NET_TX_RING_SIZE - inuse(xnfp->xnf_tx_ring)) <
+ XNF_TX_FREE_THRESH) {
/*
* Yes, clean it and try to start any blocked xmit
* streams.
*/
- mutex_enter(&xnfp->txlock);
+ mutex_enter(&xnfp->xnf_txlock);
tx_ring_space = xnf_clean_tx_ring(xnfp);
- mutex_exit(&xnfp->txlock);
+ mutex_exit(&xnfp->xnf_txlock);
if (tx_ring_space > XNF_TX_FREE_THRESH) {
- mutex_exit(&xnfp->intrlock);
- mac_tx_update(xnfp->mh);
- mutex_enter(&xnfp->intrlock);
+ mutex_exit(&xnfp->xnf_intrlock);
+ mac_tx_update(xnfp->xnf_mh);
+ mutex_enter(&xnfp->xnf_intrlock);
} else {
/*
* Schedule another tx interrupt when we have
* sent enough packets to cross the threshold.
*/
- xnfp->tx_ring.sring->rsp_event =
- xnfp->tx_ring.sring->rsp_prod +
+ xnfp->xnf_tx_ring.sring->rsp_event =
+ xnfp->xnf_tx_ring.sring->rsp_prod +
XNF_TX_FREE_THRESH - tx_ring_space + 1;
}
}
#undef inuse
- xnfp->stat_intr++;
- mutex_exit(&xnfp->intrlock);
+ xnfp->xnf_stat_interrupts++;
+ mutex_exit(&xnfp->xnf_intrlock);
return (DDI_INTR_CLAIMED); /* indicate that the interrupt was for us */
}
@@ -1381,17 +1370,17 @@ xnf_start(void *arg)
#ifdef XNF_DEBUG
if (xnfdebug & XNF_DEBUG_TRACE)
printf("xnf%d start(0x%p)\n",
- ddi_get_instance(xnfp->devinfo), (void *)xnfp);
+ ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp);
#endif
- mutex_enter(&xnfp->intrlock);
- mutex_enter(&xnfp->txlock);
+ mutex_enter(&xnfp->xnf_intrlock);
+ mutex_enter(&xnfp->xnf_txlock);
/* Accept packets from above. */
- xnfp->running = B_TRUE;
+ xnfp->xnf_running = B_TRUE;
- mutex_exit(&xnfp->txlock);
- mutex_exit(&xnfp->intrlock);
+ mutex_exit(&xnfp->xnf_txlock);
+ mutex_exit(&xnfp->xnf_intrlock);
return (0);
}
@@ -1405,16 +1394,16 @@ xnf_stop(void *arg)
#ifdef XNF_DEBUG
if (xnfdebug & XNF_DEBUG_TRACE)
printf("xnf%d stop(0x%p)\n",
- ddi_get_instance(xnfp->devinfo), (void *)xnfp);
+ ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp);
#endif
- mutex_enter(&xnfp->intrlock);
- mutex_enter(&xnfp->txlock);
+ mutex_enter(&xnfp->xnf_intrlock);
+ mutex_enter(&xnfp->xnf_txlock);
- xnfp->running = B_FALSE;
+ xnfp->xnf_running = B_FALSE;
- mutex_exit(&xnfp->txlock);
- mutex_exit(&xnfp->intrlock);
+ mutex_exit(&xnfp->xnf_txlock);
+ mutex_exit(&xnfp->xnf_intrlock);
}
/*
@@ -1428,30 +1417,203 @@ static void
rx_buffer_hang(xnf_t *xnfp, struct xnf_buffer_desc *bdesc)
{
volatile netif_rx_request_t *reqp;
- RING_IDX hang_ix;
- grant_ref_t ref;
- domid_t oeid;
+ RING_IDX hang_ix;
+ grant_ref_t ref;
+ domid_t oeid;
- oeid = xvdi_get_oeid(xnfp->devinfo);
+ oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
- ASSERT(MUTEX_HELD(&xnfp->intrlock));
- reqp = RING_GET_REQUEST(&xnfp->rx_ring, xnfp->rx_ring.req_prod_pvt);
- hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->rx_ring, 0));
- ASSERT(xnfp->rxpkt_bufptr[hang_ix] == NULL);
+ ASSERT(MUTEX_HELD(&xnfp->xnf_intrlock));
+ reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring,
+ xnfp->xnf_rx_ring.req_prod_pvt);
+ hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0));
+ ASSERT(xnfp->xnf_rxpkt_bufptr[hang_ix] == NULL);
if (bdesc->grant_ref == GRANT_INVALID_REF) {
- ref = gnttab_claim_grant_reference(&xnfp->gref_rx_head);
+ ref = gnttab_claim_grant_reference(&xnfp->xnf_gref_rx_head);
ASSERT((signed short)ref >= 0);
bdesc->grant_ref = ref;
- gnttab_grant_foreign_transfer_ref(ref, oeid);
+ if (xnfp->xnf_rx_hvcopy) {
+ pfn_t pfn = xnf_btop(bdesc->buf_phys);
+ mfn_t mfn = pfn_to_mfn(pfn);
+
+ gnttab_grant_foreign_access_ref(ref, oeid, mfn, 0);
+ } else {
+ gnttab_grant_foreign_transfer_ref(ref, oeid);
+ }
}
reqp->id = hang_ix;
reqp->gref = bdesc->grant_ref;
bdesc->id = hang_ix;
- xnfp->rxpkt_bufptr[hang_ix] = bdesc;
+ xnfp->xnf_rxpkt_bufptr[hang_ix] = bdesc;
membar_producer();
- xnfp->rx_ring.req_prod_pvt++;
+ xnfp->xnf_rx_ring.req_prod_pvt++;
}
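
rx_buffer_hang() recovers the ring slot number by subtracting the address
of request 0 from the request pointer; pointer subtraction is scaled by
the element size, so the difference is the index itself. A standalone
illustration:

#include <stdio.h>
#include <stddef.h>

struct req {
	unsigned short id;
	unsigned int gref;
};

int
main(void)
{
	struct req ring[16];
	struct req *slot = &ring[11];

	/* Same computation as hang_ix = reqp - RING_GET_REQUEST(ring, 0). */
	ptrdiff_t hang_ix = slot - &ring[0];
	printf("%td\n", hang_ix);	/* 11 */
	return (0);
}
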
+static mblk_t *
+xnf_process_hvcopy_recv(xnf_t *xnfp)
+{
+ netif_rx_response_t *rxpkt;
+ mblk_t *mp, *head, *tail;
+ struct xnf_buffer_desc *bdesc;
+ boolean_t hwcsum = B_FALSE, notify, work_to_do;
+ size_t len;
+
+ /*
+ * in loop over unconsumed responses, we do:
+ * 1. get a response
+ * 2. take corresponding buffer off recv. ring
+ * 3. indicate this by setting slot to NULL
+ * 4. create a new message and
+ * 5. copy data in, adjust ptr
+ *
+ * outside loop:
+ * 6. make sure no more data has arrived; kick HV
+ */
+
+ head = tail = NULL;
+
+loop:
+ while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) {
+
+ /* 1. */
+ rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring,
+ xnfp->xnf_rx_ring.rsp_cons);
+
+ DTRACE_PROBE4(got_PKT, int, (int)rxpkt->id,
+ int, (int)rxpkt->offset,
+ int, (int)rxpkt->flags, int, (int)rxpkt->status);
+
+ /*
+ * 2.
+ * Take buffer off of receive ring
+ */
+ hwcsum = B_FALSE;
+ bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id];
+ /* 3. */
+ xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL;
+ ASSERT(bdesc->id == rxpkt->id);
+ if (rxpkt->status <= 0) {
+ DTRACE_PROBE4(pkt_status_negative, int, rxpkt->status,
+ char *, bdesc->buf, int, rxpkt->offset,
+ char *, ((char *)bdesc->buf) + rxpkt->offset);
+ mp = NULL;
+ xnfp->xnf_stat_errrx++;
+ if (rxpkt->status == 0)
+ xnfp->xnf_stat_runt++;
+ if (rxpkt->status == NETIF_RSP_ERROR)
+ xnfp->xnf_stat_mac_rcv_error++;
+ if (rxpkt->status == NETIF_RSP_DROPPED)
+ xnfp->xnf_stat_norxbuf++;
+ /*
+ * re-hang the buffer
+ */
+ rx_buffer_hang(xnfp, bdesc);
+ } else {
+ grant_ref_t ref = bdesc->grant_ref;
+ struct xnf_buffer_desc *new_bdesc;
+ unsigned long off = rxpkt->offset;
+
+ DTRACE_PROBE4(pkt_status_ok, int, rxpkt->status,
+ char *, bdesc->buf, int, rxpkt->offset,
+ char *, ((char *)bdesc->buf) + rxpkt->offset);
+ len = rxpkt->status;
+ ASSERT(off + len <= PAGEOFFSET);
+ if (ref == GRANT_INVALID_REF) {
+ mp = NULL;
+ new_bdesc = bdesc;
+ cmn_err(CE_WARN, "Bad rx grant reference %d "
+ "from dom %d", ref,
+ xvdi_get_oeid(xnfp->xnf_devinfo));
+ goto luckless;
+ }
+ /*
+ * Release ref which we'll be re-claiming in
+ * rx_buffer_hang().
+ */
+ bdesc->grant_ref = GRANT_INVALID_REF;
+ (void) gnttab_end_foreign_access_ref(ref, 0);
+ gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head,
+ ref);
+ if (rxpkt->flags & NETRXF_data_validated)
+ hwcsum = B_TRUE;
+
+ /*
+ * XXPV for the initial implementation of HVcopy,
+ * create a new msg and copy in the data
+ */
+ /* 4. */
+ if ((mp = allocb(len, BPRI_MED)) == NULL) {
+ /*
+ * Couldn't get buffer to copy to,
+ * drop this data, and re-hang
+ * the buffer on the ring.
+ */
+ xnfp->xnf_stat_norxbuf++;
+ DTRACE_PROBE(alloc_nix);
+ } else {
+ /* 5. */
+ DTRACE_PROBE(alloc_ok);
+ bcopy(bdesc->buf + off, mp->b_wptr,
+ len);
+ mp->b_wptr += len;
+ }
+ new_bdesc = bdesc;
+luckless:
+
+ /* Re-hang old or hang new buffer. */
+ rx_buffer_hang(xnfp, new_bdesc);
+ }
+ if (mp) {
+ if (hwcsum) {
+ /*
+ * See comments in xnf_process_recv().
+ */
+
+ (void) hcksum_assoc(mp, NULL,
+ NULL, 0, 0, 0, 0,
+ HCK_FULLCKSUM |
+ HCK_FULLCKSUM_OK,
+ 0);
+ xnfp->xnf_stat_rx_cksum_no_need++;
+ }
+ if (head == NULL) {
+ head = tail = mp;
+ } else {
+ tail->b_next = mp;
+ tail = mp;
+ }
+
+ ASSERT(mp->b_next == NULL);
+
+ xnfp->xnf_stat_ipackets++;
+ xnfp->xnf_stat_rbytes += len;
+ }
+
+ xnfp->xnf_rx_ring.rsp_cons++;
+
+ xnfp->xnf_stat_hvcopy_packet_processed++;
+ }
+
+ /* 6. */
+ /*
+ * Has more data come in since we started?
+ */
+ /* LINTED: constant in conditional context */
+ RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do);
+ if (work_to_do)
+ goto loop;
+
+ /*
+ * Indicate to the backend that we have re-filled the receive
+ * ring.
+ */
+ /* LINTED: constant in conditional context */
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify);
+ if (notify)
+ ec_notify_via_evtchn(xnfp->xnf_evtchn);
+
+ return (head);
+}
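
The hvcopy receive path above allocates a fresh message per frame, copies
the granted buffer's payload into it, and chains messages on a head/tail
list handed to mac_rx() in one call, re-hanging every ring buffer
immediately. A plain-C sketch of the copy-and-chain pattern, with a
stand-in struct in place of mblk_t:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for mblk_t: payload plus a b_next-style link. */
struct msg {
	struct msg *next;
	size_t len;
	char data[64];
};

static struct msg *
msg_copy(const char *src, size_t len)
{
	struct msg *m = calloc(1, sizeof (*m));

	if (m != NULL) {
		memcpy(m->data, src, len);	/* the bcopy() step */
		m->len = len;
	}
	return (m);	/* NULL means drop, as when allocb() fails */
}

int
main(void)
{
	const char *frames[] = { "one", "two", "three" };
	struct msg *head = NULL, *tail = NULL;

	for (int i = 0; i < 3; i++) {
		struct msg *m = msg_copy(frames[i], strlen(frames[i]));

		if (m == NULL)
			continue;	/* count the drop, re-hang buffer */
		if (head == NULL) {
			head = tail = m;
		} else {
			tail->next = m;
			tail = m;
		}
	}
	for (struct msg *m = head; m != NULL; m = m->next)
		printf("%s\n", m->data);
	return (0);
}
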
/* Process all queued received packets */
static mblk_t *
@@ -1468,27 +1630,27 @@ xnf_process_recv(xnf_t *xnfp)
head = tail = NULL;
loop:
- while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->rx_ring)) {
+ while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) {
- rxpkt = RING_GET_RESPONSE(&xnfp->rx_ring,
- xnfp->rx_ring.rsp_cons);
+ rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring,
+ xnfp->xnf_rx_ring.rsp_cons);
/*
* Take buffer off of receive ring
*/
hwcsum = B_FALSE;
- bdesc = xnfp->rxpkt_bufptr[rxpkt->id];
- xnfp->rxpkt_bufptr[rxpkt->id] = NULL;
+ bdesc = xnfp->xnf_rxpkt_bufptr[rxpkt->id];
+ xnfp->xnf_rxpkt_bufptr[rxpkt->id] = NULL;
ASSERT(bdesc->id == rxpkt->id);
if (rxpkt->status <= 0) {
mp = NULL;
- xnfp->stat_errrcv++;
+ xnfp->xnf_stat_errrx++;
if (rxpkt->status == 0)
- xnfp->stat_runt++;
+ xnfp->xnf_stat_runt++;
if (rxpkt->status == NETIF_RSP_ERROR)
- xnfp->stat_mac_rcv_error++;
+ xnfp->xnf_stat_mac_rcv_error++;
if (rxpkt->status == NETIF_RSP_DROPPED)
- xnfp->stat_norcvbuf++;
+ xnfp->xnf_stat_norxbuf++;
/*
* re-hang the buffer
*/
@@ -1506,7 +1668,7 @@ loop:
new_bdesc = bdesc;
cmn_err(CE_WARN, "Bad rx grant reference %d "
"from dom %d", ref,
- xvdi_get_oeid(xnfp->devinfo));
+ xvdi_get_oeid(xnfp->xnf_devinfo));
goto luckless;
}
bdesc->grant_ref = GRANT_INVALID_REF;
@@ -1514,13 +1676,15 @@ loop:
ASSERT(mfn != MFN_INVALID);
ASSERT(hat_getpfnum(kas.a_hat, bdesc->buf) ==
PFN_INVALID);
- gnttab_release_grant_reference(&xnfp->gref_rx_head,
+
+ gnttab_release_grant_reference(&xnfp->xnf_gref_rx_head,
ref);
reassign_pfn(xnf_btop(bdesc->buf_phys), mfn);
hat_devload(kas.a_hat, bdesc->buf, PAGESIZE,
xnf_btop(bdesc->buf_phys),
PROT_READ | PROT_WRITE, HAT_LOAD);
balloon_drv_added(1);
+
if (rxpkt->flags & NETRXF_data_validated)
hwcsum = B_TRUE;
if (len <= xnf_rx_bcopy_thresh) {
@@ -1534,14 +1698,14 @@ loop:
* We send a pointer to this data upstream;
* we need a new buffer to replace this one.
*/
- mutex_enter(&xnfp->rx_buf_mutex);
+ mutex_enter(&xnfp->xnf_rx_buf_mutex);
new_bdesc = xnf_get_buffer(xnfp);
if (new_bdesc != NULL) {
- xnfp->rx_bufs_outstanding++;
+ xnfp->xnf_rx_bufs_outstanding++;
} else {
- xnfp->stat_rx_no_ringbuf++;
+ xnfp->xnf_stat_rx_no_ringbuf++;
}
- mutex_exit(&xnfp->rx_buf_mutex);
+ mutex_exit(&xnfp->xnf_rx_buf_mutex);
}
if (new_bdesc == NULL) {
@@ -1556,7 +1720,7 @@ loop:
* drop this data, and re-hang
* the buffer on the ring.
*/
- xnfp->stat_norcvbuf++;
+ xnfp->xnf_stat_norxbuf++;
} else {
bcopy(bdesc->buf + off, mp->b_wptr,
len);
@@ -1579,7 +1743,7 @@ loop:
* Couldn't get mblk to pass recv data
* up with, free the old ring buffer
*/
- xnfp->stat_norcvbuf++;
+ xnfp->xnf_stat_norxbuf++;
xnf_rcv_complete(bdesc);
goto luckless;
}
@@ -1624,7 +1788,7 @@ luckless:
HCK_FULLCKSUM |
HCK_FULLCKSUM_OK,
0);
- xnfp->stat_rx_cksum_no_need++;
+ xnfp->xnf_stat_rx_cksum_no_need++;
}
if (head == NULL) {
head = tail = mp;
@@ -1635,18 +1799,18 @@ luckless:
ASSERT(mp->b_next == NULL);
- xnfp->stat_ipackets++;
- xnfp->stat_rbytes += len;
+ xnfp->xnf_stat_ipackets++;
+ xnfp->xnf_stat_rbytes += len;
}
- xnfp->rx_ring.rsp_cons++;
+ xnfp->xnf_rx_ring.rsp_cons++;
}
/*
* Has more data come in since we started?
*/
/* LINTED: constant in conditional context */
- RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->rx_ring, work_to_do);
+ RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_rx_ring, work_to_do);
if (work_to_do)
goto loop;
@@ -1655,9 +1819,9 @@ luckless:
* ring.
*/
/* LINTED: constant in conditional context */
- RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->rx_ring, notify);
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify);
if (notify)
- ec_notify_via_evtchn(xnfp->evtchn);
+ ec_notify_via_evtchn(xnfp->xnf_evtchn);
return (head);
}
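
In the page-flip path above, each frame is either copied into a small
message (len <= xnf_rx_bcopy_thresh) or the ring buffer itself is loaned
upstream, in which case a replacement buffer must be hung on the ring and
the loan counted in xnf_rx_bufs_outstanding. The decision reduces to a
sketch like this, with THRESH standing in for the tunable:

#include <stdbool.h>
#include <stddef.h>

#define	THRESH	256	/* stands in for xnf_rx_bcopy_thresh */

/*
 * Copy small frames into a fresh message; loan the ring buffer
 * upstream only when the frame is large and a replacement buffer
 * is available to hang in its place.
 */
static bool
should_copy(size_t len, bool replacement_available)
{
	return (len <= THRESH || !replacement_available);
}

int
main(void)
{
	return (should_copy(64, true) ? 0 : 1);	/* small frame: copy */
}
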
@@ -1671,13 +1835,13 @@ xnf_rcv_complete(struct xnf_buffer_desc *bdesc)
long cnt;
/* One less outstanding receive buffer */
- mutex_enter(&xnfp->rx_buf_mutex);
- --xnfp->rx_bufs_outstanding;
+ mutex_enter(&xnfp->xnf_rx_buf_mutex);
+ --xnfp->xnf_rx_bufs_outstanding;
/*
* Return buffer to the free list, unless the free list is getting
- * too large. XXX - this threshold may need tuning.
+ * too large. XXPV - this threshold may need tuning.
*/
- if (xnfp->rx_descs_free < xnf_recv_bufs_lowat) {
+ if (xnfp->xnf_rx_descs_free < xnf_rx_bufs_lowat) {
/*
* Unmap the page, and hand the machine page back
* to xen so it can be re-used as a backend net buffer.
@@ -1689,17 +1853,17 @@ xnf_rcv_complete(struct xnf_buffer_desc *bdesc)
"hypervisor\n");
}
- bdesc->next = xnfp->free_list;
- xnfp->free_list = bdesc;
- xnfp->rx_descs_free++;
- mutex_exit(&xnfp->rx_buf_mutex);
+ bdesc->next = xnfp->xnf_free_list;
+ xnfp->xnf_free_list = bdesc;
+ xnfp->xnf_rx_descs_free++;
+ mutex_exit(&xnfp->xnf_rx_buf_mutex);
} else {
/*
* We can return everything here since we have a free buffer
* that we have not given the backing page for back to xen.
*/
- --xnfp->recv_buffer_count;
- mutex_exit(&xnfp->rx_buf_mutex);
+ --xnfp->xnf_rx_buffer_count;
+ mutex_exit(&xnfp->xnf_rx_buf_mutex);
(void) ddi_dma_unbind_handle(bdesc->dma_handle);
ddi_dma_mem_free(&bdesc->acc_handle);
ddi_dma_free_handle(&bdesc->dma_handle);
@@ -1713,7 +1877,7 @@ xnf_rcv_complete(struct xnf_buffer_desc *bdesc)
static int
xnf_alloc_dma_resources(xnf_t *xnfp)
{
- dev_info_t *devinfo = xnfp->devinfo;
+ dev_info_t *devinfo = xnfp->xnf_devinfo;
int i;
size_t len;
ddi_dma_cookie_t dma_cookie;
@@ -1722,10 +1886,10 @@ xnf_alloc_dma_resources(xnf_t *xnfp)
int rc;
caddr_t rptr;
- xnfp->n_recvs = NET_RX_RING_SIZE;
- xnfp->max_recv_bufs = xnf_recv_bufs_hiwat;
+ xnfp->xnf_n_rx = NET_RX_RING_SIZE;
+ xnfp->xnf_max_rx_bufs = xnf_rx_bufs_hiwat;
- xnfp->n_xmits = NET_TX_RING_SIZE;
+ xnfp->xnf_n_tx = NET_TX_RING_SIZE;
/*
* The code below allocates all the DMA data structures that
@@ -1734,10 +1898,10 @@ xnf_alloc_dma_resources(xnf_t *xnfp)
* First allocate handles for mapping (virtual address) pointers to
* transmit data buffers to physical addresses
*/
- for (i = 0; i < xnfp->n_xmits; i++) {
+ for (i = 0; i < xnfp->xnf_n_tx; i++) {
if ((rc = ddi_dma_alloc_handle(devinfo,
&tx_buffer_dma_attr, DDI_DMA_SLEEP, 0,
- &xnfp->tx_pkt_info[i].dma_handle)) != DDI_SUCCESS)
+ &xnfp->xnf_tx_pkt_info[i].dma_handle)) != DDI_SUCCESS)
return (DDI_FAILURE);
}
@@ -1745,25 +1909,25 @@ xnf_alloc_dma_resources(xnf_t *xnfp)
* Allocate page for the transmit descriptor ring.
*/
if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr,
- DDI_DMA_SLEEP, 0, &xnfp->tx_ring_dma_handle) != DDI_SUCCESS)
+ DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS)
goto alloc_error;
- if (ddi_dma_mem_alloc(xnfp->tx_ring_dma_handle,
+ if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle,
PAGESIZE, &accattr, DDI_DMA_CONSISTENT,
DDI_DMA_SLEEP, 0, &rptr, &len,
- &xnfp->tx_ring_dma_acchandle) != DDI_SUCCESS) {
- ddi_dma_free_handle(&xnfp->tx_ring_dma_handle);
- xnfp->tx_ring_dma_handle = NULL;
+ &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) {
+ ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
+ xnfp->xnf_tx_ring_dma_handle = NULL;
goto alloc_error;
}
- if ((rc = ddi_dma_addr_bind_handle(xnfp->tx_ring_dma_handle, NULL,
+ if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL,
rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) {
- ddi_dma_mem_free(&xnfp->tx_ring_dma_acchandle);
- ddi_dma_free_handle(&xnfp->tx_ring_dma_handle);
- xnfp->tx_ring_dma_handle = NULL;
- xnfp->tx_ring_dma_acchandle = NULL;
+ ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle);
+ ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
+ xnfp->xnf_tx_ring_dma_handle = NULL;
+ xnfp->xnf_tx_ring_dma_acchandle = NULL;
if (rc == DDI_DMA_NORESOURCES)
goto alloc_error;
else
@@ -1775,32 +1939,32 @@ xnf_alloc_dma_resources(xnf_t *xnfp)
/* LINTED: constant in conditional context */
SHARED_RING_INIT((netif_tx_sring_t *)rptr);
/* LINTED: constant in conditional context */
- FRONT_RING_INIT(&xnfp->tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE);
- xnfp->tx_ring_phys_addr = dma_cookie.dmac_laddress;
+ FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE);
+ xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress;
/*
* Allocate page for the receive descriptor ring.
*/
if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr,
- DDI_DMA_SLEEP, 0, &xnfp->rx_ring_dma_handle) != DDI_SUCCESS)
+ DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS)
goto alloc_error;
- if (ddi_dma_mem_alloc(xnfp->rx_ring_dma_handle,
+ if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle,
PAGESIZE, &accattr, DDI_DMA_CONSISTENT,
DDI_DMA_SLEEP, 0, &rptr, &len,
- &xnfp->rx_ring_dma_acchandle) != DDI_SUCCESS) {
- ddi_dma_free_handle(&xnfp->rx_ring_dma_handle);
- xnfp->rx_ring_dma_handle = NULL;
+ &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) {
+ ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
+ xnfp->xnf_rx_ring_dma_handle = NULL;
goto alloc_error;
}
- if ((rc = ddi_dma_addr_bind_handle(xnfp->rx_ring_dma_handle, NULL,
+ if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL,
rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) {
- ddi_dma_mem_free(&xnfp->rx_ring_dma_acchandle);
- ddi_dma_free_handle(&xnfp->rx_ring_dma_handle);
- xnfp->rx_ring_dma_handle = NULL;
- xnfp->rx_ring_dma_acchandle = NULL;
+ ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle);
+ ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
+ xnfp->xnf_rx_ring_dma_handle = NULL;
+ xnfp->xnf_rx_ring_dma_acchandle = NULL;
if (rc == DDI_DMA_NORESOURCES)
goto alloc_error;
else
@@ -1812,26 +1976,26 @@ xnf_alloc_dma_resources(xnf_t *xnfp)
/* LINTED: constant in conditional context */
SHARED_RING_INIT((netif_rx_sring_t *)rptr);
/* LINTED: constant in conditional context */
- FRONT_RING_INIT(&xnfp->rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE);
- xnfp->rx_ring_phys_addr = dma_cookie.dmac_laddress;
+ FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE);
+ xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress;
/*
* Preallocate receive buffers for each receive descriptor.
*/
/* Set up the "free list" of receive buffer descriptors */
- for (i = 0; i < xnfp->n_recvs; i++) {
+ for (i = 0; i < xnfp->xnf_n_rx; i++) {
if ((bdesc = xnf_alloc_buffer(xnfp)) == NULL)
goto alloc_error;
- bdesc->next = xnfp->free_list;
- xnfp->free_list = bdesc;
+ bdesc->next = xnfp->xnf_free_list;
+ xnfp->xnf_free_list = bdesc;
}
return (DDI_SUCCESS);
alloc_error:
cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory",
- ddi_get_instance(xnfp->devinfo));
+ ddi_get_instance(xnfp->xnf_devinfo));
error:
xnf_release_dma_resources(xnfp);
return (DDI_FAILURE);
@@ -1851,28 +2015,28 @@ xnf_release_dma_resources(xnf_t *xnfp)
* Free receive buffers which are currently associated with
* descriptors
*/
- for (i = 0; i < xnfp->n_recvs; i++) {
+ for (i = 0; i < xnfp->xnf_n_rx; i++) {
struct xnf_buffer_desc *bp;
- if ((bp = xnfp->rxpkt_bufptr[i]) == NULL)
+ if ((bp = xnfp->xnf_rxpkt_bufptr[i]) == NULL)
continue;
xnf_free_buffer(bp);
- xnfp->rxpkt_bufptr[i] = NULL;
+ xnfp->xnf_rxpkt_bufptr[i] = NULL;
}
/* Free the receive ring buffer */
- if (xnfp->rx_ring_dma_acchandle != NULL) {
- (void) ddi_dma_unbind_handle(xnfp->rx_ring_dma_handle);
- ddi_dma_mem_free(&xnfp->rx_ring_dma_acchandle);
- ddi_dma_free_handle(&xnfp->rx_ring_dma_handle);
- xnfp->rx_ring_dma_acchandle = NULL;
+ if (xnfp->xnf_rx_ring_dma_acchandle != NULL) {
+ (void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle);
+ ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle);
+ ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
+ xnfp->xnf_rx_ring_dma_acchandle = NULL;
}
/* Free the transmit ring buffer */
- if (xnfp->tx_ring_dma_acchandle != NULL) {
- (void) ddi_dma_unbind_handle(xnfp->tx_ring_dma_handle);
- ddi_dma_mem_free(&xnfp->tx_ring_dma_acchandle);
- ddi_dma_free_handle(&xnfp->tx_ring_dma_handle);
- xnfp->tx_ring_dma_acchandle = NULL;
+ if (xnfp->xnf_tx_ring_dma_acchandle != NULL) {
+ (void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle);
+ ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle);
+ ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
+ xnfp->xnf_tx_ring_dma_acchandle = NULL;
}
}
@@ -1881,12 +2045,13 @@ xnf_release_mblks(xnf_t *xnfp)
{
int i;
- for (i = 0; i < xnfp->n_xmits; i++) {
- if (xnfp->tx_pkt_info[i].mp == NULL)
+ for (i = 0; i < xnfp->xnf_n_tx; i++) {
+ if (xnfp->xnf_tx_pkt_info[i].mp == NULL)
continue;
- freemsg(xnfp->tx_pkt_info[i].mp);
- xnfp->tx_pkt_info[i].mp = NULL;
- (void) ddi_dma_unbind_handle(xnfp->tx_pkt_info[i].dma_handle);
+ freemsg(xnfp->xnf_tx_pkt_info[i].mp);
+ xnfp->xnf_tx_pkt_info[i].mp = NULL;
+ (void) ddi_dma_unbind_handle(
+ xnfp->xnf_tx_pkt_info[i].dma_handle);
}
}
@@ -1896,15 +2061,15 @@ xnf_release_mblks(xnf_t *xnfp)
* Called with the tx_buf_mutex held.
*/
static struct xnf_buffer_desc *
-xnf_get_xmit_buffer(xnf_t *xnfp)
+xnf_get_tx_buffer(xnf_t *xnfp)
{
struct xnf_buffer_desc *bdesc;
- bdesc = xnfp->xmit_free_list;
+ bdesc = xnfp->xnf_tx_free_list;
if (bdesc != NULL) {
- xnfp->xmit_free_list = bdesc->next;
+ xnfp->xnf_tx_free_list = bdesc->next;
} else {
- bdesc = xnf_alloc_xmit_buffer(xnfp);
+ bdesc = xnf_alloc_tx_buffer(xnfp);
}
return (bdesc);
}
@@ -1919,10 +2084,10 @@ xnf_get_buffer(xnf_t *xnfp)
{
struct xnf_buffer_desc *bdesc;
- bdesc = xnfp->free_list;
+ bdesc = xnfp->xnf_free_list;
if (bdesc != NULL) {
- xnfp->free_list = bdesc->next;
- xnfp->rx_descs_free--;
+ xnfp->xnf_free_list = bdesc->next;
+ xnfp->xnf_rx_descs_free--;
} else {
bdesc = xnf_alloc_buffer(xnfp);
}
@@ -1933,32 +2098,45 @@ xnf_get_buffer(xnf_t *xnfp)
* Free a xmit buffer back to the xmit free list
*/
static void
-xnf_free_xmit_buffer(struct xnf_buffer_desc *bp)
+xnf_free_tx_buffer(struct xnf_buffer_desc *bp)
{
xnf_t *xnfp = bp->xnfp;
- mutex_enter(&xnfp->tx_buf_mutex);
- bp->next = xnfp->xmit_free_list;
- xnfp->xmit_free_list = bp;
- mutex_exit(&xnfp->tx_buf_mutex);
+ mutex_enter(&xnfp->xnf_tx_buf_mutex);
+ bp->next = xnfp->xnf_tx_free_list;
+ xnfp->xnf_tx_free_list = bp;
+ mutex_exit(&xnfp->xnf_tx_buf_mutex);
}
/*
* Put a buffer descriptor onto the head of the free list.
+ * for page-flip:
* We can't really free these buffers back to the kernel
* since we have given away their backing page to be used
* by the back end net driver.
+ * for hvcopy:
+ * release all the memory
*/
static void
-xnf_free_buffer(struct xnf_buffer_desc *bp)
+xnf_free_buffer(struct xnf_buffer_desc *bdesc)
{
- xnf_t *xnfp = bp->xnfp;
+ xnf_t *xnfp = bdesc->xnfp;
- mutex_enter(&xnfp->rx_buf_mutex);
- bp->next = xnfp->free_list;
- xnfp->free_list = bp;
- xnfp->rx_descs_free++;
- mutex_exit(&xnfp->rx_buf_mutex);
+ mutex_enter(&xnfp->xnf_rx_buf_mutex);
+ if (xnfp->xnf_rx_hvcopy) {
+ if (ddi_dma_unbind_handle(bdesc->dma_handle) != DDI_SUCCESS)
+ goto out;
+ ddi_dma_mem_free(&bdesc->acc_handle);
+ ddi_dma_free_handle(&bdesc->dma_handle);
+ kmem_free(bdesc, sizeof (*bdesc));
+ xnfp->xnf_rx_buffer_count--;
+ } else {
+ bdesc->next = xnfp->xnf_free_list;
+ xnfp->xnf_free_list = bdesc;
+ xnfp->xnf_rx_descs_free++;
+ }
+out:
+ mutex_exit(&xnfp->xnf_rx_buf_mutex);
}
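
xnf_free_buffer() now has two modes: under hvcopy the backing page still
belongs to this domain, so the handle, memory and descriptor are released
outright (if ddi_dma_unbind_handle() fails, the descriptor is deliberately
leaked rather than freed while possibly still mapped); under page flip the
page has been granted away, so the descriptor can only be cached on the
free list. A stand-in sketch of the split:

#include <stdlib.h>
#include <stdbool.h>

/* Stand-in for the driver's buffer descriptor. */
struct buf_desc {
	struct buf_desc *next;
	void *mem;
};

static struct buf_desc *free_list;

static void
buf_free(struct buf_desc *bd, bool hvcopy)
{
	if (hvcopy) {
		free(bd->mem);		/* ddi_dma_mem_free() analogue */
		free(bd);
	} else {
		bd->next = free_list;	/* keep descriptor; page is gone */
		free_list = bd;
	}
}

int
main(void)
{
	struct buf_desc *bd = calloc(1, sizeof (*bd));

	if (bd == NULL)
		return (1);
	bd->mem = malloc(4096);
	buf_free(bd, true);	/* hvcopy: everything released */
	return (0);
}
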
/*
@@ -1966,7 +2144,7 @@ xnf_free_buffer(struct xnf_buffer_desc *bp)
* keep track of the buffer. Called with tx_buf_mutex held.
*/
static struct xnf_buffer_desc *
-xnf_alloc_xmit_buffer(xnf_t *xnfp)
+xnf_alloc_tx_buffer(xnf_t *xnfp)
{
struct xnf_buffer_desc *bdesc;
size_t len;
@@ -1975,7 +2153,7 @@ xnf_alloc_xmit_buffer(xnf_t *xnfp)
return (NULL);
/* allocate a DMA access handle for receive buffer */
- if (ddi_dma_alloc_handle(xnfp->devinfo, &tx_buffer_dma_attr,
+ if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &tx_buffer_dma_attr,
0, 0, &bdesc->dma_handle) != DDI_SUCCESS)
goto failure;
@@ -1983,14 +2161,14 @@ xnf_alloc_xmit_buffer(xnf_t *xnfp)
if (ddi_dma_mem_alloc(bdesc->dma_handle,
PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0,
&bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS)
- goto late_failure;
+ goto failure_1;
bdesc->xnfp = xnfp;
- xnfp->xmit_buffer_count++;
+ xnfp->xnf_tx_buffer_count++;
return (bdesc);
-late_failure:
+failure_1:
ddi_dma_free_handle(&bdesc->dma_handle);
failure:
@@ -2012,14 +2190,14 @@ xnf_alloc_buffer(xnf_t *xnfp)
long cnt;
pfn_t pfn;
- if (xnfp->recv_buffer_count >= xnfp->max_recv_bufs)
+ if (xnfp->xnf_rx_buffer_count >= xnfp->xnf_max_rx_bufs)
return (NULL);
if ((bdesc = kmem_zalloc(sizeof (*bdesc), KM_NOSLEEP)) == NULL)
return (NULL);
/* allocate a DMA access handle for receive buffer */
- if (ddi_dma_alloc_handle(xnfp->devinfo, &rx_buffer_dma_attr,
+ if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &rx_buffer_dma_attr,
0, 0, &bdesc->dma_handle) != DDI_SUCCESS)
goto failure;
@@ -2027,39 +2205,46 @@ xnf_alloc_buffer(xnf_t *xnfp)
if (ddi_dma_mem_alloc(bdesc->dma_handle,
PAGESIZE, &data_accattr, DDI_DMA_STREAMING, 0, 0,
&bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS)
- goto late_failure;
+ goto failure_1;
/* bind to virtual address of buffer to get physical address */
if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL,
bdesc->buf, PAGESIZE, DDI_DMA_READ | DDI_DMA_STREAMING,
DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED)
- goto late_late_failure;
+ goto failure_2;
bdesc->buf_phys = dma_cookie.dmac_laddress;
bdesc->xnfp = xnfp;
- bdesc->free_rtn.free_func = xnf_rcv_complete;
+ if (xnfp->xnf_rx_hvcopy) {
+ bdesc->free_rtn.free_func = xnf_copy_rcv_complete;
+ } else {
+ bdesc->free_rtn.free_func = xnf_rcv_complete;
+ }
bdesc->free_rtn.free_arg = (char *)bdesc;
bdesc->grant_ref = GRANT_INVALID_REF;
ASSERT(ncookies == 1);
- xnfp->recv_buffer_count++;
- /*
- * Unmap the page, and hand the machine page back
- * to xen so it can be used as a backend net buffer.
- */
- pfn = xnf_btop(bdesc->buf_phys);
- cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn);
- if (cnt != 1) {
- cmn_err(CE_WARN, "unable to give a page back to the "
- "hypervisor\n");
+ xnfp->xnf_rx_buffer_count++;
+
+ if (!xnfp->xnf_rx_hvcopy) {
+ /*
+ * Unmap the page, and hand the machine page back
+ * to xen so it can be used as a backend net buffer.
+ */
+ pfn = xnf_btop(bdesc->buf_phys);
+ cnt = balloon_free_pages(1, NULL, bdesc->buf, &pfn);
+ if (cnt != 1) {
+ cmn_err(CE_WARN, "unable to give a page back to the "
+ "hypervisor\n");
+ }
}
return (bdesc);
-late_late_failure:
+failure_2:
ddi_dma_mem_free(&bdesc->acc_handle);
-late_failure:
+failure_1:
ddi_dma_free_handle(&bdesc->dma_handle);
failure:
@@ -2067,40 +2252,129 @@ failure:
return (NULL);
}
+/*
+ * Statistics.
+ */
+static char *xnf_aux_statistics[] = {
+ "tx_cksum_deferred",
+ "rx_cksum_no_need",
+ "interrupts",
+ "unclaimed_interrupts",
+ "tx_pullup",
+ "tx_pagebndry",
+ "tx_attempt",
+ "rx_no_ringbuf",
+ "hvcopy_packet_processed",
+};
+
+static int
+xnf_kstat_aux_update(kstat_t *ksp, int flag)
+{
+ xnf_t *xnfp;
+ kstat_named_t *knp;
+
+ if (flag != KSTAT_READ)
+ return (EACCES);
+
+ xnfp = ksp->ks_private;
+ knp = ksp->ks_data;
+
+ /*
+ * Assignment order must match that of the names in
+ * xnf_aux_statistics.
+ */
+ (knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred;
+ (knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need;
+
+ (knp++)->value.ui64 = xnfp->xnf_stat_interrupts;
+ (knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts;
+ (knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup;
+ (knp++)->value.ui64 = xnfp->xnf_stat_tx_pagebndry;
+ (knp++)->value.ui64 = xnfp->xnf_stat_tx_attempt;
+ (knp++)->value.ui64 = xnfp->xnf_stat_rx_no_ringbuf;
+
+ (knp++)->value.ui64 = xnfp->xnf_stat_hvcopy_packet_processed;
+
+ return (0);
+}
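
xnf_kstat_aux_update() depends on its (knp++) assignments matching the
order of names in xnf_aux_statistics[]; the comment states the coupling
but nothing enforces it. One alternative (a sketch, not the driver's
code) pairs each name with a field offset so the order cannot drift:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Trimmed stand-in for xnf_t's statistics fields. */
struct stats {
	uint64_t tx_cksum_deferred;
	uint64_t rx_cksum_no_need;
	uint64_t interrupts;
};

/* Each kstat name carries its own field offset. */
static const struct {
	const char *name;
	size_t off;
} stat_map[] = {
	{ "tx_cksum_deferred", offsetof(struct stats, tx_cksum_deferred) },
	{ "rx_cksum_no_need", offsetof(struct stats, rx_cksum_no_need) },
	{ "interrupts", offsetof(struct stats, interrupts) },
};

int
main(void)
{
	struct stats s = { 1, 2, 3 };

	for (size_t i = 0; i < sizeof (stat_map) / sizeof (stat_map[0]); i++)
		printf("%s = %llu\n", stat_map[i].name, (unsigned long long)
		    *(uint64_t *)((char *)&s + stat_map[i].off));
	return (0);
}
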
+
+static boolean_t
+xnf_kstat_init(xnf_t *xnfp)
+{
+ int nstat = sizeof (xnf_aux_statistics) /
+ sizeof (xnf_aux_statistics[0]);
+ char **cp = xnf_aux_statistics;
+ kstat_named_t *knp;
+
+ /*
+ * Create and initialise kstats.
+ */
+ if ((xnfp->xnf_kstat_aux = kstat_create("xnf",
+ ddi_get_instance(xnfp->xnf_devinfo),
+ "aux_statistics", "net", KSTAT_TYPE_NAMED,
+ nstat, 0)) == NULL)
+ return (B_FALSE);
+
+ xnfp->xnf_kstat_aux->ks_private = xnfp;
+ xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update;
+
+ knp = xnfp->xnf_kstat_aux->ks_data;
+ while (nstat > 0) {
+ kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
+
+ knp++;
+ cp++;
+ nstat--;
+ }
+
+ kstat_install(xnfp->xnf_kstat_aux);
+
+ return (B_TRUE);
+}
+
static int
xnf_stat(void *arg, uint_t stat, uint64_t *val)
{
xnf_t *xnfp = arg;
- mutex_enter(&xnfp->intrlock);
- mutex_enter(&xnfp->txlock);
+ mutex_enter(&xnfp->xnf_intrlock);
+ mutex_enter(&xnfp->xnf_txlock);
-#define map_stat(q, r) \
+#define mac_stat(q, r) \
case (MAC_STAT_##q): \
- *val = xnfp->stat_##r; \
+ *val = xnfp->xnf_stat_##r; \
+ break
+
+#define ether_stat(q, r) \
+ case (ETHER_STAT_##q): \
+ *val = xnfp->xnf_stat_##r; \
break
switch (stat) {
- map_stat(IPACKETS, ipackets);
- map_stat(OPACKETS, opackets);
- map_stat(RBYTES, rbytes);
- map_stat(OBYTES, obytes);
- map_stat(NORCVBUF, norcvbuf);
- map_stat(IERRORS, errrcv);
- map_stat(NOXMTBUF, xmit_defer);
+ mac_stat(IPACKETS, ipackets);
+ mac_stat(OPACKETS, opackets);
+ mac_stat(RBYTES, rbytes);
+ mac_stat(OBYTES, obytes);
+ mac_stat(NORCVBUF, norxbuf);
+ mac_stat(IERRORS, errrx);
+ mac_stat(NOXMTBUF, tx_defer);
+
+ ether_stat(MACRCV_ERRORS, mac_rcv_error);
+ ether_stat(TOOSHORT_ERRORS, runt);
default:
- mutex_exit(&xnfp->txlock);
- mutex_exit(&xnfp->intrlock);
+ mutex_exit(&xnfp->xnf_txlock);
+ mutex_exit(&xnfp->xnf_intrlock);
return (ENOTSUP);
}
-#undef map_stat
+#undef mac_stat
+#undef ether_stat
- mutex_exit(&xnfp->txlock);
- mutex_exit(&xnfp->intrlock);
+ mutex_exit(&xnfp->xnf_txlock);
+ mutex_exit(&xnfp->xnf_intrlock);
return (0);
}
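
The mac_stat()/ether_stat() macros above generate switch cases, pasting
the statistic name into both the MAC_STAT_*/ETHER_STAT_* constant and the
renamed xnf_stat_* field. A minimal standalone expansion of the same
token-pasting trick:

#include <stdio.h>
#include <stdint.h>

enum { STAT_IPACKETS, STAT_OPACKETS };

struct dev {
	uint64_t stat_ipackets;
	uint64_t stat_opackets;
};

/* Same shape as xnf_stat()'s mac_stat()/ether_stat() helpers. */
#define	map_stat(q, r)			\
	case (STAT_##q):		\
		*val = d->stat_##r;	\
		break

static int
dev_stat(struct dev *d, int stat, uint64_t *val)
{
	switch (stat) {
	map_stat(IPACKETS, ipackets);
	map_stat(OPACKETS, opackets);
	default:
		return (-1);
	}
	return (0);
}
#undef map_stat

int
main(void)
{
	struct dev d = { 10, 20 };
	uint64_t v;

	(void) dev_stat(&d, STAT_OPACKETS, &v);
	printf("%llu\n", (unsigned long long)v);	/* 20 */
	return (0);
}
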
@@ -2134,7 +2408,7 @@ xnf_resources(void *arg)
mrf.mrf_normal_blank_time = 128; /* XXPV dme: see xnf_blank() */
mrf.mrf_normal_pkt_count = 8; /* XXPV dme: see xnf_blank() */
- xnfp->rx_handle = mac_resource_add(xnfp->mh,
+ xnfp->xnf_rx_handle = mac_resource_add(xnfp->xnf_mh,
(mac_resource_t *)&mrf);
}
@@ -2166,7 +2440,7 @@ xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data)
* not zero. (In fact, a Solaris dom0 is happy to deal
* with a checksum of zero, but a Linux dom0 is not.)
*/
- if (xnfp->cksum_offload)
+ if (xnfp->xnf_cksum_offload)
*capab = HCKSUM_INET_PARTIAL;
else
*capab = 0;
@@ -2196,19 +2470,42 @@ oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
switch (new_state) {
case XenbusStateConnected:
- mutex_enter(&xnfp->intrlock);
- mutex_enter(&xnfp->txlock);
+ mutex_enter(&xnfp->xnf_intrlock);
+ mutex_enter(&xnfp->xnf_txlock);
- xnfp->connected = B_TRUE;
- cv_broadcast(&xnfp->cv);
+ xnfp->xnf_connected = B_TRUE;
+ cv_broadcast(&xnfp->xnf_cv);
- mutex_exit(&xnfp->txlock);
- mutex_exit(&xnfp->intrlock);
+ mutex_exit(&xnfp->xnf_txlock);
+ mutex_exit(&xnfp->xnf_intrlock);
- ec_notify_via_evtchn(xnfp->evtchn);
+ ec_notify_via_evtchn(xnfp->xnf_evtchn);
break;
default:
break;
}
}
+
+/*
+ * Check whether backend is capable of and willing to talk
+ * to us via hypervisor copy, as opposed to page flip.
+ */
+static boolean_t
+xnf_hvcopy_peer_status(dev_info_t *devinfo)
+{
+ int be_rx_copy;
+ int err;
+
+ err = xenbus_scanf(XBT_NULL, xvdi_get_oename(devinfo),
+ "feature-rx-copy", "%d", &be_rx_copy);
+ /*
+ * If we fail to read the store, we assume that the key is
+ * absent, implying an older domain at the far end. Older
+ * domains cannot do HV copy (we assume).
+ */
+ if (err != 0)
+ be_rx_copy = 0;
+
+ return (be_rx_copy ? B_TRUE : B_FALSE);
+}
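
xnf_hvcopy_peer_status() defaults feature-rx-copy to 0 whenever the key
cannot be read, on the assumption that a backend old enough never to have
written the key can only page-flip. The same read-with-default shape as a
standalone sketch; lookup() is a hypothetical stand-in for xenbus_scanf()
and reads an environment variable here:

#include <stdio.h>
#include <stdlib.h>

static int
lookup(const char *key, int *val)
{
	const char *s = getenv(key);

	if (s == NULL)
		return (-1);	/* key absent */
	*val = atoi(s);
	return (0);
}

static int
feature_with_default(const char *key, int dflt)
{
	int v;

	if (lookup(key, &v) != 0)
		v = dflt;	/* absent key => assume old backend */
	return (v);
}

int
main(void)
{
	printf("rx-copy: %d\n", feature_with_default("FEATURE_RX_COPY", 0));
	return (0);
}
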
diff --git a/usr/src/uts/common/xen/io/xnf.h b/usr/src/uts/common/xen/io/xnf.h
index 7f664ee802..19f7898b0d 100644
--- a/usr/src/uts/common/xen/io/xnf.h
+++ b/usr/src/uts/common/xen/io/xnf.h
@@ -29,12 +29,6 @@
#pragma ident "%Z%%M% %I% %E% SMI"
-#include <sys/types.h>
-#include <sys/kstat.h>
-#include <sys/hypervisor.h>
-#include <xen/public/io/netif.h>
-#include <xen/sys/xenbus_impl.h>
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -94,77 +88,83 @@ struct tx_pktinfo {
/* Per network-interface-controller driver private structure */
typedef struct xnf {
/* most interesting stuff first to assist debugging */
- dev_info_t *devinfo; /* System per-device info. */
- mac_handle_t mh; /* Nemo per-device info. */
- int rx_bufs_outstanding;
- int tx_descs_free;
- int rx_descs_free; /* count of free rx bufs */
- int n_xmits; /* No. xmit descriptors */
- int n_recvs; /* No. recv descriptors */
- int n_recv_bufs; /* No. recv DMA buffers */
- int tx_start_thresh_regval;
- unsigned char mac_addr[ETHERADDRL];
- int max_recv_bufs;
- int recv_buffer_count;
- int xmit_buffer_count;
-
- boolean_t connected;
- boolean_t running;
-
- boolean_t cksum_offload;
-
- uint64_t stat_intr;
- uint64_t stat_norcvbuf;
- uint64_t stat_errrcv;
-
- uint64_t stat_xmit_attempt;
- uint64_t stat_xmit_pullup;
- uint64_t stat_xmit_pagebndry;
- uint64_t stat_xmit_defer;
- uint64_t stat_rx_no_ringbuf;
- uint64_t stat_mac_rcv_error;
- uint64_t stat_runt;
-
- uint64_t stat_ipackets;
- uint64_t stat_opackets;
- uint64_t stat_rbytes;
- uint64_t stat_obytes;
-
- uint64_t stat_tx_cksum_deferred;
- uint64_t stat_rx_cksum_no_need;
-
- kstat_t *kstat_aux;
-
- struct xnf_buffer_desc *free_list;
- struct xnf_buffer_desc *xmit_free_list;
- int tx_pkt_id_list; /* free list of avail pkt ids */
- struct tx_pktinfo tx_pkt_info[NET_TX_RING_SIZE];
- struct xnf_buffer_desc *rxpkt_bufptr[XNF_MAX_RXDESCS];
-
- mac_resource_handle_t rx_handle;
- ddi_iblock_cookie_t icookie;
- kmutex_t tx_buf_mutex;
- kmutex_t rx_buf_mutex;
- kmutex_t txlock;
- kmutex_t intrlock;
- boolean_t tx_pages_readonly;
-
- netif_tx_front_ring_t tx_ring; /* tx interface struct ptr */
- ddi_dma_handle_t tx_ring_dma_handle;
- ddi_acc_handle_t tx_ring_dma_acchandle;
- paddr_t tx_ring_phys_addr;
- grant_ref_t tx_ring_ref;
-
- netif_rx_front_ring_t rx_ring; /* rx interface struct ptr */
- ddi_dma_handle_t rx_ring_dma_handle;
- ddi_acc_handle_t rx_ring_dma_acchandle;
- paddr_t rx_ring_phys_addr;
- grant_ref_t rx_ring_ref;
-
- uint16_t evtchn; /* channel to back end ctlr */
- grant_ref_t gref_tx_head; /* tx grant free list */
- grant_ref_t gref_rx_head; /* rx grant free list */
- kcondvar_t cv;
+ dev_info_t *xnf_devinfo; /* System per-device info. */
+ mac_handle_t xnf_mh; /* Nemo per-device info. */
+ int xnf_rx_bufs_outstanding;
+ int xnf_tx_descs_free;
+ int xnf_rx_descs_free; /* count of free rx bufs */
+ int xnf_n_tx; /* No. xmit descriptors */
+ int xnf_n_rx; /* No. recv descriptors */
+ int xnf_n_rx_bufs; /* No. recv DMA buffers */
+ int xnf_tx_start_thresh_regval;
+ unsigned char xnf_mac_addr[ETHERADDRL];
+ int xnf_max_rx_bufs;
+ int xnf_rx_buffer_count;
+ int xnf_tx_buffer_count;
+
+ boolean_t xnf_connected;
+ boolean_t xnf_running;
+
+ boolean_t xnf_cksum_offload;
+
+ uint64_t xnf_stat_interrupts;
+ uint64_t xnf_stat_unclaimed_interrupts;
+ uint64_t xnf_stat_norxbuf;
+ uint64_t xnf_stat_errrx;
+
+ uint64_t xnf_stat_tx_attempt;
+ uint64_t xnf_stat_tx_pullup;
+ uint64_t xnf_stat_tx_pagebndry;
+ uint64_t xnf_stat_tx_defer;
+ uint64_t xnf_stat_rx_no_ringbuf;
+ uint64_t xnf_stat_mac_rcv_error;
+ uint64_t xnf_stat_runt;
+
+ uint64_t xnf_stat_ipackets;
+ uint64_t xnf_stat_opackets;
+ uint64_t xnf_stat_rbytes;
+ uint64_t xnf_stat_obytes;
+
+ uint64_t xnf_stat_tx_cksum_deferred;
+ uint64_t xnf_stat_rx_cksum_no_need;
+ uint64_t xnf_stat_hvcopy_enabled; /* on/off */
+ uint64_t xnf_stat_hvcopy_packet_processed;
+
+ kstat_t *xnf_kstat_aux;
+
+ struct xnf_buffer_desc *xnf_free_list;
+ struct xnf_buffer_desc *xnf_tx_free_list;
+ int xnf_tx_pkt_id_list;
+ /* free list of avail pkt ids */
+ struct tx_pktinfo xnf_tx_pkt_info[NET_TX_RING_SIZE];
+ struct xnf_buffer_desc *xnf_rxpkt_bufptr[XNF_MAX_RXDESCS];
+
+ mac_resource_handle_t xnf_rx_handle;
+ ddi_iblock_cookie_t xnf_icookie;
+ kmutex_t xnf_tx_buf_mutex;
+ kmutex_t xnf_rx_buf_mutex;
+ kmutex_t xnf_txlock;
+ kmutex_t xnf_intrlock;
+ boolean_t xnf_tx_pages_readonly;
+
+ netif_tx_front_ring_t xnf_tx_ring; /* tx interface struct ptr */
+ ddi_dma_handle_t xnf_tx_ring_dma_handle;
+ ddi_acc_handle_t xnf_tx_ring_dma_acchandle;
+ paddr_t xnf_tx_ring_phys_addr;
+ grant_ref_t xnf_tx_ring_ref;
+
+ netif_rx_front_ring_t xnf_rx_ring; /* rx interface struct ptr */
+ ddi_dma_handle_t xnf_rx_ring_dma_handle;
+ ddi_acc_handle_t xnf_rx_ring_dma_acchandle;
+ paddr_t xnf_rx_ring_phys_addr;
+ grant_ref_t xnf_rx_ring_ref;
+
+ uint16_t xnf_evtchn; /* channel to back end ctlr */
+ grant_ref_t xnf_gref_tx_head; /* tx grant free list */
+ grant_ref_t xnf_gref_rx_head; /* rx grant free list */
+ kcondvar_t xnf_cv;
+
+ boolean_t xnf_rx_hvcopy; /* do we do HV copy? */
} xnf_t;
#ifdef __cplusplus
diff --git a/usr/src/uts/common/xen/io/xpvd.c b/usr/src/uts/common/xen/io/xpvd.c
index c989960444..34408e16f8 100644
--- a/usr/src/uts/common/xen/io/xpvd.c
+++ b/usr/src/uts/common/xen/io/xpvd.c
@@ -36,7 +36,6 @@
*/
#include <sys/conf.h>
-#include <sys/hypervisor.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/modctl.h>
@@ -46,18 +45,29 @@
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
-#include <sys/mach_intr.h>
-#include <sys/evtchn_impl.h>
#include <sys/avintr.h>
#include <sys/psm.h>
#include <sys/spl.h>
#include <sys/promif.h>
#include <sys/list.h>
-#include <sys/xen_mmu.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
-#include <sys/bootinfo.h>
#include <util/sscanf.h>
+#include <sys/mach_intr.h>
+#include <sys/bootinfo.h>
+#ifdef XPV_HVM_DRIVER
+#include <sys/xpv_support.h>
+#include <sys/hypervisor.h>
+#include <sys/archsystm.h>
+#include <sys/cpu.h>
+#include <public/xen.h>
+#include <public/event_channel.h>
+#include <public/io/xenbus.h>
+#else
+#include <sys/hypervisor.h>
+#include <sys/evtchn_impl.h>
+#include <sys/xen_mmu.h>
+#endif
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
@@ -173,6 +183,10 @@ static ndi_event_set_t xpvd_ndi_events = {
static ndi_event_hdl_t xpvd_ndi_event_handle;
+#ifdef XPV_HVM_DRIVER
+static int hvm_vdev_num[26];
+#endif
+
/*
* Hypervisor interrupt capabilities
*/
@@ -236,7 +250,16 @@ static int
xpvd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
extern void xvdi_watch_devices(int);
- xpvd_dip = devi;
+
+#ifdef XPV_HVM_DRIVER
+ if (xen_info == NULL) {
+ if (ddi_hold_installed_driver(ddi_name_to_major("xpv")) ==
+ NULL) {
+ cmn_err(CE_WARN, "Couldn't initialize xpv framework");
+ return (DDI_FAILURE);
+ }
+ }
+#endif
if (ndi_event_alloc_hdl(devi, 0, &xpvd_ndi_event_handle,
NDI_SLEEP) != NDI_SUCCESS) {
@@ -256,6 +279,7 @@ xpvd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
else
xvdi_watch_devices(XENSTORE_UP);
+ xpvd_dip = devi;
ddi_report_dev(devi);
return (DDI_SUCCESS);
@@ -557,6 +581,9 @@ xpvd_intr_ops(dev_info_t *pdip, dev_info_t *rdip, ddi_intr_op_t intr_op,
case DDI_INTROP_SETMASK:
case DDI_INTROP_CLRMASK:
+#ifdef XPV_HVM_DRIVER
+ return (DDI_ENOTSUP);
+#else
/*
* Handle this here
*/
@@ -568,14 +595,18 @@ xpvd_intr_ops(dev_info_t *pdip, dev_info_t *rdip, ddi_intr_op_t intr_op,
ec_enable_irq(hdlp->ih_vector);
}
break;
-
+#endif
case DDI_INTROP_GETPENDING:
+#ifdef XPV_HVM_DRIVER
+ return (DDI_ENOTSUP);
+#else
if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
return (DDI_FAILURE);
*(int *)result = ec_pending_irq(hdlp->ih_vector);
DDI_INTR_NEXDBG((CE_CONT, "xpvd: GETPENDING returned = %x\n",
*(int *)result));
break;
+#endif
case DDI_INTROP_NAVAIL:
*(int *)result = 1;
@@ -689,6 +720,11 @@ xpvd_name_child(dev_info_t *child, char *name, int namelen)
int *domain, *vdev;
uint_t ndomain, nvdev;
char *unit_address;
+ int devno;
+#ifdef XPV_HVM_DRIVER
+ char *xip;
+ int xenstore_id;
+#endif
/*
* i_xpvd_parse_devname() knows the formats used by this
@@ -721,11 +757,45 @@ xpvd_name_child(dev_info_t *child, char *name, int namelen)
/*
* Use "unit-address" property (frontend/softdev drivers).
+ *
+ * For PV domains, the disk name should be a simple number. In an
+ * HVM domain, it will be a string of the form hdX. In the latter
+ * case we convert hda to 0, hdb to 1, and so on.
*/
if (ddi_prop_lookup_string(DDI_DEV_T_ANY, child,
DDI_PROP_DONTPASS, "unit-address", &unit_address)
== DDI_PROP_SUCCESS) {
- (void) snprintf(name, namelen, "%s", unit_address);
+ devno = -1;
+ if (unit_address[0] >= '0' && unit_address[0] <= '9')
+ (void) sscanf(unit_address, "%d", &devno);
+#ifdef XPV_HVM_DRIVER
+ /*
+ * XXX: we should really check the device class here. We
+ * always want to set hvm_vdev_num[] - even if we somehow
+ * end up with a non-hdX device name.
+ */
+ else if (strlen(unit_address) == 3 &&
+ unit_address[0] == 'h' && unit_address[1] == 'd') {
+ devno = unit_address[2] - 'a';
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, child,
+ DDI_PROP_DONTPASS, "xenstore-id", &xip)
+ == DDI_PROP_SUCCESS) {
+ (void) sscanf(xip, "%d", &xenstore_id);
+ ddi_prop_free(xip);
+ hvm_vdev_num[devno] = xenstore_id;
+ } else {
+ devno = -1;
+ }
+ }
+#endif
+
+ if (devno < 0) {
+ cmn_err(CE_WARN, "Unrecognized device: %s",
+ unit_address);
+ ddi_prop_free(unit_address);
+ return (DDI_FAILURE);
+ }
+ (void) snprintf(name, namelen, "%x", devno);
ddi_prop_free(unit_address);
return (DDI_SUCCESS);
}
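
The unit-address handling above accepts plain decimal names from PV
domains and maps the HVM names hda..hdz onto 0..25, recording the
xenstore-id property so i_xpvd_parse_devname() can translate back. The
parse on its own as runnable C, with an explicit a..z range check added
for safety:

#include <stdio.h>
#include <string.h>

static int
parse_unit_address(const char *ua)
{
	int devno = -1;

	if (ua[0] >= '0' && ua[0] <= '9')
		(void) sscanf(ua, "%d", &devno);
	else if (strlen(ua) == 3 && ua[0] == 'h' && ua[1] == 'd' &&
	    ua[2] >= 'a' && ua[2] <= 'z')
		devno = ua[2] - 'a';
	return (devno);	/* -1 means unrecognized */
}

int
main(void)
{
	printf("%d %d %d\n", parse_unit_address("3"),
	    parse_unit_address("hdc"), parse_unit_address("cdrom"));
	/* prints: 3 2 -1 */
	return (0);
}
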
@@ -846,10 +916,20 @@ i_xpvd_parse_devname(char *name, xendev_devclass_t *devclassp,
/* Frontend format is "<vdev>". */
*domp = DOMID_SELF;
if (sscanf(caddr, "%x", vdevp) == 1) {
+#ifdef XPV_HVM_DRIVER
+ if (*devclassp == XEN_VBLK) {
+ if (*vdevp < 0 || *vdevp >= 26) {
+ *vdevp = -1;
+ goto done;
+ }
+ *vdevp = hvm_vdev_num[*vdevp];
+ }
+#endif
ret = B_TRUE;
goto done;
}
+
done:
kmem_free(device_name, len);
return (ret);
diff --git a/usr/src/uts/common/xen/io/xpvd.conf b/usr/src/uts/common/xen/io/xpvd.conf
new file mode 100644
index 0000000000..55262457f9
--- /dev/null
+++ b/usr/src/uts/common/xen/io/xpvd.conf
@@ -0,0 +1,28 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+# ident "%Z%%M% %I% %E% SMI"
+
+name="xpvd" class="root";
diff --git a/usr/src/uts/i86xpv/os/gnttab.c b/usr/src/uts/common/xen/os/gnttab.c
index 5284b02ea4..238c45768e 100644
--- a/usr/src/uts/i86xpv/os/gnttab.c
+++ b/usr/src/uts/common/xen/os/gnttab.c
@@ -58,6 +58,11 @@
#include <sys/types.h>
#include <sys/archsystm.h>
+#ifdef XPV_HVM_DRIVER
+#include <sys/xpv_support.h>
+#include <sys/mman.h>
+#include <vm/hat.h>
+#endif
#include <sys/hypervisor.h>
#include <sys/gnttab.h>
#include <sys/sysmacros.h>
@@ -77,11 +82,13 @@
#include <vm/hat_i86.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
+#ifndef XPV_HVM_DRIVER
#include <sys/bootinfo.h>
#include <sys/multiboot.h>
+#include <vm/kboot_mmu.h>
+#endif
#include <sys/bootvfs.h>
#include <sys/bootprops.h>
-#include <vm/kboot_mmu.h>
#include <vm/seg_kmem.h>
#define cmpxchg(t, c, n) atomic_cas_16((t), (c), (n))
@@ -410,6 +417,61 @@ out:
mutex_exit(&gnttab_list_lock);
}
+#ifdef XPV_HVM_DRIVER
+
+static void
+gnttab_map(void)
+{
+ struct xen_add_to_physmap xatp;
+ caddr_t va;
+ pfn_t pfn;
+ int i;
+
+ va = (caddr_t)shared;
+ for (i = 0; i < NR_GRANT_FRAMES; i++) {
+ pfn = hat_getpfnum(kas.a_hat, va);
+
+ xatp.domid = DOMID_SELF;
+ xatp.idx = i;
+ xatp.space = XENMAPSPACE_grant_table;
+ xatp.gpfn = pfn;
+ hat_unload(kas.a_hat, va, MMU_PAGESIZE, HAT_UNLOAD);
+ if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) != 0)
+ panic("Couldn't map grant table");
+
+ hat_devload(kas.a_hat, va, MMU_PAGESIZE, pfn,
+ PROT_READ | PROT_WRITE,
+ HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
+
+ va += MMU_PAGESIZE;
+ }
+}
+
+void
+gnttab_init(void)
+{
+ int i;
+
+ shared = (grant_entry_t *)xen_alloc_pages(NR_GRANT_FRAMES);
+
+ gnttab_map();
+
+ for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++)
+ gnttab_list[i] = i + 1;
+ gnttab_free_count = NR_GRANT_ENTRIES - NR_RESERVED_ENTRIES;
+ gnttab_free_head = NR_RESERVED_ENTRIES;
+
+ mutex_init(&gnttab_list_lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+void
+gnttab_resume(void)
+{
+ gnttab_map();
+}
+
+#else /* XPV_HVM_DRIVER */
+
void
gnttab_init(void)
{
@@ -472,6 +534,8 @@ gnttab_resume(void)
}
}
+#endif /* XPV_HVM_DRIVER */
+
void
gnttab_suspend(void)
{
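
The HVM gnttab_init() above builds the grant-reference free list by
storing each entry's successor index in gnttab_list[], so the array
doubles as an intrusive singly linked list and claiming a reference pops
the head. A standalone sketch with illustrative sizes in place of
NR_GRANT_ENTRIES and NR_RESERVED_ENTRIES:

#include <stdio.h>

#define	NR_ENTRIES	16
#define	NR_RESERVED	4

static int gnttab_list[NR_ENTRIES];
static int free_head;
static int free_count;

int
main(void)
{
	for (int i = NR_RESERVED; i < NR_ENTRIES; i++)
		gnttab_list[i] = i + 1;
	free_head = NR_RESERVED;
	free_count = NR_ENTRIES - NR_RESERVED;

	/* Claim one reference: pop the head of the list. */
	int ref = free_head;
	free_head = gnttab_list[ref];
	free_count--;
	printf("claimed %d, next head %d, %d free\n",
	    ref, free_head, free_count);	/* 4, 5, 11 */
	return (0);
}
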
diff --git a/usr/src/uts/i86xpv/os/hypercall.c b/usr/src/uts/common/xen/os/hypercall.c
index ca753bb716..fae533dfbf 100644
--- a/usr/src/uts/i86xpv/os/hypercall.c
+++ b/usr/src/uts/common/xen/os/hypercall.c
@@ -39,6 +39,9 @@
*/
#include <sys/types.h>
+#ifdef XPV_HVM_DRIVER
+#include <sys/xpv_support.h>
+#endif
#include <sys/hypervisor.h>
#include <xen/public/sched.h>
@@ -215,7 +218,7 @@ HYPERVISOR_grant_table_op(uint_t cmd, void *uop, uint_t count)
ret_val = __hypercall3(__HYPERVISOR_grant_table_op,
(long)cmd, (ulong_t)uop, (ulong_t)count);
-#if !defined(_BOOT)
+#if !defined(_BOOT) && !defined(XPV_HVM_DRIVER)
/*
* XXPV --
* The map_grant_ref call suffers a poor design flaw.
diff --git a/usr/src/uts/common/xen/os/xvdi.c b/usr/src/uts/common/xen/os/xvdi.c
index 347ca8bc3e..4eede251bd 100644
--- a/usr/src/uts/common/xen/os/xvdi.c
+++ b/usr/src/uts/common/xen/os/xvdi.c
@@ -43,8 +43,6 @@
*/
#include <sys/conf.h>
#include <sys/param.h>
-#include <sys/hypervisor.h>
-#include <sys/xen_mmu.h>
#include <sys/kmem.h>
#include <vm/seg_kmem.h>
#include <sys/debug.h>
@@ -57,8 +55,6 @@
#include <sys/sunndi.h>
#include <sys/sunldi.h>
#include <sys/fs/dv_node.h>
-#include <sys/evtchn_impl.h>
-#include <sys/gnttab.h>
#include <sys/avintr.h>
#include <sys/psm.h>
#include <sys/spl.h>
@@ -68,8 +64,22 @@
#include <sys/bootsvcs.h>
#include <sys/bootinfo.h>
#include <sys/note.h>
+#ifdef XPV_HVM_DRIVER
+#include <sys/xpv_support.h>
+#include <sys/hypervisor.h>
+#include <public/grant_table.h>
+#include <public/xen.h>
+#include <public/io/xenbus.h>
+#include <public/io/xs_wire.h>
+#include <public/event_channel.h>
+#else /* XPV_HVM_DRIVER */
+#include <sys/hypervisor.h>
#include <sys/xen_mmu.h>
#include <xen/sys/xenbus_impl.h>
+#include <sys/evtchn_impl.h>
+#endif /* XPV_HVM_DRIVER */
+#include <sys/gnttab.h>
#include <xen/sys/xendev.h>
#include <vm/hat_i86.h>
#include <sys/scsi/generic/inquiry.h>
@@ -79,7 +89,9 @@
static void xvdi_ring_init_sring(xendev_ring_t *);
static void xvdi_ring_init_front_ring(xendev_ring_t *, size_t, size_t);
+#ifndef XPV_HVM_DRIVER
static void xvdi_ring_init_back_ring(xendev_ring_t *, size_t, size_t);
+#endif
static void xvdi_reinit_ring(dev_info_t *, grant_ref_t *, xendev_ring_t *);
static int i_xvdi_add_watches(dev_info_t *);
@@ -320,6 +332,19 @@ xvdi_init_dev(dev_info_t *dip)
dip, "unit-address", prop_str);
kmem_free(prop_str, prop_len);
}
+#ifdef XPV_HVM_DRIVER
+ /*
+ * The mapping between the 'dev' name and the
+ * device ID maintained by Xenstore has to be
+ * tracked explicitly in HVM domains.
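+	 * For example (illustrative), an otherend path ending in "/768"
+	 * yields "768" as the "xenstore-id" property value.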
+ */
+ prop_str = strrchr(pdp->xd_xsdev.otherend, '/');
+ if (prop_str != NULL) {
+ prop_str = ((caddr_t)prop_str) + 1;
+ (void) ndi_prop_update_string(DDI_DEV_T_NONE,
+ dip, "xenstore-id", prop_str);
+ }
+#endif /* XPV_HVM_DRIVER */
break;
default:
break;
@@ -342,7 +367,9 @@ xvdi_uninit_dev(dev_info_t *dip)
i_xvdi_rem_watches(dip);
/* tell other end to close */
- (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
+ if (pdp->xd_xsdev.otherend_id != (domid_t)-1)
+ (void) xvdi_switch_state(dip, XBT_NULL,
+ XenbusStateClosed);
if (pdp->xd_xsdev.nodename != NULL)
kmem_free((char *)(pdp->xd_xsdev.nodename),
@@ -392,7 +419,9 @@ xvdi_bind_evtchn(dev_info_t *dip, evtchn_port_t evtchn)
return (DDI_FAILURE);
}
}
+#ifndef XPV_HVM_DRIVER
pdp->xd_ispec.intrspec_vec = ec_bind_evtchn_to_irq(pdp->xd_evtchn);
+#endif
mutex_exit(&pdp->xd_lk);
return (DDI_SUCCESS);
@@ -435,7 +464,9 @@ xvdi_alloc_evtchn(dev_info_t *dip)
return (DDI_FAILURE);
}
}
+#ifndef XPV_HVM_DRIVER
pdp->xd_ispec.intrspec_vec = ec_bind_evtchn_to_irq(pdp->xd_evtchn);
+#endif
mutex_exit(&pdp->xd_lk);
return (DDI_SUCCESS);
@@ -455,13 +486,16 @@ xvdi_free_evtchn(dev_info_t *dip)
mutex_enter(&pdp->xd_lk);
if (pdp->xd_evtchn != INVALID_EVTCHN) {
+#ifndef XPV_HVM_DRIVER
ec_unbind_irq(pdp->xd_ispec.intrspec_vec);
- pdp->xd_evtchn = INVALID_EVTCHN;
pdp->xd_ispec.intrspec_vec = 0;
+#endif
+ pdp->xd_evtchn = INVALID_EVTCHN;
}
mutex_exit(&pdp->xd_lk);
}
+#ifndef XPV_HVM_DRIVER
/*
* Map an inter-domain communication ring for a virtual device.
* This is used by backend drivers.
@@ -566,6 +600,7 @@ xvdi_unmap_ring(xendev_ring_t *ring)
vmem_xfree(heap_arena, ring->xr_vaddr, PAGESIZE);
kmem_free(ring, sizeof (xendev_ring_t));
}
+#endif /* XPV_HVM_DRIVER */
/*
* Re-initialise an inter-domain communications ring for the backend domain.
@@ -1961,6 +1996,7 @@ xvdi_ring_init_front_ring(xendev_ring_t *ringp, size_t nentry, size_t entrysize)
ringp->xr_entry_size = entrysize;
}
+#ifndef XPV_HVM_DRIVER
static void
xvdi_ring_init_back_ring(xendev_ring_t *ringp, size_t nentry, size_t entrysize)
{
@@ -1975,6 +2011,7 @@ xvdi_ring_init_back_ring(xendev_ring_t *ringp, size_t nentry, size_t entrysize)
ringp->xr_frontend = 0;
ringp->xr_entry_size = entrysize;
}
+#endif /* XPV_HVM_DRIVER */
static void
xendev_offline_device(void *arg)
diff --git a/usr/src/uts/i86xpv/sys/gnttab.h b/usr/src/uts/common/xen/sys/gnttab.h
index 7066ae3243..7066ae3243 100644
--- a/usr/src/uts/i86xpv/sys/gnttab.h
+++ b/usr/src/uts/common/xen/sys/gnttab.h
diff --git a/usr/src/uts/common/xen/sys/xendev.h b/usr/src/uts/common/xen/sys/xendev.h
index b00a71fcf4..40a79e07e5 100644
--- a/usr/src/uts/common/xen/sys/xendev.h
+++ b/usr/src/uts/common/xen/sys/xendev.h
@@ -31,6 +31,11 @@
#include <sys/hypervisor.h>
#include <sys/taskq.h>
+#ifdef XPV_HVM_DRIVER
+#include <public/io/ring.h>
+#include <public/event_channel.h>
+#include <public/grant_table.h>
+#endif
#include <xen/sys/xenbus_impl.h>
#ifdef __cplusplus
diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files
index 00e7fabbe5..2bad82b0af 100644
--- a/usr/src/uts/i86pc/Makefile.files
+++ b/usr/src/uts/i86pc/Makefile.files
@@ -186,6 +186,11 @@ ROOTNEX_OBJS += rootnex.o
TZMON_OBJS += tzmon.o
UPPC_OBJS += uppc.o psm_common.o
XSVC_OBJS += xsvc.o
+XNF_OBJS += xnf.o
+XPV_OBJS += xpv_support.o xvdi.o gnttab.o evtchn.o \
+ xenbus_comms.o xenbus_client.o xenbus_probe.o xenbus_xs.o \
+ hypercall.o hypersubr.o
+XPVD_OBJS += xpvd.o
#
# Build up defines and paths.
diff --git a/usr/src/uts/i86pc/Makefile.hvm b/usr/src/uts/i86pc/Makefile.hvm
new file mode 100644
index 0000000000..3c53174cc4
--- /dev/null
+++ b/usr/src/uts/i86pc/Makefile.hvm
@@ -0,0 +1,67 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/i86pc/Makefile.hvm
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile provides support for building PV drivers that run
+# in an HVM environment.
+#
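+# Driver makefiles (for example uts/i86pc/xnf/Makefile, added in this
+# changeset) include this file after Makefile.i86pc to pick up the
+# XPV_HVM_DRIVER defines and the i86hvm installation directories.
+#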
+
+ROOT_HVM_DIR = $(ROOT)/platform/i86hvm
+ROOT_HVM_MOD_DIR = $(ROOT_HVM_DIR)/kernel
+ROOT_HVM_DRV_DIR_32 = $(ROOT_HVM_MOD_DIR)/drv
+ROOT_HVM_DRV_DIR_64 = $(ROOT_HVM_MOD_DIR)/drv/$(MACH64)
+ROOT_HVM_DRV_DIR = $(ROOT_HVM_DRV_DIR_$(CLASS))
+USR_HVM_DIR = $(ROOT)/usr/platform/i86hvm
+
+#
+# Indicate that we are building for the i86hvm semi-platform
+#
+CPPFLAGS += -DXPV_HVM_DRIVER
+ASFLAGS += -DXPV_HVM_DRIVER
+
+#
+# Installation targets and rules:
+#
+$(ROOT_HVM_DIR):
+ -$(INS.dir.root.sys)
+
+$(ROOT_HVM_MOD_DIR): $(ROOT_HVM_DIR)
+ -$(INS.dir.root.sys)
+
+$(ROOT_HVM_DRV_DIR): $(ROOT_HVM_MOD_DIR)
+ -$(INS.dir.root.sys)
+
+$(ROOT_HVM_MOD_DIR)/%: $(OBJS_DIR)/% $(ROOT_HVM_MOD_DIR) FRC
+ $(INS.file)
+
+$(ROOT_HVM_DRV_DIR)/%: $(OBJS_DIR)/% $(ROOT_HVM_DRV_DIR) FRC
+ $(INS.file)
+
+$(USR_HVM_DIR):
+ -$(INS.dir.root.sys)
+
+INSTALL_DEPS += $(ROOT_HVM_DIR) $(USR_HVM_DIR)
diff --git a/usr/src/uts/i86pc/Makefile.i86pc.shared b/usr/src/uts/i86pc/Makefile.i86pc.shared
index 311e8ee50b..59b73f2aca 100644
--- a/usr/src/uts/i86pc/Makefile.i86pc.shared
+++ b/usr/src/uts/i86pc/Makefile.i86pc.shared
@@ -253,6 +253,9 @@ DRV_KMODS += xsvc
DRV_KMODS += mc-amd
DRV_KMODS += tzmon
DRV_KMODS += battery
+DRV_KMODS += xnf
+DRV_KMODS += xpv
+DRV_KMODS += xpvd
DRV_KMODS += cpudrv
diff --git a/usr/src/uts/i86pc/Makefile.rules b/usr/src/uts/i86pc/Makefile.rules
index 8ca64e2fcb..afd1209ebf 100644
--- a/usr/src/uts/i86pc/Makefile.rules
+++ b/usr/src/uts/i86pc/Makefile.rules
@@ -110,6 +110,13 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/tzmon/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/xpv/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/xpv/%.s
+ $(COMPILE.s) -o $@ $<
+
$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/ml/%.s
$(COMPILE.s) -o $@ $<
@@ -145,6 +152,14 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/gfx_private/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/xen/io/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/xen/os/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/xsvc/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -177,6 +192,9 @@ $(DBOOT_OBJS_DIR)/%.o: $(UTSBASE)/i86pc/boot/%.c
$(DBOOT_OBJS_DIR)/%.o: $(UTSBASE)/i86pc/dboot/%.c
$(i386_CC) $(CERRWARN) -O $(DBOOT_DEFS) $(DBOOT_CC_INCL) -c -o $@ $<
+$(DBOOT_OBJS_DIR)/%.o: $(UTSBASE)/intel/ia32/%.s
+ $(DBOOT_AS) -P -D_ASM $(DBOOT_DEFS) $(DBOOT_AS_INCL) -o $@ $<
+
$(DBOOT_OBJS_DIR)/%.o: $(COMMONBASE)/util/%.c
$(i386_CC) $(CERRWARN) -O $(DBOOT_DEFS) $(DBOOT_CC_INCL) -c -o $@ $<
@@ -286,6 +304,12 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/psm/%.s
$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/tzmon/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/xpv/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/xpv/%.s
+ @($(LHEAD) $(LINT.s) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/ml/%.s
@($(LHEAD) $(LINT.s) $< $(LTAIL))
@@ -316,6 +340,12 @@ $(LINTS_DIR)/%.ln: $(SRC)/common/atomic/%.c
$(LINTS_DIR)/%.ln: $(SRC)/common/mc/mc-amd/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/xen/io/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/xen/os/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/gfx_private/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -346,4 +376,3 @@ $(DBOOT_LINTS_DIR)/%.ln: $(COMMONBASE)/util/%.c
$(DBOOT_LINTS_DIR)/%.ln: $(COMMONBASE)/util/i386/%.s
@($(LHEAD) $(DBOOT_LINT) $(DBOOT_LOCAL_LINTFLAGS) $< $(LTAIL))
-
diff --git a/usr/src/uts/i86pc/io/xpv/evtchn.c b/usr/src/uts/i86pc/io/xpv/evtchn.c
new file mode 100644
index 0000000000..3da34d406e
--- /dev/null
+++ b/usr/src/uts/i86pc/io/xpv/evtchn.c
@@ -0,0 +1,450 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/xpv_support.h>
+#include <sys/hypervisor.h>
+#include <sys/machsystm.h>
+#include <sys/mutex.h>
+#include <sys/cmn_err.h>
+#include <sys/dditypes.h>
+#include <sys/atomic.h>
+#include <sys/sysmacros.h>
+#include <sys/cpu.h>
+#include <sys/psw.h>
+#include <sys/psm.h>
+#include <sys/sdt.h>
+
+extern dev_info_t *xpv_dip;
+static ddi_intr_handle_t *evtchn_ihp = NULL;
+static ddi_softint_handle_t evtchn_to_handle[NR_EVENT_CHANNELS];
+static kmutex_t ec_lock;
+
+static int evtchn_callback_irq = -1;
+
+/*
+ * Xen defines structures shared between the hypervisor and domU using
+ * longs. Sigh. To support 32-bit domUs on a 64-bit hypervisor, we
+ * redefine the pending-events and masked-events bitmasks in terms of
+ * uint32_t's.
+ */
+static uint32_t *pending_events;
+static uint32_t *masked_events;
+static int event_array_size;
+#define EVTCHN_SHIFT 5 /* log2(NBBY * sizeof (uint32_t)) */
+
+/* Atomically get and clear an integer from memory. */
+#define GET_AND_CLEAR(type, size, src, targ) { \
+ volatile type *_vsrc = (volatile type *)src; \
+ membar_enter(); \
+ do { \
+ targ = *_vsrc; \
+ } while (atomic_cas_## size(_vsrc, targ, 0) != targ); \
+}
+
+#define GET_AND_CLEAR_32(src, targ) GET_AND_CLEAR(uint32_t, 32, src, targ)
+#define GET_AND_CLEAR_64(src, targ) GET_AND_CLEAR(uint64_t, 64, src, targ)
+
+/* Get the first and last bits set in a bitmap */
+#define GET_BOUNDS(bitmap, max, low, high) { \
+ int _i; \
+ low = high = -1; \
+ for (_i = 0; _i <= max; _i++) \
+ if (bitmap & ((uint64_t)1 << _i)) { \
+ if (low == -1) \
+ low = _i; \
+ high = _i; \
+ } \
+}
+
+/*
+ * Translate an event number into an index into the array of 32-bit
+ * bitmasks, and a bit within the proper word.
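+ * For example (illustrative): event 37 yields *idx = 1 (37 >> 5) and
+ * *bit = 1 << 5 (since 37 & 31 == 5).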
+ */
+static void
+get_event_bit(int evt, int *idx, uint32_t *bit)
+{
+ int evb;
+
+ *idx = evt >> EVTCHN_SHIFT;
+ evb = evt & ((1ul << EVTCHN_SHIFT) - 1);
+ *bit = 1ul << evb;
+}
+
+void
+ec_bind_evtchn_to_handler(int evtchn, pri_t pri, ec_handler_fcn_t handler,
+ void *arg1)
+{
+ ddi_softint_handle_t hdl;
+
+	if (evtchn < 0 || evtchn >= NR_EVENT_CHANNELS) {
+ cmn_err(CE_WARN, "Binding invalid event channel: %d", evtchn);
+ return;
+ }
+
+ (void) ddi_intr_add_softint(xpv_dip, &hdl, pri, handler, (caddr_t)arg1);
+ mutex_enter(&ec_lock);
+ ASSERT(evtchn_to_handle[evtchn] == NULL);
+ evtchn_to_handle[evtchn] = hdl;
+ mutex_exit(&ec_lock);
+
+ /* Let the hypervisor know we're prepared to handle this event */
+ hypervisor_unmask_event(evtchn);
+}
+
+void
+ec_unbind_evtchn(int evtchn)
+{
+ evtchn_close_t close;
+ ddi_softint_handle_t hdl;
+
+	if (evtchn < 0 || evtchn >= NR_EVENT_CHANNELS) {
+ cmn_err(CE_WARN, "Unbinding invalid event channel: %d", evtchn);
+ return;
+ }
+
+ /*
+ * Let the hypervisor know we're no longer prepared to handle this
+ * event
+ */
+ hypervisor_mask_event(evtchn);
+
+ /* Cleanup the event handler metadata */
+ mutex_enter(&ec_lock);
+ hdl = evtchn_to_handle[evtchn];
+ evtchn_to_handle[evtchn] = NULL;
+ mutex_exit(&ec_lock);
+
+	if (hdl != NULL) {
+		close.port = evtchn;
+		(void) HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+		(void) ddi_intr_remove_softint(hdl);
+	}
+}
+
+void
+ec_notify_via_evtchn(unsigned int port)
+{
+ evtchn_send_t send;
+
+ if ((int)port == -1)
+ return;
+ send.port = port;
+ (void) HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
+}
+
+void
+hypervisor_unmask_event(unsigned int ev)
+{
+ int evi;
+ uint32_t bit;
+ volatile uint32_t *maskp;
+ evtchn_unmask_t unmask;
+
+ /*
+	 * Translate the event number into an index into the masked-events
+	 * bitmask, and clear the event's bit.
+ */
+ get_event_bit(ev, &evi, &bit);
+ maskp = (volatile uint32_t *)&masked_events[evi];
+ atomic_and_32(maskp, ~bit);
+
+ /* Let the hypervisor know the event has been unmasked */
+ unmask.port = ev;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask) != 0)
+		panic("hypervisor_unmask_event() failed");
+}
+
+/* Set a bit in an evtchan mask word */
+void
+hypervisor_mask_event(uint_t ev)
+{
+ int evi;
+ uint32_t bit;
+ volatile uint32_t *maskp;
+
+ get_event_bit(ev, &evi, &bit);
+ maskp = (volatile uint32_t *)&masked_events[evi];
+ atomic_or_32(maskp, bit);
+}
+
+void
+hypervisor_clear_event(uint_t ev)
+{
+ int evi;
+ uint32_t bit;
+ volatile uint32_t *maskp;
+
+ get_event_bit(ev, &evi, &bit);
+ maskp = (volatile uint32_t *)&pending_events[evi];
+ atomic_and_32(maskp, ~bit);
+}
+
+int
+xen_alloc_unbound_evtchn(int domid, int *evtchnp)
+{
+ evtchn_alloc_unbound_t alloc;
+ int err;
+
+ alloc.dom = DOMID_SELF;
+ alloc.remote_dom = (domid_t)domid;
+
+ if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+ &alloc)) == 0) {
+ *evtchnp = alloc.port;
+ /* ensure evtchn is masked till we're ready to use it */
+ (void) hypervisor_mask_event(*evtchnp);
+ } else {
+ err = xen_xlate_errcode(err);
+ }
+
+ return (err);
+}
+
+int
+xen_bind_interdomain(int domid, int remote_port, int *port)
+{
+ evtchn_bind_interdomain_t bind;
+ int err;
+
+ bind.remote_dom = (domid_t)domid;
+ bind.remote_port = remote_port;
+ if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+ &bind)) == 0)
+ *port = bind.local_port;
+ else
+ err = xen_xlate_errcode(err);
+ return (err);
+}
+
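+/*
+ * Return the 1-based position of the lowest bit set in 'bits', or 0 if
+ * no bit is set (the same contract as ffs(3C)).
+ */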
+static int
+ev_ffs(uint32_t bits)
+{
+ int i;
+
+ if (bits == 0)
+ return (0);
+ for (i = 1; ; i++, bits >>= 1) {
+ if (bits & 1)
+ break;
+ }
+ return (i);
+}
+
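+/*
+ * Handler for the hypervisor's "events pending" interrupt: scan the
+ * pending-event bitmaps and trigger the registered softint for each
+ * pending, unmasked event channel.
+ */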
+/*ARGSUSED*/
+uint_t
+evtchn_callback_fcn(caddr_t arg0, caddr_t arg1)
+{
+ uint32_t pending_word;
+ int i, j, port;
+ volatile struct vcpu_info *vci;
+ uint_t rv = DDI_INTR_UNCLAIMED;
+ ddi_softint_handle_t hdl;
+ caddr_t pending_sel_addr;
+ int low, high;
+
+ vci = &HYPERVISOR_shared_info->vcpu_info[CPU->cpu_id];
+ pending_sel_addr = (caddr_t)&vci->evtchn_pending_sel;
+#ifndef __amd64
+ /*
+ * More 32/64-bit ugliness. Xen defines this field as a long, so
+ * it ends up misaligned in a 32-bit domU.
+ */
+ if (xen_is_64bit)
+ pending_sel_addr = (caddr_t)
+ P2ROUNDUP((uintptr_t)pending_sel_addr, sizeof (uint64_t));
+#endif
+
+again:
+ DTRACE_PROBE2(evtchn__scan__start, int, vci->evtchn_upcall_pending,
+ ulong_t, vci->evtchn_pending_sel);
+
+ atomic_and_8(&vci->evtchn_upcall_pending, 0);
+
+ /*
+ * Find the upper and lower bounds in which we need to search for
+ * pending events.
+ */
+ if (xen_is_64bit) {
+ uint64_t sels;
+
+ GET_AND_CLEAR_64((volatile uint64_t *)pending_sel_addr, sels);
+
+ /* sels == 1 is by far the most common case. Make it fast */
+ if (sels == 1)
+ low = high = 0;
+ else if (sels == 0)
+ return (rv);
+ else
+ GET_BOUNDS(sels, 63, low, high);
+
+ /*
+ * Each bit in the pending_sels bitmap represents 2 entries
+ * in our forced-to-be-32-bit event channel array.
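+		 * For example (illustrative), if only bit 3 of sels is
+		 * set, we scan 32-bit words 6 and 7.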
+ */
+ low = low * 2;
+ high = high * 2 + 1;
+ } else {
+ uint32_t sels;
+
+ GET_AND_CLEAR_32((volatile uint32_t *)pending_sel_addr, sels);
+
+ /* sels == 1 is by far the most common case. Make it fast */
+ if (sels == 1)
+ low = high = 0;
+ else if (sels == 0)
+ return (rv);
+ else
+ GET_BOUNDS(sels, 31, low, high);
+ }
+
+ /* Scan the port list, looking for words with bits set */
+ for (i = low; i <= high; i++) {
+ uint32_t tmp;
+
+ GET_AND_CLEAR_32(&pending_events[i], tmp);
+ pending_word = tmp & ~(masked_events[i]);
+
+ /* Scan the bits in the word, looking for pending events */
+ while (pending_word != 0) {
+ j = ev_ffs(pending_word) - 1;
+ port = (i << EVTCHN_SHIFT) + j;
+ pending_word = pending_word & ~(1 << j);
+
+ /*
+ * If there is a handler registered for this event,
+ * schedule a softint of the appropriate priority
+ * to execute it.
+ */
+ if ((hdl = evtchn_to_handle[port]) != NULL) {
+ (void) ddi_intr_trigger_softint(hdl, NULL);
+ rv = DDI_INTR_CLAIMED;
+ }
+ }
+ }
+ DTRACE_PROBE2(evtchn__scan__end, int, vci->evtchn_upcall_pending,
+ ulong_t, vci->evtchn_pending_sel);
+
+ if ((volatile uint8_t)vci->evtchn_upcall_pending ||
+ *((volatile ulong_t *)pending_sel_addr))
+ goto again;
+
+ return (rv);
+}
+
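+/*
+ * Ask the hypervisor, via the HVM_PARAM_CALLBACK_IRQ parameter, to
+ * deliver the event channel upcall as the given IRQ.
+ */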
+static int
+set_hvm_callback(int irq)
+{
+ struct xen_hvm_param xhp;
+
+ xhp.domid = DOMID_SELF;
+ xhp.index = HVM_PARAM_CALLBACK_IRQ;
+ xhp.value = irq;
+ return (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp));
+}
+
+void
+ec_fini()
+{
+ int i;
+
+ for (i = 0; i < NR_EVENT_CHANNELS; i++)
+ ec_unbind_evtchn(i);
+
+ evtchn_callback_irq = -1;
+ if (evtchn_ihp != NULL) {
+ (void) ddi_intr_disable(*evtchn_ihp);
+ (void) ddi_intr_remove_handler(*evtchn_ihp);
+ (void) ddi_intr_free(*evtchn_ihp);
+ kmem_free(evtchn_ihp, sizeof (ddi_intr_handle_t));
+ evtchn_ihp = NULL;
+ }
+}
+
+int
+ec_init(dev_info_t *dip)
+{
+ int i;
+ int rv, actual;
+ ddi_intr_handle_t *ihp;
+ volatile shared_info_t *si = HYPERVISOR_shared_info;
+
+ /*
+ * Translate the variable-sized pending and masked event bitmasks
+ * into constant-sized arrays of uint32_t's.
+ */
+ pending_events = (uint32_t *)&si->evtchn_pending[0];
+ if (xen_is_64bit)
+ event_array_size = 2 * sizeof (uint64_t) * 8;
+ else
+ event_array_size = sizeof (uint32_t) * 8;
+ masked_events = &pending_events[event_array_size];
+
+ /*
+ * Clear our event handler structures and prevent the hypervisor
+ * from triggering any events.
+ */
+ mutex_init(&ec_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL7));
+ for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+ evtchn_to_handle[i] = NULL;
+ (void) hypervisor_mask_event(i);
+ }
+
+ /*
+ * Allocate and initialize an interrupt handler to process the
+ * hypervisor's "hey you have events pending!" interrupt.
+ */
+ ihp = kmem_zalloc(sizeof (ddi_intr_handle_t), KM_SLEEP);
+ rv = ddi_intr_alloc(dip, ihp, DDI_INTR_TYPE_FIXED, 0, 1, &actual,
+ DDI_INTR_ALLOC_NORMAL);
+ if (rv < 0 || actual != 1) {
+ cmn_err(CE_WARN, "Could not allocate evtchn interrupt: %d",
+ rv);
+ return (-1);
+ }
+
+ rv = ddi_intr_add_handler(*ihp, evtchn_callback_fcn, NULL, NULL);
+ if (rv < 0) {
+ (void) ddi_intr_free(*ihp);
+ cmn_err(CE_WARN, "Could not attach evtchn handler");
+ return (-1);
+ }
+ evtchn_ihp = ihp;
+
+ if (ddi_intr_enable(*ihp) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "Could not enable evtchn interrupts\n");
+ return (-1);
+ }
+
+ /* Tell the hypervisor which interrupt we're waiting on. */
+ evtchn_callback_irq = ((ddi_intr_handle_impl_t *)*ihp)->ih_vector;
+
+ if (set_hvm_callback(evtchn_callback_irq) != 0) {
+ cmn_err(CE_WARN, "Couldn't register evtchn callback");
+ return (-1);
+ }
+ return (0);
+}
diff --git a/usr/src/uts/i86pc/io/xpv/xpv.conf b/usr/src/uts/i86pc/io/xpv/xpv.conf
new file mode 100644
index 0000000000..d599f6f3ff
--- /dev/null
+++ b/usr/src/uts/i86pc/io/xpv/xpv.conf
@@ -0,0 +1,28 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+# ident "%Z%%M% %I% %E% SMI"
+
+interrupt-priorities=9;
diff --git a/usr/src/uts/i86pc/io/xpv/xpv_support.c b/usr/src/uts/i86pc/io/xpv/xpv_support.c
new file mode 100644
index 0000000000..fb34924319
--- /dev/null
+++ b/usr/src/uts/i86pc/io/xpv/xpv_support.c
@@ -0,0 +1,541 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/modctl.h>
+#include <sys/types.h>
+#include <sys/archsystm.h>
+#include <sys/machsystm.h>
+#include <sys/sunndi.h>
+#include <sys/sunddi.h>
+#include <sys/ddi_subrdefs.h>
+#include <sys/xpv_support.h>
+#include <sys/xen_errno.h>
+#include <sys/hypervisor.h>
+#include <sys/gnttab.h>
+#include <sys/xenbus_comms.h>
+#include <sys/xenbus_impl.h>
+#include <xen/sys/xendev.h>
+#include <sys/sysmacros.h>
+#include <sys/x86_archext.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/pc_mmu.h>
+#include <sys/cmn_err.h>
+#include <vm/seg_kmem.h>
+#include <vm/as.h>
+#include <vm/hat_pte.h>
+#include <vm/hat_i86.h>
+
+#define XPV_MINOR 0
+
+/*
+ * This structure is ordinarily constructed by Xen. In the HVM world, we
+ * manually fill in the few fields the PV drivers need.
+ */
+start_info_t *xen_info = NULL;
+
+/* Xen version number. */
+int xen_major, xen_minor;
+
+/* Metadata page shared between domain and Xen */
+shared_info_t *HYPERVISOR_shared_info = NULL;
+
+/* Page containing code to issue hypercalls. */
+extern caddr_t hypercall_page;
+
+/* Is the hypervisor 64-bit? */
+int xen_is_64bit = -1;
+
+/* virtual addr for the store_mfn page */
+caddr_t xb_addr;
+
+dev_info_t *xpv_dip;
+
+/*
+ * Forward declarations
+ */
+static int xpv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
+static int xpv_attach(dev_info_t *, ddi_attach_cmd_t);
+static int xpv_detach(dev_info_t *, ddi_detach_cmd_t);
+static int xpv_open(dev_t *, int, int, cred_t *);
+static int xpv_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+
+static struct cb_ops xpv_cb_ops = {
+ xpv_open,
+ nulldev, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ xpv_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ ddi_prop_op,
+ NULL,
+ D_MP,
+ CB_REV,
+ NULL,
+ NULL
+};
+
+static struct dev_ops xpv_dv_ops = {
+ DEVO_REV,
+ 0,
+ xpv_getinfo,
+ nulldev, /* identify */
+ nulldev, /* probe */
+ xpv_attach,
+ xpv_detach,
+ nodev, /* reset */
+ &xpv_cb_ops,
+ NULL, /* struct bus_ops */
+ NULL /* power */
+};
+
+static struct modldrv modldrv = {
+ &mod_driverops,
+ "xpv driver %I%",
+ &xpv_dv_ops
+};
+
+static struct modlinkage modl = {
+ MODREV_1,
+ {
+ (void *)&modldrv,
+ NULL /* null termination */
+ }
+};
+
+static ddi_dma_attr_t xpv_dma_attr = {
+ DMA_ATTR_V0, /* version of this structure */
+ 0, /* lowest usable address */
+ 0xffffffffffffffffULL, /* highest usable address */
+ 0x7fffffff, /* maximum DMAable byte count */
+ MMU_PAGESIZE, /* alignment in bytes */
+ 0x7ff, /* bitmap of burst sizes */
+ 1, /* minimum transfer */
+ 0xffffffffU, /* maximum transfer */
+ 0x7fffffffULL, /* maximum segment length */
+ 1, /* maximum number of segments */
+ 1, /* granularity */
+ 0, /* flags (reserved) */
+};
+
+static ddi_device_acc_attr_t xpv_accattr = {
+ DDI_DEVICE_ATTR_V0,
+ DDI_NEVERSWAP_ACC,
+ DDI_STRICTORDER_ACC
+};
+
+#define MAX_ALLOCATIONS 10
+static ddi_dma_handle_t xpv_dma_handle[MAX_ALLOCATIONS];
+static ddi_acc_handle_t xpv_dma_acchandle[MAX_ALLOCATIONS];
+static int xen_alloc_cnt = 0;
+
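+/*
+ * Allocate 'cnt' pages of physically contiguous memory (the DMA
+ * attributes allow only a single segment) for sharing with the
+ * hypervisor, e.g. the shared_info page and the grant table frames.
+ * The handles are recorded so the memory remains allocated for the
+ * life of the module.
+ */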
+void *
+xen_alloc_pages(pgcnt_t cnt)
+{
+ size_t len;
+ int a = xen_alloc_cnt++;
+ caddr_t addr;
+
+	ASSERT(a < MAX_ALLOCATIONS);
+ if (ddi_dma_alloc_handle(xpv_dip, &xpv_dma_attr, DDI_DMA_SLEEP, 0,
+ &xpv_dma_handle[a]) != DDI_SUCCESS)
+ return (NULL);
+
+ if (ddi_dma_mem_alloc(xpv_dma_handle[a], MMU_PAGESIZE * cnt,
+ &xpv_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0,
+ &addr, &len, &xpv_dma_acchandle[a]) != DDI_SUCCESS) {
+ ddi_dma_free_handle(&xpv_dma_handle[a]);
+ cmn_err(CE_WARN, "Couldn't allocate memory for xpv devices");
+ return (NULL);
+ }
+ return (addr);
+}
+
+/*
+ * This function is invoked twice: the first time, with reprogram=0, it
+ * sets up the xpvd portion of the device tree; the second invocation is
+ * ignored.
+ */
+static void
+xpv_enumerate(int reprogram)
+{
+ dev_info_t *dip;
+
+ if (reprogram != 0)
+ return;
+
+ ndi_devi_alloc_sleep(ddi_root_node(), "xpvd",
+ (pnode_t)DEVI_SID_NODEID, &dip);
+
+ (void) ndi_devi_bind_driver(dip, 0);
+
+ /*
+	 * It is too early to enumerate split device drivers in a domU,
+	 * since enumeration needs to create a taskq thread. So we only
+	 * enumerate softdevs and the console here.
+ */
+ xendev_enum_all(dip, B_TRUE);
+}
+
+/*
+ * Translate a hypervisor errcode to a Solaris error code.
+ */
+int
+xen_xlate_errcode(int error)
+{
+#define CASE(num) case X_##num: error = num; break
+
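+	/*
+	 * Hypercalls return negated Xen error numbers, so, e.g., a
+	 * return value of -X_ENOENT is translated to ENOENT.
+	 */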
+ switch (-error) {
+ CASE(EPERM); CASE(ENOENT); CASE(ESRCH);
+ CASE(EINTR); CASE(EIO); CASE(ENXIO);
+ CASE(E2BIG); CASE(ENOMEM); CASE(EACCES);
+ CASE(EFAULT); CASE(EBUSY); CASE(EEXIST);
+ CASE(ENODEV); CASE(EISDIR); CASE(EINVAL);
+ CASE(ENOSPC); CASE(ESPIPE); CASE(EROFS);
+ CASE(ENOSYS); CASE(ENOTEMPTY); CASE(EISCONN);
+ CASE(ENODATA);
+ default:
+ panic("xen_xlate_errcode: unknown error %d", error);
+ }
+ return (error);
+#undef CASE
+}
+
+/*PRINTFLIKE1*/
+void
+xen_printf(const char *fmt, ...)
+{
+ va_list adx;
+
+ va_start(adx, fmt);
+	vprintf(fmt, adx);
+ va_end(adx);
+}
+
+/*
+ * Stub functions to get the FE drivers to build, and to catch drivers that
+ * misbehave in HVM domains.
+ */
+/*ARGSUSED*/
+void
+xen_release_pfn(pfn_t pfn, caddr_t va)
+{
+ panic("xen_release_pfn() is not supported in HVM domains");
+}
+
+/*ARGSUSED*/
+void
+reassign_pfn(pfn_t pfn, mfn_t mfn)
+{
+ panic("reassign_pfn() is not supported in HVM domains");
+}
+
+/*ARGSUSED*/
+long
+balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns)
+{
+ panic("balloon_free_pages() is not supported in HVM domains");
+ return (0);
+}
+
+/*ARGSUSED*/
+void
+balloon_drv_added(int64_t delta)
+{
+ panic("balloon_drv_added() is not supported in HVM domains");
+}
+
+/*
+ * Add a mapping for the machine page at the given virtual address.
+ */
+void
+kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
+{
+ ASSERT(level == 0);
+
+ hat_devload(kas.a_hat, (caddr_t)va, MMU_PAGESIZE,
+ mmu_btop(ma), PROT_READ | PROT_WRITE, HAT_LOAD);
+}
+
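+/*
+ * Fetch the named HVM parameter from the hypervisor. Returns
+ * (uint64_t)-1 on failure.
+ */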
+static uint64_t
+hvm_get_param(int param_id)
+{
+ struct xen_hvm_param xhp;
+
+ xhp.domid = DOMID_SELF;
+ xhp.index = param_id;
+	if (HYPERVISOR_hvm_op(HVMOP_get_param, &xhp) < 0)
+ return (-1);
+ return (xhp.value);
+}
+
+static int
+xen_pv_init(dev_info_t *xpv_dip)
+{
+ struct cpuid_regs cp;
+ uint32_t xen_signature[4];
+ char *xen_str;
+ struct xen_add_to_physmap xatp;
+ xen_capabilities_info_t caps;
+ pfn_t pfn;
+ uint64_t msrval;
+ int err;
+
+ /*
+ * Xen's pseudo-cpuid function 0x40000000 returns a string
+ * representing the Xen signature in %ebx, %ecx, and %edx.
+ * %eax contains the maximum supported cpuid function.
+ */
+ cp.cp_eax = 0x40000000;
+ (void) __cpuid_insn(&cp);
+ xen_signature[0] = cp.cp_ebx;
+ xen_signature[1] = cp.cp_ecx;
+ xen_signature[2] = cp.cp_edx;
+ xen_signature[3] = 0;
+ xen_str = (char *)xen_signature;
+ if (strcmp("XenVMMXenVMM", xen_str) != 0 ||
+ cp.cp_eax < 0x40000002) {
+ cmn_err(CE_WARN,
+ "Attempting to load Xen drivers on non-Xen system");
+ return (-1);
+ }
+
+ /*
+ * cpuid function 0x40000001 returns the Xen version in %eax. The
+ * top 16 bits are the major version, the bottom 16 are the minor
+ * version.
+ */
+ cp.cp_eax = 0x40000001;
+ (void) __cpuid_insn(&cp);
+ xen_major = cp.cp_eax >> 16;
+ xen_minor = cp.cp_eax & 0xffff;
+ if (xen_major != 3 || xen_minor != 0) {
+ cmn_err(CE_WARN, "Xen version %d.%d is not supported",
+ xen_major, xen_minor);
+ return (-1);
+ }
+
+ /*
+ * cpuid function 0x40000002 returns information about the
+ * hypercall page. %eax nominally contains the number of pages
+ * with hypercall code, but according to the Xen guys, "I'll
+ * guarantee that remains one forever more, so you can just
+ * allocate a single page and get quite upset if you ever see CPUID
+ * return more than one page." %ebx contains an MSR we use to ask
+ * Xen to remap each page at a specific pfn.
+ */
+ cp.cp_eax = 0x40000002;
+ (void) __cpuid_insn(&cp);
+
+ /*
+ * Let Xen know where we want the hypercall page mapped. We
+ * already have a page allocated in the .text section to simplify
+ * the wrapper code.
+ */
+ pfn = hat_getpfnum(kas.a_hat, (caddr_t)&hypercall_page);
+ msrval = mmu_ptob(pfn);
+ wrmsr(cp.cp_ebx, msrval);
+
+ /* Fill in the xen_info data */
+ xen_info = kmem_zalloc(sizeof (start_info_t), KM_SLEEP);
+ (void) sprintf(xen_info->magic, "xen-%d.%d", xen_major, xen_minor);
+ xen_info->store_mfn = (mfn_t)hvm_get_param(HVM_PARAM_STORE_PFN);
+ xen_info->store_evtchn = (int)hvm_get_param(HVM_PARAM_STORE_EVTCHN);
+
+ /* Figure out whether the hypervisor is 32-bit or 64-bit. */
+	if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) == 0) {
+ ((char *)(caps))[sizeof (caps) - 1] = '\0';
+ if (strstr(caps, "x86_64") != NULL)
+ xen_is_64bit = 1;
+ else if (strstr(caps, "x86_32") != NULL)
+ xen_is_64bit = 0;
+ }
+ if (xen_is_64bit < 0) {
+ cmn_err(CE_WARN, "Couldn't get capability info from Xen.");
+ return (-1);
+ }
+#ifdef __amd64
+ ASSERT(xen_is_64bit == 1);
+#endif
+
+ /*
+ * Allocate space for the shared_info page and tell Xen where it
+ * is.
+ */
+ HYPERVISOR_shared_info = xen_alloc_pages(1);
+ xatp.domid = DOMID_SELF;
+ xatp.idx = 0;
+ xatp.space = XENMAPSPACE_shared_info;
+ xatp.gpfn = hat_getpfnum(kas.a_hat, (caddr_t)HYPERVISOR_shared_info);
+ if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0) {
+ cmn_err(CE_WARN, "Could not get shared_info page from Xen."
+ " error: %d", err);
+ return (-1);
+ }
+
+ /* Set up the grant tables. */
+ gnttab_init();
+
+ /* Set up event channel support */
+ if (ec_init(xpv_dip) != 0)
+ return (-1);
+
+ /* Set up xenbus */
+ xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
+ xs_early_init();
+ xs_domu_init();
+
+ return (0);
+}
+
+static void
+xen_pv_fini()
+{
+ if (xen_info != NULL)
+ kmem_free(xen_info, sizeof (start_info_t));
+ ec_fini();
+}
+
+/*ARGSUSED*/
+static int
+xpv_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
+{
+ if (getminor((dev_t)arg) != XPV_MINOR)
+ return (DDI_FAILURE);
+
+ switch (cmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ *result = xpv_dip;
+ break;
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = 0;
+ break;
+ default:
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+static int
+xpv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ if (cmd != DDI_ATTACH)
+ return (DDI_FAILURE);
+
+ if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
+ ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+
+ xpv_dip = dip;
+
+ if (xen_pv_init(dip) != 0)
+ return (DDI_FAILURE);
+
+ ddi_report_dev(dip);
+
+ /*
+ * If the memscrubber attempts to scrub the pages we hand to Xen,
+ * the domain will panic.
+ */
+ memscrub_disable();
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * Attempting to reload the PV driver plumbing hangs on Intel platforms, so
+ * we don't want to unload the framework by accident.
+ */
+int xpv_allow_detach = 0;
+
+static int
+xpv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ if (cmd != DDI_DETACH || xpv_allow_detach == 0)
+ return (DDI_FAILURE);
+
+ if (xpv_dip != NULL) {
+ xen_pv_fini();
+ ddi_remove_minor_node(dip, NULL);
+ xpv_dip = NULL;
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED1*/
+static int
+xpv_open(dev_t *dev, int flag, int otyp, cred_t *cr)
+{
+ return (getminor(*dev) == XPV_MINOR ? 0 : ENXIO);
+}
+
+/*ARGSUSED*/
+static int
+xpv_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr,
+ int *rval_p)
+{
+ return (EINVAL);
+}
+
+int
+_init(void)
+{
+ int err;
+
+ if ((err = mod_install(&modl)) != 0)
+ return (err);
+
+ impl_bus_add_probe(xpv_enumerate);
+ return (0);
+}
+
+int
+_fini(void)
+{
+ int err;
+
+ if ((err = mod_remove(&modl)) != 0)
+ return (err);
+
+ impl_bus_delete_probe(xpv_enumerate);
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modl, modinfop));
+}
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index 731e885508..822ec8a4fa 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -564,6 +564,34 @@ cpuid_free_space(cpu_t *cpu)
kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi));
}
+#if !defined(__xpv)
+
+static void
+check_for_hvm()
+{
+ struct cpuid_regs cp;
+ char *xen_str;
+ uint32_t xen_signature[4];
+ extern int xpv_is_hvm;
+
+ /*
+ * In a fully virtualized domain, Xen's pseudo-cpuid function
+ * 0x40000000 returns a string representing the Xen signature in
+ * %ebx, %ecx, and %edx. %eax contains the maximum supported cpuid
+ * function.
+ */
+ cp.cp_eax = 0x40000000;
+ (void) __cpuid_insn(&cp);
+ xen_signature[0] = cp.cp_ebx;
+ xen_signature[1] = cp.cp_ecx;
+ xen_signature[2] = cp.cp_edx;
+ xen_signature[3] = 0;
+ xen_str = (char *)xen_signature;
+ if (strcmp("XenVMMXenVMM", xen_str) == 0 && cp.cp_eax <= 0x40000002)
+ xpv_is_hvm = 1;
+}
+#endif /* !__xpv */
+
uint_t
cpuid_pass1(cpu_t *cpu)
{
@@ -1227,6 +1255,9 @@ cpuid_pass1(cpu_t *cpu)
synth_info(cpi);
pass1_done:
+#if !defined(__xpv)
+ check_for_hvm();
+#endif
cpi->cpi_pass = 1;
return (feature);
}
@@ -3674,7 +3705,6 @@ void
patch_tsc_read(int flag)
{
size_t cnt;
-
switch (flag) {
case X86_NO_TSC:
cnt = &_no_rdtsc_end - &_no_rdtsc_start;
diff --git a/usr/src/uts/i86pc/os/mlsetup.c b/usr/src/uts/i86pc/os/mlsetup.c
index 3f5705bbc6..f33f60e320 100644
--- a/usr/src/uts/i86pc/os/mlsetup.c
+++ b/usr/src/uts/i86pc/os/mlsetup.c
@@ -105,6 +105,9 @@ mlsetup(struct regs *rp)
extern disp_t cpu0_disp;
extern char t0stack[];
int boot_ncpus;
+#if !defined(__xpv)
+ extern int xpv_is_hvm;
+#endif
ASSERT_STACK_ALIGNED();
@@ -176,8 +179,11 @@ mlsetup(struct regs *rp)
* Note: tsc_read is not patched for x86 processors which do
* not support "mfence". By default tsc_read will use cpuid for
* serialization in such cases.
+ *
+ * The Xen hypervisor does not correctly report whether rdtscp is
+ * supported or not, so we must assume that it is not.
*/
- if (x86_feature & X86_TSCP)
+ if (xpv_is_hvm == 0 && (x86_feature & X86_TSCP))
patch_tsc_read(X86_HAVE_TSCP);
else if (cpuid_getvendor(CPU) == X86_VENDOR_AMD &&
cpuid_getfamily(CPU) <= 0xf && (x86_feature & X86_SSE2) != 0)
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index 861c1e0f3e..30864285af 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -521,6 +521,11 @@ static page_t *rd_pages;
struct system_hardware system_hardware;
/*
+ * Is this Solaris instance running in a fully virtualized xVM domain?
+ */
+int xpv_is_hvm = 0;
+
+/*
* Enable some debugging messages concerning memory usage...
*/
static void
@@ -1339,6 +1344,36 @@ startup_kmem(void)
PRM_POINT("startup_kmem() done");
}
+#ifndef __xpv
+/*
+ * If we have detected that we are running in an HVM environment, we need
+ * to prepend the PV driver directory to the module search path.
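+ * For example (illustrative), a search path of
+ * "/platform/i86pc/kernel /kernel /usr/kernel" would become
+ * "/platform/i86hvm/kernel /platform/i86pc/kernel /kernel /usr/kernel".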
+ */
+#define HVM_MOD_DIR "/platform/i86hvm/kernel"
+static void
+update_default_path()
+{
+ char *current, *newpath;
+ int newlen;
+
+ /*
+ * We are about to resync with krtld. krtld will reset its
+ * internal module search path iff Solaris has set default_path.
+ * We want to be sure we're prepending this new directory to the
+ * right search path.
+ */
+ current = (default_path == NULL) ? kobj_module_path : default_path;
+
+	newlen = strlen(HVM_MOD_DIR) + strlen(current) + 2;
+ newpath = kmem_alloc(newlen, KM_SLEEP);
+ (void) strcpy(newpath, HVM_MOD_DIR);
+ (void) strcat(newpath, " ");
+ (void) strcat(newpath, current);
+
+ default_path = newpath;
+}
+#endif
+
static void
startup_modules(void)
{
@@ -1355,6 +1390,9 @@ startup_modules(void)
* caused the drv_usecwait to be way too short.
*/
microfind();
+
+ if (xpv_is_hvm)
+ update_default_path();
#endif
/*
diff --git a/usr/src/uts/i86pc/sys/xpv_support.h b/usr/src/uts/i86pc/sys/xpv_support.h
new file mode 100644
index 0000000000..c42551b4f8
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/xpv_support.h
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_XPV_SUPPORT_H
+#define _SYS_XPV_SUPPORT_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__
+
+#if !defined(_ASM)
+
+#include <sys/types.h>
+#include <sys/inttypes.h>
+#include <sys/dditypes.h>
+
+typedef ulong_t mfn_t;
+typedef uint64_t maddr_t;
+#define mfn_to_ma(mfn) ((maddr_t)(mfn) << MMU_PAGESHIFT)
+#define MFN_INVALID (-(mfn_t)1)
+
+#define IPL_DEBUG 15 /* domain debug interrupt */
+#define IPL_CONS 9
+#define IPL_VIF 6
+#define IPL_VBD 5
+#define IPL_EVTCHN 1
+
+#define INVALID_EVTCHN 0
+
+typedef uint_t (*ec_handler_fcn_t)();
+
+extern int ec_init(dev_info_t *);
+extern void ec_fini();
+extern void ec_bind_evtchn_to_handler(int, pri_t, ec_handler_fcn_t, void *);
+extern void ec_unbind_evtchn(int);
+extern void ec_notify_via_evtchn(uint_t);
+extern void hypervisor_mask_event(uint_t);
+extern void hypervisor_unmask_event(uint_t);
+
+extern int xen_bind_interdomain(int, int, int *);
+extern int xen_alloc_unbound_evtchn(int, int *);
+extern int xen_xlate_errcode(int error);
+extern void *xen_alloc_pages(pgcnt_t cnt);
+extern void kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level);
+
+/*
+ * Stub functions to allow the FE drivers to build without littering them
+ * with #ifdefs
+ */
+extern void balloon_drv_added(int64_t);
+extern long balloon_free_pages(uint_t, mfn_t *, caddr_t, pfn_t *);
+extern void xen_release_pfn(pfn_t, caddr_t);
+extern void reassign_pfn(pfn_t, mfn_t);
+
+extern int xen_is_64bit;
+
+#define IN_XPV_PANIC() (__lintzero)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_ASM */
+#endif /* _SYS_XPV_SUPPORT_H */
diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c
index 457cd5662d..a3a7957faa 100644
--- a/usr/src/uts/i86pc/vm/hat_i86.c
+++ b/usr/src/uts/i86pc/vm/hat_i86.c
@@ -326,7 +326,6 @@ hat_alloc(struct as *as)
}
init_done:
- XPV_ALLOW_MIGRATE();
#if defined(__xpv)
/*
@@ -337,6 +336,7 @@ init_done:
xen_pin(hat->hat_user_ptable, mmu.max_level);
#endif
#endif
+ XPV_ALLOW_MIGRATE();
/*
* Put it at the start of the global list of all hats (used by stealing)
@@ -3815,6 +3815,7 @@ hat_mempte_setup(caddr_t addr)
ASSERT(IS_PAGEALIGNED(va));
ASSERT(!IN_VA_HOLE(va));
++curthread->t_hatdepth;
+ XPV_DISALLOW_MIGRATE();
ht = htable_getpte(kas.a_hat, va, &entry, &oldpte, 0);
if (ht == NULL) {
ht = htable_create(kas.a_hat, va, 0, NULL);
@@ -3835,6 +3836,7 @@ hat_mempte_setup(caddr_t addr)
* return the PTE physical address to the caller.
*/
htable_release(ht);
+ XPV_ALLOW_MIGRATE();
p = PT_INDEX_PHYSADDR(pfn_to_pa(ht->ht_pfn), entry);
--curthread->t_hatdepth;
return (p);
@@ -3850,6 +3852,7 @@ hat_mempte_release(caddr_t addr, hat_mempte_t pte_pa)
{
htable_t *ht;
+ XPV_DISALLOW_MIGRATE();
/*
* invalidate any left over mapping and decrement the htable valid count
*/
@@ -3878,6 +3881,7 @@ hat_mempte_release(caddr_t addr, hat_mempte_t pte_pa)
ASSERT(ht->ht_level == 0);
HTABLE_DEC(ht->ht_valid_cnt);
htable_release(ht);
+ XPV_ALLOW_MIGRATE();
}
/*
@@ -4266,7 +4270,9 @@ void
hat_prepare_mapping(hat_t *hat, caddr_t addr)
{
ASSERT(IS_P2ALIGNED((uintptr_t)addr, MMU_PAGESIZE));
+ XPV_DISALLOW_MIGRATE();
(void) htable_create(hat, (uintptr_t)addr, 0, NULL);
+ XPV_ALLOW_MIGRATE();
}
void
@@ -4275,10 +4281,12 @@ hat_release_mapping(hat_t *hat, caddr_t addr)
htable_t *ht;
ASSERT(IS_P2ALIGNED((uintptr_t)addr, MMU_PAGESIZE));
+ XPV_DISALLOW_MIGRATE();
ht = htable_lookup(hat, (uintptr_t)addr, 0);
ASSERT(ht != NULL);
ASSERT(ht->ht_busy >= 2);
htable_release(ht);
htable_release(ht);
-}
+ XPV_ALLOW_MIGRATE();
+}
#endif
diff --git a/usr/src/uts/i86pc/xnf/Makefile b/usr/src/uts/i86pc/xnf/Makefile
new file mode 100644
index 0000000000..f582e85990
--- /dev/null
+++ b/usr/src/uts/i86pc/xnf/Makefile
@@ -0,0 +1,98 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# uts/i86pc/xnf/Makefile
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the xnf
+# network driver kernel module.
+#
+# i86pc architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = xnf
+OBJECTS = $(XNF_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(XNF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE)
+
+INC_PATH += -I$(UTSBASE)/common/xen
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/Makefile.i86pc
+include $(UTSBASE)/i86pc/Makefile.hvm
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Driver depends on MAC & IP
+#
+LDFLAGS += -dy -Nmisc/mac -Ndrv/ip -Ndrv/xpvd -Ndrv/xpv
+
+CPPFLAGS += -D_SOLARIS
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/Makefile.targ
diff --git a/usr/src/uts/i86pc/xpv/Makefile b/usr/src/uts/i86pc/xpv/Makefile
new file mode 100644
index 0000000000..7f859166c9
--- /dev/null
+++ b/usr/src/uts/i86pc/xpv/Makefile
@@ -0,0 +1,101 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/i86pc/xpv/Makefile
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the xpv
+# driver, which provides the necessary infrastructure for
+# paravirtualized front-end drivers in HVM systems.
+#
+# i86pc implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = xpv
+OBJECTS = $(XPV_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(XPV_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/i86pc/io/xpv
+
+INC_PATH += -I$(UTSBASE)/common/xen -I$(UTSBASE)/../common
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/Makefile.i86pc
+include $(UTSBASE)/i86pc/Makefile.hvm
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(CONFMOD)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+CPPFLAGS += -D_SOLARIS
+LDFLAGS += -dy -N mach/pcplusmp
+
+#
+# The Xen header files do not lint cleanly. Since the troublesome
+# structures form part of the externally defined interface to the
+# hypervisor, we're stuck with the noise.
+#
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/Makefile.targ
diff --git a/usr/src/uts/i86pc/xpvd/Makefile b/usr/src/uts/i86pc/xpvd/Makefile
new file mode 100644
index 0000000000..01e515daf4
--- /dev/null
+++ b/usr/src/uts/i86pc/xpvd/Makefile
@@ -0,0 +1,92 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the xpvd nexus driver
+#
+# i86pc implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = xpvd
+OBJECTS = $(XPVD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(XPVD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/xen/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/Makefile.i86pc
+include $(UTSBASE)/i86pc/Makefile.hvm
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(CONFMOD)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+INC_PATH += -I$(UTSBASE)/common/xen -I$(UTSBASE)/../common
+
+LDFLAGS += -dy -Ndrv/xpv
+
+LINTTAGS += -erroff=E_STATIC_UNUSED
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/Makefile.targ
diff --git a/usr/src/uts/i86xpv/Makefile.files b/usr/src/uts/i86xpv/Makefile.files
index 221c580e2c..927414c3e8 100644
--- a/usr/src/uts/i86xpv/Makefile.files
+++ b/usr/src/uts/i86xpv/Makefile.files
@@ -209,7 +209,8 @@ XDB_OBJS += xdb.o
#
# Build up defines and paths.
#
-INC_PATH += -I$(UTSBASE)/i86xpv -I$(UTSBASE)/i86pc -I$(SRC)/common
+INC_PATH += -I$(UTSBASE)/i86xpv -I$(UTSBASE)/i86pc -I$(SRC)/common \
+ -I$(UTSBASE)/common/xen
#
# Since the assym files are derived, the dependencies must be explicit for
diff --git a/usr/src/uts/i86xpv/Makefile.rules b/usr/src/uts/i86xpv/Makefile.rules
index 63fec2422d..7b758fd3f6 100644
--- a/usr/src/uts/i86xpv/Makefile.rules
+++ b/usr/src/uts/i86xpv/Makefile.rules
@@ -171,6 +171,9 @@ DBOOT_ASFLAGS = $(DBOOT_AS_XARCH_$(CLASS)) -P -D_ASM
DBOOT_LINTFLAGS_i86xpv = $(LINTFLAGS_i386_$(CLASS)) $(LINTTAGS_i386_$(CLASS))
+$(DBOOT_OBJS_DIR)/%.o: $(UTSBASE)/common/xen/os/%.c
+ $(CC) $(DBOOT_CFLAGS) $(DBOOT_DEFS) $(DBOOT_CC_INCL) -c -o $@ $<
+
$(DBOOT_OBJS_DIR)/%.o: $(UTSBASE)/i86xpv/boot/%.c
$(CC) $(DBOOT_CFLAGS) $(DBOOT_DEFS) $(DBOOT_CC_INCL) -c -o $@ $<
@@ -186,6 +189,9 @@ $(DBOOT_OBJS_DIR)/%.o: $(COMMONBASE)/util/%.c
$(DBOOT_OBJS_DIR)/%.o: $(UTSBASE)/i86xpv/os/%.c
$(CC) $(DBOOT_CFLAGS) $(DBOOT_DEFS) $(DBOOT_CC_INCL) -c -o $@ $<
+$(DBOOT_OBJS_DIR)/%.o: $(UTSBASE)/intel/ia32/ml/%.s
+ $(AS) $(DBOOT_ASFLAGS) $(DBOOT_DEFS) $(DBOOT_AS_INCL) -o $@ $<
+
$(DBOOT_OBJS_DIR)/%.o: $(COMMONBASE)/util/i386/%.s
$(AS) $(DBOOT_ASFLAGS) $(DBOOT_DEFS) $(DBOOT_AS_INCL) -o $@ $<
@@ -220,6 +226,12 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/xen/io/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/xen/os/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(DBOOT_LINTS_DIR)/%.ln: $(UTSBASE)/intel/ia32/ml/%.s
+ @($(LHEAD) $(DBOOT_LINT) $(DBOOT_LOCAL_LINTFLAGS) $< $(LTAIL))
+
+$(DBOOT_LINTS_DIR)/%.ln: $(UTSBASE)/common/xen/os/%.c
+ @($(LHEAD) $(DBOOT_LINT) $(DBOOT_LOCAL_LINTFLAGS) $< $(LTAIL))
+
$(DBOOT_LINTS_DIR)/%.ln: $(UTSBASE)/i86xpv/os/%.c
@($(LHEAD) $(DBOOT_LINT) $(DBOOT_LOCAL_LINTFLAGS) $< $(LTAIL))
diff --git a/usr/src/uts/i86xpv/os/xen_mmu.c b/usr/src/uts/i86xpv/os/xen_mmu.c
index eb9b6e07d9..4983e1fb62 100644
--- a/usr/src/uts/i86xpv/os/xen_mmu.c
+++ b/usr/src/uts/i86xpv/os/xen_mmu.c
@@ -55,12 +55,18 @@ caddr_t xb_addr; /* virtual addr for the store_mfn page */
/*
- * Running on the hypervisor, we need to prevent migration while holding
- * PTE values that we might do PTE2PFN() or pa_to_ma() on, as the
- * mfn_to_pfn_mapping and mfn_list[] translation tables might change.
+ * We need to prevent migration or suspension of a domU while it's
+ * manipulating MFN values, as the MFN values will spontaneously
+ * change. The next four routines provide a mechanism for that.
+ * The basic idea is to use a reader/writer lock: the readers are any
+ * threads manipulating MFNs, and only the thread that is actually going
+ * to call HYPERVISOR_suspend() becomes a writer.
*
- * As the suspend process uses the HAT, we need to check we don't already own
- * the lock as a writer before we try to take it as a reader.
+ * Since various places need to manipulate MFNs and also call the HAT,
+ * we track whether a thread already holds reader status and allow it to
+ * acquire that status recursively. This prevents a deadlock in which a
+ * migration request blocks waiting on an existing reader that, in turn,
+ * needs to call into the HAT.
*/
#define NUM_M2P_LOCKS 128
static struct {
@@ -74,7 +80,7 @@ void
xen_block_migrate(void)
{
if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
- rw_owner(&m2p_lock[XM2P_HASH].m2p_rwlock) != curthread)
+ ++curthread->t_xpvcntr == 1)
rw_enter(&m2p_lock[XM2P_HASH].m2p_rwlock, RW_READER);
}
@@ -82,7 +88,7 @@ void
xen_allow_migrate(void)
{
if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
- rw_owner(&m2p_lock[XM2P_HASH].m2p_rwlock) != curthread)
+ --curthread->t_xpvcntr == 0)
rw_exit(&m2p_lock[XM2P_HASH].m2p_rwlock);
}
@@ -91,6 +97,8 @@ xen_start_migrate(void)
{
int i;
+ ASSERT(curthread->t_xpvcntr == 0);
+ ++curthread->t_xpvcntr; /* this allows calls into HAT */
for (i = 0; i < NUM_M2P_LOCKS; ++i)
rw_enter(&m2p_lock[i].m2p_rwlock, RW_WRITER);
}
@@ -102,6 +110,8 @@ xen_end_migrate(void)
for (i = 0; i < NUM_M2P_LOCKS; ++i)
rw_exit(&m2p_lock[i].m2p_rwlock);
+ ASSERT(curthread->t_xpvcntr == 1);
+ --curthread->t_xpvcntr;
}
/*ARGSUSED*/
diff --git a/usr/src/uts/i86xpv/sys/Makefile b/usr/src/uts/i86xpv/sys/Makefile
index 2b9f5507bc..f559679a17 100644
--- a/usr/src/uts/i86xpv/sys/Makefile
+++ b/usr/src/uts/i86xpv/sys/Makefile
@@ -39,9 +39,7 @@ FILEMODE = 644
HDRS= \
balloon.h \
- hypervisor.h \
machprivregs.h \
- xen_errno.h \
xen_mmu.h \
xpv_impl.h
diff --git a/usr/src/uts/i86xpv/ml/hypersubr.s b/usr/src/uts/intel/ia32/ml/hypersubr.s
index f81536f438..c50d24f7d1 100644
--- a/usr/src/uts/i86xpv/ml/hypersubr.s
+++ b/usr/src/uts/intel/ia32/ml/hypersubr.s
@@ -27,18 +27,10 @@
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/asm_linkage.h>
-#include <sys/hypervisor.h>
-
-/*
- * XXPV grr - assembler can't deal with an instruction in a quoted string
- */
-#undef TRAP_INSTR /* cause it's currently "int $0x82" */
-
-#if defined(__amd64)
-#define TRAP_INSTR syscall
-#elif defined(__i386)
-#define TRAP_INSTR int $0x82
+#ifdef XPV_HVM_DRIVER
+#include <sys/xpv_support.h>
#endif
+#include <sys/hypervisor.h>
/*
* Hypervisor "system calls"
@@ -125,6 +117,63 @@ __hypercall5_int(int callnum,
#else /* __lint */
+/*
+ * XXPV grr - assembler can't deal with an instruction in a quoted string
+ */
+#undef TRAP_INSTR /* because it's currently "int $0x82" */
+
+/*
+ * The method for issuing a hypercall (i.e. a system call to the
+ * hypervisor) varies from platform to platform.  In 32-bit PV domains,
+ * an 'int $0x82' triggers the call; in 64-bit PV domains, 'syscall' does.
+ *
+ * HVM domains are more complicated.  In all cases, we want to execute
+ * the instruction that traps into the hypervisor (causing a VMEXIT),
+ * but AMD and Intel use different opcodes for it.  Rather than build
+ * CPU-specific modules with the different opcodes, we use the
+ * 'hypercall page' provided by Xen.  This page contains code stubs
+ * that do nothing except issue hypercalls using the proper
+ * instructions for this machine.  To keep the wrapper code as simple
+ * and efficient as possible, we preallocate that page below.  When the
+ * module is loaded, we ask Xen to remap the underlying PFN to that of
+ * the hypercall page.
+ *
+ * Note: this same mechanism could be used in PV domains, but going
+ * through the hypercall page requires a call and several more
+ * instructions than simply issuing the proper trap.
+ */
+#if defined(XPV_HVM_DRIVER)
+
+#define HYPERCALL_PAGESIZE 0x1000
+ .text
+ .align HYPERCALL_PAGESIZE
+ .globl hypercall_page
+ .type hypercall_page, @function
+hypercall_page:
+ .skip HYPERCALL_PAGESIZE
+ .size hypercall_page, HYPERCALL_PAGESIZE
+#if defined(__amd64)
+#define TRAP_INSTR \
+ shll $5, %eax; \
+ addq $hypercall_page, %rax; \
+ jmp *%rax
+#else
+#define TRAP_INSTR \
+ shll $5, %eax; \
+ addl $hypercall_page, %eax; \
+ call *%eax
+#endif
+
+#else /* XPV_HVM_DRIVER */
+
+#if defined(__amd64)
+#define TRAP_INSTR syscall
+#elif defined(__i386)
+#define TRAP_INSTR int $0x82
+#endif
+#endif /* XPV_HVM_DRIVER */
+
+
#if defined(__amd64)
ENTRY_NP(__hypercall0)
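The HVM flavour of TRAP_INSTR above depends on the layout of Xen's
hypercall page: it is an array of 32-byte code stubs, one per hypercall
number, so the 'shll $5, %eax' scales the call number in %eax by 32 to
reach the right stub.  A hedged C sketch of that address computation
(hypercall_stub() is a hypothetical helper, not part of this patch):

	extern char hypercall_page[];	/* preallocated page, remapped by Xen */

	static void *
	hypercall_stub(unsigned int callnum)
	{
		/* the stub for hypercall N lives at hypercall_page + N * 32 */
		return (hypercall_page + (callnum << 5));
	}

Note the asymmetry in the wrappers: amd64 jmps to the stub, so the
stub's ret returns straight to the original caller, while i386 calls it.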
diff --git a/usr/src/uts/intel/os/name_to_major b/usr/src/uts/intel/os/name_to_major
index de3388721c..56e10ff720 100644
--- a/usr/src/uts/intel/os/name_to_major
+++ b/usr/src/uts/intel/os/name_to_major
@@ -117,6 +117,7 @@ kssl 185
mc-amd 186
tzmon 187
intel_nb5000 188
+xpv 190
xpvd 191
xnf 192
xdf 193
diff --git a/usr/src/uts/intel/sys/Makefile b/usr/src/uts/intel/sys/Makefile
index 3296cb5735..0d523cc8ac 100644
--- a/usr/src/uts/intel/sys/Makefile
+++ b/usr/src/uts/intel/sys/Makefile
@@ -47,6 +47,7 @@ HDRS = \
fp.h \
frame.h \
+ hypervisor.h \
inline.h \
kd.h \
kdi_machimpl.h \
kdi_regs.h \
@@ -92,7 +93,8 @@ HDRS = \
ucontext.h \
utrap.h \
vmparam.h \
- x86_archext.h
+ x86_archext.h \
+ xen_errno.h
CLOSEDHDRS = \
memtest.h \
diff --git a/usr/src/uts/i86xpv/sys/hypervisor.h b/usr/src/uts/intel/sys/hypervisor.h
index 2810c83b1c..9f5aadd499 100644
--- a/usr/src/uts/i86xpv/sys/hypervisor.h
+++ b/usr/src/uts/intel/sys/hypervisor.h
@@ -58,14 +58,20 @@
extern "C" {
#endif
+#ifdef XPV_HVM_DRIVER
+#include <sys/xpv_support.h>
+#else
#include <sys/xpv_impl.h>
+#endif
#include <sys/xen_errno.h>
#if !defined(_ASM)
#include <sys/processor.h>
#include <sys/cpuvar.h>
+#ifndef XPV_HVM_DRIVER
#include <sys/xen_mmu.h>
+#endif
#include <sys/systm.h>
#include <xen/public/callback.h>
#include <xen/public/event_channel.h>
@@ -133,12 +139,21 @@ extern void xen_disable_user_iopl(void);
/*
* A quick way to ask if we're DOM0 or not ..
*/
+#ifdef XPV_HVM_DRIVER
+
+#define DOMAIN_IS_INITDOMAIN(info) (__lintzero)
+#define DOMAIN_IS_PRIVILEGED(info) (__lintzero)
+
+#else
+
#define DOMAIN_IS_INITDOMAIN(info) \
(((info)->flags & SIF_INITDOMAIN) == SIF_INITDOMAIN)
#define DOMAIN_IS_PRIVILEGED(info) \
(((info)->flags & SIF_PRIVILEGED) == SIF_PRIVILEGED)
+#endif
+
/*
* start of day information passed up from the hypervisor
*/
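With XPV_HVM_DRIVER defined, both DOMAIN_IS_* macros are hard-wired to
zero, since a driver running inside an HVM guest can never be dom0;
__lintzero (an always-zero variable) is used instead of a literal 0 so
lint does not complain about constant conditionals.  Shared code can
then keep a single dom0 test, as in this sketch (common_attach() is a
hypothetical caller, not part of this patch):

	#include <sys/hypervisor.h>

	void
	common_attach(void)
	{
		if (DOMAIN_IS_INITDOMAIN(xen_info)) {
			/* dom0-only setup; compiles away in the HVM driver */
		}
	}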
diff --git a/usr/src/uts/i86xpv/sys/xen_errno.h b/usr/src/uts/intel/sys/xen_errno.h
index 35a6586eaf..35a6586eaf 100644
--- a/usr/src/uts/i86xpv/sys/xen_errno.h
+++ b/usr/src/uts/intel/sys/xen_errno.h