summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
author rab <none@none> 2008-03-03 17:05:43 -0800
committer rab <none@none> 2008-03-03 17:05:43 -0800
commit a576ab5b6e08c47732b3dedca9eaa8a8cbb85720 (patch)
tree d3c3e79234e0f53a48282500d03af0cb57e0ac5e /usr/src
parent 247f8eaa502d3244b05aa230214295f0f1067c59 (diff)
download illumos-gate-a576ab5b6e08c47732b3dedca9eaa8a8cbb85720.tar.gz
6602031 move xVM to xen 3.1
6637636 HVM domU live migration support
6663166 xdb needs to sanity check nr_segments
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_fdio.c9
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_gelf.c32
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_gelf.h12
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_io.h4
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_kvm.c13
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_main.c131
-rw-r--r--usr/src/cmd/mdb/intel/modules/mdb_kb/mdb_kb.c656
-rw-r--r--usr/src/uts/common/xen/io/blkif_impl.h89
-rw-r--r--usr/src/uts/common/xen/io/xdb.c237
-rw-r--r--usr/src/uts/common/xen/io/xdb.h11
-rw-r--r--usr/src/uts/common/xen/io/xdf.c26
-rw-r--r--usr/src/uts/common/xen/io/xdf.h9
-rw-r--r--usr/src/uts/common/xen/io/xnb.c15
-rw-r--r--usr/src/uts/common/xen/io/xnf.c5
-rw-r--r--usr/src/uts/common/xen/os/gnttab.c338
-rw-r--r--usr/src/uts/common/xen/os/hypercall.c6
-rw-r--r--usr/src/uts/common/xen/public/acm.h25
-rw-r--r--usr/src/uts/common/xen/public/acm_ops.h59
-rw-r--r--usr/src/uts/common/xen/public/arch-x86/xen-x86_32.h28
-rw-r--r--usr/src/uts/common/xen/public/arch-x86/xen-x86_64.h7
-rw-r--r--usr/src/uts/common/xen/public/arch-x86/xen.h47
-rw-r--r--usr/src/uts/common/xen/public/domctl.h135
-rw-r--r--usr/src/uts/common/xen/public/elfnote.h62
-rw-r--r--usr/src/uts/common/xen/public/event_channel.h13
-rw-r--r--usr/src/uts/common/xen/public/grant_table.h19
-rw-r--r--usr/src/uts/common/xen/public/hvm/hvm_op.h23
-rw-r--r--usr/src/uts/common/xen/public/hvm/params.h28
-rw-r--r--usr/src/uts/common/xen/public/io/blkif.h14
-rw-r--r--usr/src/uts/common/xen/public/io/protocols.h42
-rw-r--r--usr/src/uts/common/xen/public/io/xs_wire.h28
-rw-r--r--usr/src/uts/common/xen/public/memory.h5
-rw-r--r--usr/src/uts/common/xen/public/platform.h40
-rw-r--r--usr/src/uts/common/xen/public/sysctl.h57
-rw-r--r--usr/src/uts/common/xen/public/vcpu.h60
-rw-r--r--usr/src/uts/common/xen/public/xen.h42
-rw-r--r--usr/src/uts/common/xen/sys/gnttab.h26
-rw-r--r--usr/src/uts/i86xpv/io/privcmd.c13
-rw-r--r--usr/src/uts/i86xpv/io/privcmd_hcall.c117
-rw-r--r--usr/src/uts/i86xpv/ml/xenguest.s10
-rw-r--r--usr/src/uts/i86xpv/os/xpv_panic.c57
-rw-r--r--usr/src/uts/intel/os/driver_aliases1
-rw-r--r--usr/src/uts/intel/sys/hypervisor.h4
42 files changed, 1960 insertions, 595 deletions
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_fdio.c b/usr/src/cmd/mdb/common/mdb/mdb_fdio.c
index 4bcac3e687..6fb9a7e1b0 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_fdio.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_fdio.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -323,3 +323,10 @@ mdb_fdio_create_named(int fd, const char *name)
return (io);
}
+
+/*
+ * Return the fd_fd member of the fd_data_t stored in io->io_data.
+ * NOTE(review): the caller must pass an io object whose io_data really is
+ * an fd_data_t; nothing here checks that.
+ */
+int
+mdb_fdio_fileno(mdb_io_t *io)
+{
+	return (((fd_data_t *)io->io_data)->fd_fd);
+}
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_gelf.c b/usr/src/cmd/mdb/common/mdb/mdb_gelf.c
index f334a018e0..ade3da21cf 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_gelf.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_gelf.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -134,8 +133,8 @@ gelf_sect_init(mdb_gelf_file_t *gf)
return (gf);
}
-static void *
-gelf_sect_load(mdb_gelf_file_t *gf, mdb_gelf_sect_t *gsp)
+void *
+mdb_gelf_sect_load(mdb_gelf_file_t *gf, mdb_gelf_sect_t *gsp)
{
ssize_t nbytes;
@@ -1124,10 +1123,10 @@ mdb_gelf_symtab_create_file_by_name(mdb_gelf_file_t *gf,
if (gst->gst_dsect == NULL || gst->gst_ssect == NULL)
goto err; /* Failed to locate data or string section */
- if (gelf_sect_load(gf, gst->gst_dsect) == NULL)
+ if (mdb_gelf_sect_load(gf, gst->gst_dsect) == NULL)
goto err; /* Failed to load data section */
- if (gelf_sect_load(gf, gst->gst_ssect) == NULL)
+ if (mdb_gelf_sect_load(gf, gst->gst_ssect) == NULL)
goto err; /* Failed to load string section */
if (gf->gf_ehdr.e_ident[EI_CLASS] == ELFCLASS32)
@@ -1278,10 +1277,10 @@ mdb_gelf_symtab_create_dynamic(mdb_gelf_file_t *gf, uint_t tabid)
gst->gst_ssect->gs_shdr.sh_size = dt_strsz;
gst->gst_ssect->gs_shdr.sh_entsize = 0;
- if (gelf_sect_load(gf, gst->gst_dsect) == NULL)
+ if (mdb_gelf_sect_load(gf, gst->gst_dsect) == NULL)
goto err;
- if (gelf_sect_load(gf, gst->gst_ssect) == NULL)
+ if (mdb_gelf_sect_load(gf, gst->gst_ssect) == NULL)
goto err;
if (gf->gf_ehdr.e_ident[EI_CLASS] == ELFCLASS32)
@@ -1876,3 +1875,16 @@ mdb_gelf_rw(mdb_gelf_file_t *gf, void *buf, size_t nbytes, uintptr_t addr,
return (nbytes - resid);
}
+
+/*
+ * Look up a section of 'gf' by name.  Walks the gf_sects array (gf_shnum
+ * entries) in order and returns a pointer to the first entry whose gs_name
+ * compares equal to 'name', or NULL if there is no such entry.
+ */
+mdb_gelf_sect_t *
+mdb_gelf_sect_by_name(mdb_gelf_file_t *gf, const char *name)
+{
+	mdb_gelf_sect_t *gsp = gf->gf_sects;
+	size_t left;
+
+	for (left = gf->gf_shnum; left != 0; left--, gsp++) {
+		if (strcmp(gsp->gs_name, name) != 0)
+			continue;
+
+		return (gsp);
+	}
+
+	return (NULL);
+}
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_gelf.h b/usr/src/cmd/mdb/common/mdb/mdb_gelf.h
index 5f7629cfcc..4f9d5a95d4 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_gelf.h
+++ b/usr/src/cmd/mdb/common/mdb/mdb_gelf.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -139,6 +138,11 @@ extern void mdb_gelf_symtab_insert(mdb_gelf_symtab_t *,
extern void mdb_gelf_symtab_delete(mdb_gelf_symtab_t *,
const char *, GElf_Sym *);
+extern mdb_gelf_sect_t *mdb_gelf_sect_by_name(mdb_gelf_file_t *,
+ const char *);
+
+extern void *mdb_gelf_sect_load(mdb_gelf_file_t *, mdb_gelf_sect_t *);
+
#endif /* _MDB */
#ifdef __cplusplus
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_io.h b/usr/src/cmd/mdb/common/mdb/mdb_io.h
index 70ede3807e..2ef4677db4 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_io.h
+++ b/usr/src/cmd/mdb/common/mdb/mdb_io.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -182,6 +182,8 @@ extern void mdb_table_print(uint_t, const char *, ...);
extern int mdb_setupterm(const char *, mdb_io_t *, int *);
+extern int mdb_fdio_fileno(mdb_io_t *);
+
#endif /* _MDB */
#ifdef __cplusplus
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_kvm.c b/usr/src/cmd/mdb/common/mdb/mdb_kvm.c
index caeadde0e5..0c1cc673fa 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_kvm.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_kvm.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -1398,6 +1398,17 @@ mdb_kvm_tgt_create(mdb_tgt_t *t, int argc, const char *argv[])
getops = (mdb_kb_ops_t *(*)())dlsym(RTLD_NEXT, "mdb_kb_ops");
+ /*
+ * Load mdb_kb if it's not already loaded during
+ * identification.
+ */
+ if (getops == NULL) {
+ (void) mdb_module_load("mdb_kb",
+ MDB_MOD_GLOBAL | MDB_MOD_SILENT);
+ getops = (mdb_kb_ops_t *(*)())
+ dlsym(RTLD_NEXT, "mdb_kb_ops");
+ }
+
if (getops == NULL || (kt->k_kb_ops = getops()) == NULL) {
warn("failed to load KVM backend ops\n");
goto err;
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_main.c b/usr/src/cmd/mdb/common/mdb/mdb_main.c
index a3b5f67a54..f72ead52e5 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_main.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_main.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -367,6 +367,38 @@ mdb_scf_console_term(void)
return (term);
}
+/*
+ * Unpleasant hack: we might be debugging a hypervisor domain dump.
+ * Earlier versions use a non-ELF file. Later versions are ELF, but are
+ * /always/ ELF64, so our standard ehdr check isn't good enough. Since
+ * we don't want to know too much about the file format, we'll ask
+ * mdb_kb.
+ */
+/*
+ * Ask the mdb_kb module whether 'file' is a hypervisor domain dump.
+ *
+ * On x86 this loads mdb_kb (silently), looks up its xkb_identify() entry
+ * point and returns whatever that returns, passing 'longmode' through for
+ * it to fill in.  If the module cannot be loaded or the symbol is missing,
+ * or on any non-x86 build, the answer is 0.
+ */
+/*ARGSUSED*/
+static int
+identify_xvm_file(const char *file, int *longmode)
+{
+#ifdef __x86
+	int (*fn)(const char *, int *) = NULL;
+
+	if (mdb_module_load("mdb_kb", MDB_MOD_GLOBAL | MDB_MOD_SILENT) == 0)
+		fn = (int (*)())dlsym(RTLD_NEXT, "xkb_identify");
+
+	return (fn == NULL ? 0 : fn(file, longmode));
+#else
+	return (0);
+#endif /* __x86 */
+}
+
int
main(int argc, char *argv[], char *envp[])
{
@@ -385,6 +417,7 @@ main(int argc, char *argv[], char *envp[])
int fflag = 0, Kflag = 0, Rflag = 0, Sflag = 0, Oflag = 0, Uflag = 0;
int ttylike;
+ int longmode = 0;
stack_t sigstack;
@@ -687,6 +720,33 @@ main(int argc, char *argv[], char *envp[])
if ((mdb.m_shell = getenv("SHELL")) == NULL)
mdb.m_shell = "/bin/sh";
+ /*
+ * If the debugger state is to be inherited from a previous instance,
+ * restore it now prior to path evaluation so that %R is updated.
+ */
+ if ((p = getenv(MDB_CONFIG_ENV_VAR)) != NULL) {
+ mdb_set_config(p);
+ (void) unsetenv(MDB_CONFIG_ENV_VAR);
+ }
+
+ /*
+ * Path evaluation part 1: Create the initial module path to allow
+ * the target constructor to load a support module. Then expand
+ * any command-line arguments that modify the paths.
+ */
+ if (Iflag != NULL)
+ mdb_set_ipath(Iflag);
+ else
+ mdb_set_ipath(MDB_DEF_IPATH);
+
+ if (Lflag != NULL)
+ mdb_set_lpath(Lflag);
+ else
+ mdb_set_lpath(MDB_DEF_LPATH);
+
+ if (mdb_get_prompt() == NULL && !(mdb.m_flags & MDB_FL_ADB))
+ (void) mdb_set_prompt(MDB_DEF_PROMPT);
+
if (tgt_ctor == mdb_kvm_tgt_create) {
if (pidarg != NULL) {
warn("-p and -k options are mutually exclusive\n");
@@ -791,6 +851,19 @@ main(int argc, char *argv[], char *envp[])
mdb_io_destroy(io);
+ if (identify_xvm_file(tgt_argv[0], &longmode) == 1 &&
+ !fflag) {
+#ifdef _LP64
+ if (!longmode)
+ goto reexec;
+#else
+ if (longmode)
+ goto reexec;
+#endif
+ tgt_ctor = mdb_kvm_tgt_create;
+ goto tcreate;
+ }
+
if (tgt_ctor == mdb_rawfile_tgt_create)
goto tcreate; /* skip re-exec and just create target */
@@ -845,62 +918,6 @@ tcreate:
if (tgt_ctor == NULL)
tgt_ctor = mdb_proc_tgt_create;
- /*
- * If the debugger state is to be inherited from a previous instance,
- * restore it now prior to path evaluation so that %R is updated.
- */
- if ((p = getenv(MDB_CONFIG_ENV_VAR)) != NULL) {
- mdb_set_config(p);
- (void) unsetenv(MDB_CONFIG_ENV_VAR);
- }
-
- /*
- * Path evaluation part 1: Create the initial module path to allow
- * the target constructor to load a support module. Then expand
- * any command-line arguments that modify the paths.
- */
- if (Iflag != NULL)
- mdb_set_ipath(Iflag);
- else
- mdb_set_ipath(MDB_DEF_IPATH);
-
- if (Lflag != NULL)
- mdb_set_lpath(Lflag);
- else
- mdb_set_lpath(MDB_DEF_LPATH);
-
- if (mdb_get_prompt() == NULL && !(mdb.m_flags & MDB_FL_ADB))
- (void) mdb_set_prompt(MDB_DEF_PROMPT);
-
-#ifdef __x86
- /*
- * Unpleasant hack: we might be debugging a hypervisor domain dump,
- * which can be a non-ELF file in earlier versions. Since we need to
- * know some unpleasant details about the format of the file, we ask
- * mdb_kb to identify the file if it can, and switch targets based on
- * its response.
- */
- if (tgt_ctor == mdb_rawfile_tgt_create && !fflag) {
- int (*identify)(const char *, int *);
- int longmode;
-
- if (mdb_module_load("mdb_kb",
- MDB_MOD_GLOBAL | MDB_MOD_SILENT) == 0 &&
- (identify = (int (*)())dlsym(RTLD_NEXT, "xkb_identify"))
- != NULL && identify(tgt_argv[0], &longmode) == 1) {
- tgt_ctor = mdb_kvm_tgt_create;
-#ifdef _LP64
- if (!longmode)
- goto reexec;
-#else
- if (longmode)
- goto reexec;
-#endif
- }
- }
-#endif /* __x86 */
-
-
tgt = mdb_tgt_create(tgt_ctor, mdb.m_tgtflags, tgt_argc, tgt_argv);
if (tgt == NULL) {
diff --git a/usr/src/cmd/mdb/intel/modules/mdb_kb/mdb_kb.c b/usr/src/cmd/mdb/intel/modules/mdb_kb/mdb_kb.c
index 0dd529dd32..79b0a1c7fa 100644
--- a/usr/src/cmd/mdb/intel/modules/mdb_kb/mdb_kb.c
+++ b/usr/src/cmd/mdb/intel/modules/mdb_kb/mdb_kb.c
@@ -19,17 +19,25 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
- * KVM backend for hypervisor domain dumps. We don't use libkvm for such
- * dumps, since they do not have a namelist file or the typical dump structures
- * we expect to aid bootstrapping. Instead, we bootstrap based upon a
- * debug_info structure at a known VA, using the guest's own page tables to
- * resolve to physical addresses, and construct the namelist in a manner
- * similar to ksyms_snapshot().
+ * KVM backend for hypervisor domain dumps. We don't use libkvm for
+ * such dumps, since they do not have a namelist file or the typical
+ * dump structures we expect to aid bootstrapping. Instead, we
+ * bootstrap based upon a debug_info structure at a known VA, using the
+ * guest's own page tables to resolve to physical addresses, and
+ * construct the namelist in a manner similar to ksyms_snapshot().
+ *
+ * Note that there are two formats understood by this module: the older,
+ * ad hoc format, which we call 'core' within this file, and an
+ * ELF-based format, known as 'elf'.
+ *
+ * We only support the older format generated on Solaris dom0: before we
+ * fixed it, core dump files were broken whenever a PFN didn't map a
+ * real MFN (!).
*/
#pragma ident "%Z%%M% %I% %E% SMI"
@@ -62,13 +70,8 @@
#include <mdb/mdb_target_impl.h>
#include <xen/public/xen.h>
-
-#if defined(__i386)
-#define DEF_DEBUG_INFO_VA 0xfb3ff000
-#define PAE_DEBUG_INFO_VA 0xf4bff000
-#elif defined(__amd64)
-#define DEF_DEBUG_INFO_VA 0xfffffffffb7ff000
-#endif
+#include <xen/public/version.h>
+#include <xen/public/elfnote.h>
#define XKB_SHDR_NULL 0
#define XKB_SHDR_SYMTAB 1
@@ -81,18 +84,20 @@
#define XKB_WALK_STR 0x4
#define XKB_WALK_ALL (XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR)
+#if defined(__i386)
+#define DEBUG_INFO 0xf4bff000
+#elif defined(__amd64)
+#define DEBUG_INFO 0xfffffffffb7ff000
+#endif
+
#define PAGE_SIZE 0x1000
#define PAGE_SHIFT 12
#define PAGE_OFFSET(a) ((a) & (PAGE_SIZE - 1))
#define PAGE_MASK(a) ((a) & ~(PAGE_SIZE - 1))
+#define PAGE_ALIGNED(a) (((a) & (PAGE_SIZE -1)) == 0)
#define PT_PADDR 0x000ffffffffff000ull
#define PT_VALID 0x1
-/*
- * Once the headers are available easily from within ON, we can use those, but
- * until then these definitions are duplicates.
- */
-
#define XC_CORE_MAGIC 0xF00FEBED
#define XC_CORE_MAGIC_HVM 0xF00FEBEE
@@ -107,6 +112,33 @@ typedef struct xc_core_header {
unsigned int xch_pages_offset;
} xc_core_header_t;
+struct xc_elf_header {
+ uint64_t xeh_magic;
+ uint64_t xeh_nr_vcpus;
+ uint64_t xeh_nr_pages;
+ uint64_t xeh_page_size;
+};
+
+struct xc_elf_version {
+ uint64_t xev_major;
+ uint64_t xev_minor;
+ xen_extraversion_t xev_extra;
+ xen_compile_info_t xev_compile_info;
+ xen_capabilities_info_t xev_capabilities;
+ xen_changeset_info_t xev_changeset;
+ xen_platform_parameters_t xev_platform_parameters;
+ uint64_t xev_pagesize;
+};
+
+/*
+ * Either an old-style (3.0.4) core format, or the ELF format.
+ */
+typedef enum {
+ XKB_FORMAT_UNKNOWN = 0,
+ XKB_FORMAT_CORE = 1,
+ XKB_FORMAT_ELF = 2
+} xkb_type_t;
+
typedef struct mfn_map {
mfn_t mm_mfn;
char *mm_map;
@@ -119,22 +151,46 @@ typedef struct mmu_info {
size_t mi_ptesize;
} mmu_info_t;
+typedef struct xkb_core {
+ xc_core_header_t xc_hdr;
+ void *xc_p2m_buf;
+} xkb_core_t;
+
+typedef struct xkb_elf {
+ mdb_gelf_file_t *xe_gelf;
+ size_t *xe_off;
+ struct xc_elf_header xe_hdr;
+ struct xc_elf_version xe_version;
+} xkb_elf_t;
+
typedef struct xkb {
char *xkb_path;
int xkb_fd;
- xc_core_header_t xkb_hdr;
- char *xkb_namelist;
- size_t xkb_namesize;
- struct vcpu_guest_context *xkb_ctxts;
+
+ xkb_type_t xkb_type;
+ xkb_core_t xkb_core;
+ xkb_elf_t xkb_elf;
+
+ size_t xkb_nr_vcpus;
+ size_t xkb_nr_pages;
+ size_t xkb_pages_off;
+ xen_pfn_t xkb_max_pfn;
mfn_t xkb_max_mfn;
+ int xkb_is_pae;
+
mmu_info_t xkb_mmu;
+ debug_info_t xkb_info;
+
+ struct vcpu_guest_context *xkb_vcpus;
+
char *xkb_pages;
mfn_t *xkb_p2m;
- void *xkb_p2m_buf;
xen_pfn_t *xkb_m2p;
- debug_info_t xkb_info;
mfn_map_t xkb_pt_map[4];
mfn_map_t xkb_map;
+
+ char *xkb_namelist;
+ size_t xkb_namesize;
} xkb_t;
static const char xkb_shstrtab[] = "\0.symtab\0.strtab\0.shstrtab\0";
@@ -155,10 +211,20 @@ static int xkb_read_word(xkb_t *, uintptr_t, uintptr_t *);
static char *xkb_map_mfn(xkb_t *, mfn_t, mfn_map_t *);
static int xkb_close(xkb_t *);
+/*
+ * Jump through the hoops we need to to correctly identify a core file
+ * of either the old or new format.
+ */
int
xkb_identify(const char *file, int *longmode)
{
xc_core_header_t header;
+ mdb_gelf_file_t *gf = NULL;
+ mdb_gelf_sect_t *sect = NULL;
+ mdb_io_t *io = NULL;
+ char *notes = NULL;
+ char *pos;
+ int ret = 0;
size_t sz;
int fd;
@@ -172,24 +238,82 @@ xkb_identify(const char *file, int *longmode)
(void) close(fd);
- if (header.xch_magic != XC_CORE_MAGIC)
- return (0);
-
- *longmode = 0;
+ if (header.xch_magic == XC_CORE_MAGIC) {
+ *longmode = 0;
- /*
- * Indeed.
- */
- sz = header.xch_index_offset - header.xch_ctxt_offset;
+ /*
+ * Indeed.
+ */
+ sz = header.xch_index_offset - header.xch_ctxt_offset;
#ifdef _LP64
- if (sizeof (struct vcpu_guest_context) * header.xch_nr_vcpus == sz)
- *longmode = 1;
+ if (sizeof (struct vcpu_guest_context) *
+ header.xch_nr_vcpus == sz)
+ *longmode = 1;
#else
- if (sizeof (struct vcpu_guest_context) * header.xch_nr_vcpus != sz)
- *longmode = 1;
+ if (sizeof (struct vcpu_guest_context) *
+ header.xch_nr_vcpus != sz)
+ *longmode = 1;
#endif /* _LP64 */
- return (1);
+ return (1);
+ }
+
+ if ((io = mdb_fdio_create_path(NULL, file, O_RDONLY, 0)) == NULL)
+ return (-1);
+
+ if ((gf = mdb_gelf_create(io, ET_NONE, GF_FILE)) == NULL)
+ goto out;
+
+ if ((sect = mdb_gelf_sect_by_name(gf, ".note.Xen")) == NULL)
+ goto out;
+
+ if ((notes = mdb_gelf_sect_load(gf, sect)) == NULL)
+ goto out;
+
+ for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
+ struct xc_elf_version *vers;
+ /* LINTED - alignment */
+ Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
+ char *desc;
+ char *name;
+
+ name = pos + sizeof (*nhdr);
+ desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
+
+ pos = desc + nhdr->n_descsz;
+
+ if (nhdr->n_type != XEN_ELFNOTE_DUMPCORE_XEN_VERSION)
+ continue;
+
+ /*
+ * The contents of this struct differ between 32 and 64
+ * bit; however, not until past the 'xev_capabilities'
+ * member, so we can just about get away with this.
+ */
+
+ /* LINTED - alignment */
+ vers = (struct xc_elf_version *)desc;
+
+ if (strstr(vers->xev_capabilities, "x86_64")) {
+ *longmode = 1;
+ } else if (strstr(vers->xev_capabilities, "x86_32") ||
+ strstr(vers->xev_capabilities, "x86_32p")) {
+ *longmode = 0;
+ } else {
+ mdb_warn("couldn't derive word size of dump; "
+ "assuming 64-bit");
+ *longmode = 1;
+ }
+ }
+
+ ret = 1;
+
+out:
+ if (gf != NULL)
+ mdb_gelf_destroy(gf);
+ else if (io != NULL)
+ mdb_io_destroy(io);
+ return (ret);
}
static void *
@@ -205,6 +329,9 @@ xkb_fail(xkb_t *xkb, const char *msg, ...)
va_end(args);
if (xkb != NULL)
(void) xkb_close(xkb);
+
+ errno = ENOEXEC;
+
return (NULL);
}
@@ -213,7 +340,7 @@ xkb_build_m2p(xkb_t *xkb)
{
size_t i;
- for (i = 0; i < xkb->xkb_hdr.xch_nr_pages; i++) {
+ for (i = 0; i <= xkb->xkb_max_pfn; i++) {
if (xkb->xkb_p2m[i] != MFN_INVALID &&
xkb->xkb_p2m[i] > xkb->xkb_max_mfn)
xkb->xkb_max_mfn = xkb->xkb_p2m[i];
@@ -225,7 +352,7 @@ xkb_build_m2p(xkb_t *xkb)
for (i = 0; i <= xkb->xkb_max_mfn; i++)
xkb->xkb_m2p[i] = PFN_INVALID;
- for (i = 0; i < xkb->xkb_hdr.xch_nr_pages; i++) {
+ for (i = 0; i <= xkb->xkb_max_pfn; i++) {
if (xkb->xkb_p2m[i] != MFN_INVALID)
xkb->xkb_m2p[xkb->xkb_p2m[i]] = i;
}
@@ -234,37 +361,93 @@ xkb_build_m2p(xkb_t *xkb)
}
/*
- * Just to make things jolly fun, they've not page-aligned the p2m table.
+ * With FORMAT_CORE, we can use the table in the dump file directly.
+ * Just to make things fun, they've not page-aligned the p2m table.
*/
static int
xkb_map_p2m(xkb_t *xkb)
{
offset_t off;
size_t size;
- size_t count = xkb->xkb_hdr.xch_nr_pages;
- size_t boff = xkb->xkb_hdr.xch_index_offset;
+ xkb_core_t *xc = &xkb->xkb_core;
+ size_t count = xkb->xkb_nr_pages;
+ size_t boff = xc->xc_hdr.xch_index_offset;
- size = sizeof (mfn_t) * count + (PAGE_SIZE) * 2;
+ size = (sizeof (mfn_t) * count) + (PAGE_SIZE * 2);
size = PAGE_MASK(size);
off = PAGE_MASK(boff);
/* LINTED - alignment */
- xkb->xkb_p2m_buf = (mfn_t *)mmap(NULL, size, PROT_READ,
+ xc->xc_p2m_buf = (mfn_t *)mmap(NULL, size, PROT_READ,
MAP_SHARED, xkb->xkb_fd, off);
- if (xkb->xkb_p2m_buf == (xen_pfn_t *)MAP_FAILED) {
+ if (xc->xc_p2m_buf == (xen_pfn_t *)MAP_FAILED) {
(void) xkb_fail(xkb, "cannot map p2m table");
return (0);
}
/* LINTED - alignment */
- xkb->xkb_p2m = (mfn_t *)((char *)xkb->xkb_p2m_buf +
+ xkb->xkb_p2m = (mfn_t *)((char *)xc->xc_p2m_buf +
PAGE_OFFSET(boff));
return (1);
}
/*
+ * With FORMAT_ELF, we have a set of <pfn,mfn> pairs, which we convert
+ * into a linear array indexed by pfn for convenience. We also need to
+ * track the mapping between mfn and the offset in the file: a pfn with
+ * no mfn will not appear in the core file.
+ */
+static int
+xkb_build_p2m(xkb_t *xkb)
+{
+	xkb_elf_t *xe = &xkb->xkb_elf;
+	mdb_gelf_sect_t *sect;
+	size_t size;
+	size_t i;
+
+	/* One entry of the .xen_p2m section as read from the file. */
+	struct elf_p2m {
+		uint64_t pfn;
+		uint64_t gmfn;
+	} *p2m;
+
+	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_p2m");
+
+	if (sect == NULL) {
+		(void) xkb_fail(xkb, "cannot find section .xen_p2m");
+		return (0);
+	}
+
+	/*
+	 * The loops below walk xkb_nr_pages entries, a count taken from
+	 * the dump header rather than from this section.  Reject a section
+	 * whose recorded size cannot hold that many entries instead of
+	 * reading beyond it when the dump is truncated or corrupt.
+	 */
+	if (sect->gs_shdr.sh_size / sizeof (*p2m) < xkb->xkb_nr_pages) {
+		(void) xkb_fail(xkb, "section .xen_p2m is truncated");
+		return (0);
+	}
+
+	if ((p2m = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
+		(void) xkb_fail(xkb, "couldn't read .xen_p2m");
+		return (0);
+	}
+
+	/* First pass: find the largest pfn so the tables can be sized. */
+	for (i = 0; i < xkb->xkb_nr_pages; i++) {
+		if (p2m[i].pfn > xkb->xkb_max_pfn)
+			xkb->xkb_max_pfn = p2m[i].pfn;
+	}
+
+	size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
+	xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
+	size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
+	xe->xe_off = mdb_alloc(size, UM_SLEEP);
+
+	/*
+	 * Every pfn starts out unmapped.  xkb_p2m holds mfns and its
+	 * consumers compare entries against MFN_INVALID, so that is the
+	 * sentinel stored here; (size_t)-1 marks "no page in the file".
+	 */
+	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
+		xkb->xkb_p2m[i] = MFN_INVALID;
+		xe->xe_off[i] = (size_t)-1;
+	}
+
+	/*
+	 * Second pass: record each pair.  Entry i of .xen_p2m describes
+	 * page i of the dumped pages, so i doubles as the page index.
+	 */
+	for (i = 0; i < xkb->xkb_nr_pages; i++) {
+		xkb->xkb_p2m[p2m[i].pfn] = p2m[i].gmfn;
+		xe->xe_off[p2m[i].pfn] = i;
+	}
+
+	return (1);
+}
+
+/*
* Return the MFN of the top-level page table for the given as.
*/
static mfn_t
@@ -284,7 +467,7 @@ xkb_as_to_mfn(xkb_t *xkb, struct as *as)
&pfn))
return (MFN_INVALID);
- if (pfn >= xkb->xkb_hdr.xch_nr_pages)
+ if (pfn > xkb->xkb_max_pfn)
return (MFN_INVALID);
return (xkb->xkb_p2m[pfn]);
@@ -295,8 +478,8 @@ xkb_read_helper(xkb_t *xkb, struct as *as, int phys, uint64_t addr,
void *buf, size_t size)
{
size_t left = size;
- int windowed = xkb->xkb_pages == NULL;
- mfn_t tlmfn = xen_cr3_to_pfn(xkb->xkb_ctxts[0].ctrlreg[3]);
+ int windowed = (xkb->xkb_pages == NULL);
+ mfn_t tlmfn = xen_cr3_to_pfn(xkb->xkb_vcpus[0].ctrlreg[3]);
if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
return (-1);
@@ -314,7 +497,7 @@ xkb_read_helper(xkb_t *xkb, struct as *as, int phys, uint64_t addr,
return (-1);
} else {
xen_pfn_t pfn = pos >> PAGE_SHIFT;
- if (pfn >= xkb->xkb_hdr.xch_nr_pages)
+ if (pfn > xkb->xkb_max_pfn)
return (-1);
mfn = xkb->xkb_p2m[pfn];
if (mfn == MFN_INVALID)
@@ -405,6 +588,18 @@ xkb_readstr(xkb_t *xkb, uintptr_t addr)
}
static offset_t
+xkb_pfn_to_off(xkb_t *xkb, xen_pfn_t pfn)
+{
+ if (pfn == PFN_INVALID || pfn > xkb->xkb_max_pfn)
+ return (-1ULL);
+
+ if (xkb->xkb_type == XKB_FORMAT_CORE)
+ return (PAGE_SIZE * pfn);
+
+ return (PAGE_SIZE * (xkb->xkb_elf.xe_off[pfn]));
+}
+
+static offset_t
xkb_mfn_to_offset(xkb_t *xkb, mfn_t mfn)
{
xen_pfn_t pfn;
@@ -417,13 +612,13 @@ xkb_mfn_to_offset(xkb_t *xkb, mfn_t mfn)
if (pfn == PFN_INVALID)
return (-1ULL);
- return (xkb->xkb_hdr.xch_pages_offset + (PAGE_SIZE * pfn));
+ return (xkb->xkb_pages_off + xkb_pfn_to_off(xkb, pfn));
}
static char *
xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm)
{
- int windowed = xkb->xkb_pages == NULL;
+ int windowed = (xkb->xkb_pages == NULL);
offset_t off;
if (mm->mm_mfn == mfn)
@@ -458,7 +653,7 @@ xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm)
if (pfn == PFN_INVALID)
return (NULL);
- mm->mm_map = xkb->xkb_pages + (PAGE_SIZE * pfn);
+ mm->mm_map = xkb->xkb_pages + xkb_pfn_to_off(xkb, pfn);
}
return (mm->mm_map);
@@ -467,10 +662,12 @@ xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm)
static mfn_t
xkb_pte_to_mfn(mmu_info_t *mmu, char *ptep)
{
- /* LINTED - alignment */
- uint64_t pte = *((uint64_t *)ptep);
+ uint64_t pte = 0;
- if (mmu->mi_ptesize == 4) {
+ if (mmu->mi_ptesize == 8) {
+ /* LINTED - alignment */
+ pte = *((uint64_t *)ptep);
+ } else {
/* LINTED - alignment */
pte = *((uint32_t *)ptep);
}
@@ -759,70 +956,223 @@ xkb_build_ksyms(xkb_t *xkb)
return (1);
}
-/*ARGSUSED*/
-xkb_t *
-xkb_open(const char *namelist, const char *corefile, const char *swapfile,
- int flag, const char *err)
+static xkb_t *
+xkb_open_core(xkb_t *xkb)
{
- struct stat64 corestat;
- uintptr_t debug_va = DEF_DEBUG_INFO_VA;
+ xkb_core_t *xc = &xkb->xkb_core;
size_t sz;
- size_t i;
- xkb_t *xkb = NULL;
- if (stat64(corefile, &corestat) == -1)
- return (xkb_fail(xkb, "cannot stat %s", corefile));
+ xkb->xkb_type = XKB_FORMAT_CORE;
- if (flag != O_RDONLY)
- return (xkb_fail(xkb, "invalid open flags"));
+ if ((xkb->xkb_fd = open64(xkb->xkb_path, O_RDONLY)) == -1)
+ return (xkb_fail(xkb, "cannot open %s", xkb->xkb_path));
- xkb = mdb_zalloc(sizeof (*xkb), UM_SLEEP);
+ if (pread64(xkb->xkb_fd, &xc->xc_hdr, sizeof (xc->xc_hdr), 0) !=
+ sizeof (xc->xc_hdr))
+ return (xkb_fail(xkb, "invalid dump file"));
- for (i = 0; i < 4; i++)
- xkb->xkb_pt_map[i].mm_map = (char *)MAP_FAILED;
+ if (xc->xc_hdr.xch_magic == XC_CORE_MAGIC_HVM)
+ return (xkb_fail(xkb, "cannot process HVM images"));
- xkb->xkb_map.mm_map = (char *)MAP_FAILED;
- xkb->xkb_p2m_buf = (char *)MAP_FAILED;
+ if (xc->xc_hdr.xch_magic != XC_CORE_MAGIC) {
+ return (xkb_fail(xkb, "invalid magic %d",
+ xc->xc_hdr.xch_magic));
+ }
- xkb->xkb_path = strdup(corefile);
+ /*
+ * With FORMAT_CORE, all pages are in the dump (non-existing
+ * ones are zeroed out).
+ */
+ xkb->xkb_nr_pages = xc->xc_hdr.xch_nr_pages;
+ xkb->xkb_pages_off = xc->xc_hdr.xch_pages_offset;
+ xkb->xkb_max_pfn = xc->xc_hdr.xch_nr_pages - 1;
+ xkb->xkb_nr_vcpus = xc->xc_hdr.xch_nr_vcpus;
- if ((xkb->xkb_fd = open64(corefile, O_RDONLY)) == -1)
- return (xkb_fail(xkb, "cannot open %s", corefile));
+ sz = xkb->xkb_nr_vcpus * sizeof (*xkb->xkb_vcpus);
- if (pread64(xkb->xkb_fd, &xkb->xkb_hdr, sizeof (xkb->xkb_hdr), 0) !=
- sizeof (xkb->xkb_hdr))
- return (xkb_fail(xkb, "invalid dump file"));
+ xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);
- if (xkb->xkb_hdr.xch_magic == XC_CORE_MAGIC_HVM)
+ if (pread64(xkb->xkb_fd, xkb->xkb_vcpus, sz,
+ xc->xc_hdr.xch_ctxt_offset) != sz)
+ return (xkb_fail(xkb, "cannot read VCPU contexts"));
+
+ if (xkb->xkb_vcpus[0].flags & VGCF_HVM_GUEST)
return (xkb_fail(xkb, "cannot process HVM images"));
- if (xkb->xkb_hdr.xch_magic != XC_CORE_MAGIC) {
- return (xkb_fail(xkb, "invalid magic %d",
- xkb->xkb_hdr.xch_magic));
+ /*
+ * Try to map all the data pages. If we can't, fall back to the
+ * window/pread() approach, which is significantly slower.
+ */
+ xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
+ PROT_READ, MAP_SHARED, xkb->xkb_fd, xc->xc_hdr.xch_pages_offset);
+
+ if (xkb->xkb_pages == (char *)MAP_FAILED)
+ xkb->xkb_pages = NULL;
+
+ /*
+ * We'd like to adapt for correctness' sake, but we have no way of
+ * detecting a PAE guest, since cr4 writes are disallowed.
+ */
+ xkb->xkb_is_pae = 1;
+
+ if (!xkb_map_p2m(xkb))
+ return (NULL);
+
+ return (xkb);
+}
+
+static xkb_t *
+xkb_open_elf(xkb_t *xkb)
+{
+ xkb_elf_t *xe = &xkb->xkb_elf;
+ mdb_gelf_sect_t *sect;
+ char *notes;
+ char *pos;
+ mdb_io_t *io;
+
+ if ((io = mdb_fdio_create_path(NULL, xkb->xkb_path,
+ O_RDONLY, 0)) == NULL)
+ return (xkb_fail(xkb, "failed to open"));
+
+ xe->xe_gelf = mdb_gelf_create(io, ET_NONE, GF_FILE);
+
+ if (xe->xe_gelf == NULL) {
+ mdb_io_destroy(io);
+ return (xkb);
}
- sz = xkb->xkb_hdr.xch_nr_vcpus * sizeof (*xkb->xkb_ctxts);
+ xkb->xkb_fd = mdb_fdio_fileno(io);
- xkb->xkb_ctxts = mdb_alloc(sz, UM_SLEEP);
+ sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".note.Xen");
- if (pread64(xkb->xkb_fd, xkb->xkb_ctxts, sz,
- xkb->xkb_hdr.xch_ctxt_offset) != sz)
- return (xkb_fail(xkb, "cannot read VCPU contexts"));
+ if (sect == NULL)
+ return (xkb);
+
+ if ((notes = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL)
+ return (xkb);
+
+ /*
+ * Now we know this is indeed a hypervisor core dump, even if
+ * it's corrupted.
+ */
+ xkb->xkb_type = XKB_FORMAT_ELF;
+
+ for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
+ /* LINTED - alignment */
+ Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
+ uint64_t vers;
+ char *desc;
+ char *name;
+
+ name = pos + sizeof (*nhdr);
+ desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
- if (xkb->xkb_ctxts[0].flags & VGCF_HVM_GUEST)
+ pos = desc + nhdr->n_descsz;
+
+ switch (nhdr->n_type) {
+ case XEN_ELFNOTE_DUMPCORE_NONE:
+ break;
+
+ case XEN_ELFNOTE_DUMPCORE_HEADER:
+ if (nhdr->n_descsz != sizeof (struct xc_elf_header)) {
+ return (xkb_fail(xkb, "invalid ELF note "
+ "XEN_ELFNOTE_DUMPCORE_HEADER\n"));
+ }
+
+ bcopy(desc, &xe->xe_hdr,
+ sizeof (struct xc_elf_header));
+ break;
+
+ case XEN_ELFNOTE_DUMPCORE_XEN_VERSION:
+ if (nhdr->n_descsz != sizeof (struct xc_elf_version)) {
+ return (xkb_fail(xkb, "invalid ELF note "
+ "XEN_ELFNOTE_DUMPCORE_XEN_VERSION\n"));
+ }
+
+ bcopy(desc, &xe->xe_version,
+ sizeof (struct xc_elf_version));
+ break;
+
+ case XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION:
+ /* LINTED - alignment */
+ vers = *((uint64_t *)desc);
+ if ((vers >> 32) != 0) {
+ return (xkb_fail(xkb, "unknown major "
+ "version %d (expected 0)\n",
+ (int)(vers >> 32)));
+ }
+
+ if ((vers & 0xffffffff) != 1) {
+ mdb_warn("unexpected dump minor number "
+ "version %d (expected 1)\n",
+ (int)(vers & 0xffffffff));
+ }
+ break;
+
+ default:
+ mdb_warn("unknown ELF note %d(%s)\n",
+ nhdr->n_type, name);
+ break;
+ }
+ }
+
+ if (xe->xe_hdr.xeh_magic == XC_CORE_MAGIC_HVM)
return (xkb_fail(xkb, "cannot process HVM images"));
+ if (xe->xe_hdr.xeh_magic != XC_CORE_MAGIC) {
+ return (xkb_fail(xkb, "invalid magic %d",
+ xe->xe_hdr.xeh_magic));
+ }
+
+ xkb->xkb_nr_pages = xe->xe_hdr.xeh_nr_pages;
+ xkb->xkb_is_pae = (strstr(xe->xe_version.xev_capabilities,
+ "x86_32p") != NULL);
+
+ sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_prstatus");
+
+ if (sect == NULL)
+ return (xkb_fail(xkb, "cannot find section .xen_prstatus"));
+
+ if (sect->gs_shdr.sh_entsize != sizeof (vcpu_guest_context_t))
+ return (xkb_fail(xkb, "invalid section .xen_prstatus"));
+
+ xkb->xkb_nr_vcpus = sect->gs_shdr.sh_size / sect->gs_shdr.sh_entsize;
+
+ if ((xkb->xkb_vcpus = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL)
+ return (xkb_fail(xkb, "cannot load section .xen_prstatus"));
+
+ sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pages");
+
+ if (sect == NULL)
+ return (xkb_fail(xkb, "cannot find section .xen_pages"));
+
+ if (!PAGE_ALIGNED(sect->gs_shdr.sh_offset))
+ return (xkb_fail(xkb, ".xen_pages is not page aligned"));
+
+ if (sect->gs_shdr.sh_entsize != PAGE_SIZE)
+ return (xkb_fail(xkb, "invalid section .xen_pages"));
+
+ xkb->xkb_pages_off = sect->gs_shdr.sh_offset;
+
/*
* Try to map all the data pages. If we can't, fall back to the
* window/pread() approach, which is significantly slower.
*/
- xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_hdr.xch_nr_pages,
- PROT_READ, MAP_SHARED, xkb->xkb_fd,
- xkb->xkb_hdr.xch_pages_offset);
+ xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
+ PROT_READ, MAP_SHARED, xkb->xkb_fd, xkb->xkb_pages_off);
if (xkb->xkb_pages == (char *)MAP_FAILED)
xkb->xkb_pages = NULL;
+ if (!xkb_build_p2m(xkb))
+ return (NULL);
+
+ return (xkb);
+}
+
+static void
+xkb_init_mmu(xkb_t *xkb)
+{
#if defined(__amd64)
xkb->xkb_mmu.mi_max = 3;
xkb->xkb_mmu.mi_shift[0] = 12;
@@ -832,26 +1182,64 @@ xkb_open(const char *namelist, const char *corefile, const char *swapfile,
xkb->xkb_mmu.mi_ptes = 512;
xkb->xkb_mmu.mi_ptesize = 8;
#elif defined(__i386)
- /*
- * We'd like to adapt for correctness' sake, but we have no way of
- * detecting a PAE guest, since cr4 writes are disallowed.
- */
- debug_va = PAE_DEBUG_INFO_VA;
- xkb->xkb_mmu.mi_max = 2;
- xkb->xkb_mmu.mi_shift[0] = 12;
- xkb->xkb_mmu.mi_shift[1] = 21;
- xkb->xkb_mmu.mi_shift[2] = 30;
- xkb->xkb_mmu.mi_ptes = 512;
- xkb->xkb_mmu.mi_ptesize = 8;
+ if (xkb->xkb_is_pae) {
+ xkb->xkb_mmu.mi_max = 2;
+ xkb->xkb_mmu.mi_shift[0] = 12;
+ xkb->xkb_mmu.mi_shift[1] = 21;
+ xkb->xkb_mmu.mi_shift[2] = 30;
+ xkb->xkb_mmu.mi_ptes = 512;
+ xkb->xkb_mmu.mi_ptesize = 8;
+ } else {
+ xkb->xkb_mmu.mi_max = 1;
+ xkb->xkb_mmu.mi_shift[0] = 12;
+ xkb->xkb_mmu.mi_shift[1] = 22;
+ xkb->xkb_mmu.mi_ptes = 1024;
+ xkb->xkb_mmu.mi_ptesize = 4;
+ }
#endif
+}
- if (!xkb_map_p2m(xkb))
+/*ARGSUSED*/
+xkb_t *
+xkb_open(const char *namelist, const char *corefile, const char *swapfile,
+ int flag, const char *err)
+{
+ struct stat64 corestat;
+ xkb_t *xkb = NULL;
+ size_t i;
+
+ if (stat64(corefile, &corestat) == -1)
+ return (xkb_fail(xkb, "cannot stat %s", corefile));
+
+ if (flag != O_RDONLY)
+ return (xkb_fail(xkb, "invalid open flags"));
+
+ xkb = mdb_zalloc(sizeof (*xkb), UM_SLEEP);
+
+ for (i = 0; i < 4; i++)
+ xkb->xkb_pt_map[i].mm_map = (char *)MAP_FAILED;
+
+ xkb->xkb_type = XKB_FORMAT_UNKNOWN;
+ xkb->xkb_map.mm_map = (char *)MAP_FAILED;
+ xkb->xkb_core.xc_p2m_buf = (char *)MAP_FAILED;
+ xkb->xkb_fd = -1;
+
+ xkb->xkb_path = strdup(corefile);
+
+ if ((xkb = xkb_open_elf(xkb)) == NULL)
return (NULL);
+ if (xkb->xkb_type == XKB_FORMAT_UNKNOWN) {
+ if (!xkb_open_core(xkb))
+ return (NULL);
+ }
+
+ xkb_init_mmu(xkb);
+
if (!xkb_build_m2p(xkb))
return (NULL);
- if (xkb_read(xkb, debug_va, &xkb->xkb_info,
+ if (xkb_read(xkb, DEBUG_INFO, &xkb->xkb_info,
sizeof (xkb->xkb_info)) != sizeof (xkb->xkb_info))
return (xkb_fail(xkb, "cannot read debug_info"));
@@ -874,7 +1262,6 @@ xkb_open(const char *namelist, const char *corefile, const char *swapfile,
int
xkb_close(xkb_t *xkb)
{
- size_t sz;
size_t i;
if (xkb == NULL)
@@ -885,14 +1272,9 @@ xkb_close(xkb_t *xkb)
(xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t));
}
- sz = sizeof (xen_pfn_t) * xkb->xkb_hdr.xch_nr_pages;
-
- if (xkb->xkb_p2m_buf != (xen_pfn_t *)MAP_FAILED)
- (void) munmap(xkb->xkb_p2m_buf, sz);
-
if (xkb->xkb_pages != NULL) {
(void) munmap((void *)xkb->xkb_pages,
- PAGE_SIZE * xkb->xkb_hdr.xch_nr_pages);
+ PAGE_SIZE * xkb->xkb_nr_pages);
} else {
for (i = 0; i < 4; i++) {
char *addr = xkb->xkb_pt_map[i].mm_map;
@@ -905,16 +1287,44 @@ xkb_close(xkb_t *xkb)
}
}
- if (xkb->xkb_ctxts != NULL) {
- mdb_free(xkb->xkb_ctxts, sizeof (struct vcpu_guest_context) *
- xkb->xkb_hdr.xch_nr_vcpus);
- }
-
if (xkb->xkb_namelist != NULL)
mdb_free(xkb->xkb_namelist, xkb->xkb_namesize);
- if (xkb->xkb_fd != -1)
- (void) close(xkb->xkb_fd);
+ if (xkb->xkb_type == XKB_FORMAT_ELF) {
+ xkb_elf_t *xe = &xkb->xkb_elf;
+ size_t sz;
+
+ if (xe->xe_gelf != NULL)
+ mdb_gelf_destroy(xe->xe_gelf);
+
+ sz = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
+
+ if (xkb->xkb_p2m != NULL)
+ mdb_free(xkb->xkb_p2m, sz);
+
+ sz = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
+
+ if (xe->xe_off != NULL)
+ mdb_free(xe->xe_off, sz);
+ } else if (xkb->xkb_type == XKB_FORMAT_CORE) {
+ xkb_core_t *xc = &xkb->xkb_core;
+ size_t sz;
+
+ if (xkb->xkb_fd != -1)
+ (void) close(xkb->xkb_fd);
+
+ sz = (xkb->xkb_nr_pages * sizeof (mfn_t)) + (PAGE_SIZE * 2);
+ sz = PAGE_MASK(sz);
+
+ if (xc->xc_p2m_buf != (xen_pfn_t *)MAP_FAILED)
+ (void) munmap(xc->xc_p2m_buf, sz);
+
+ if (xkb->xkb_vcpus != NULL) {
+ sz = sizeof (struct vcpu_guest_context) *
+ xkb->xkb_nr_vcpus;
+ mdb_free(xkb->xkb_vcpus, sz);
+ }
+ }
free(xkb->xkb_path);
@@ -937,7 +1347,7 @@ xkb_sym_io(xkb_t *xkb, const char *symfile)
uint64_t
xkb_vtop(xkb_t *xkb, struct as *as, uintptr_t addr)
{
- mfn_t tlmfn = xen_cr3_to_pfn(xkb->xkb_ctxts[0].ctrlreg[3]);
+ mfn_t tlmfn = xen_cr3_to_pfn(xkb->xkb_vcpus[0].ctrlreg[3]);
mfn_t mfn;
if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
@@ -959,14 +1369,14 @@ xkb_getmregs(xkb_t *xkb, uint_t cpu, struct privmregs *mregs)
struct cpu_user_regs *ur;
struct regs *regs;
- if (cpu >= xkb->xkb_hdr.xch_nr_vcpus) {
+ if (cpu >= xkb->xkb_nr_vcpus) {
errno = EINVAL;
return (-1);
}
bzero(mregs, sizeof (*mregs));
- vcpu = &xkb->xkb_ctxts[cpu];
+ vcpu = &xkb->xkb_vcpus[cpu];
ur = &vcpu->user_regs;
regs = &mregs->pm_gregs;
diff --git a/usr/src/uts/common/xen/io/blkif_impl.h b/usr/src/uts/common/xen/io/blkif_impl.h
new file mode 100644
index 0000000000..2684b81767
--- /dev/null
+++ b/usr/src/uts/common/xen/io/blkif_impl.h
@@ -0,0 +1,89 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __XEN_BLKIF_H__
+#define __XEN_BLKIF_H__
+
+#include <public/io/ring.h>
+#include <public/io/blkif.h>
+#include <public/io/protocols.h>
+
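+/*
+ * Not a real protocol.  Used to generate ring structs which contain
+ * the elements common to all protocols only.  This way we get a
+ * compiler-checkable way to use common struct elements, so we can
+ * avoid using switch(protocol) in a number of places.
+ *
+ * NOTE(review): no such "common" request/response definition follows in
+ * this header; only the i386 and x86_64 layouts are declared below, and
+ * callers select between them explicitly.
+ */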
+
+/*
+ * i386 protocol version
+ *
+ * Request and response layouts for a ring that follows the 32-bit x86
+ * ABI.  Both structures are declared between "#pragma pack(4)" and the
+ * matching "#pragma pack()", so member alignment is capped at 4 bytes
+ * for these definitions only.
+ */
+
+#pragma pack(4)
+
+struct blkif_x86_32_request {
+ uint8_t operation; /* BLKIF_OP_??? */
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+ uint64_t id; /* private guest value, echoed in resp */
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ /* fixed-size array; only the first nr_segments entries are meaningful */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+struct blkif_x86_32_response {
+ uint64_t id; /* copied from request */
+ uint8_t operation; /* copied from request */
+ int16_t status; /* BLKIF_RSP_??? */
+};
+typedef struct blkif_x86_32_request blkif_x86_32_request_t;
+typedef struct blkif_x86_32_response blkif_x86_32_response_t;
+
+/* restore the default packing for the definitions that follow */
+#pragma pack()
+
+/*
+ * x86_64 protocol version
+ *
+ * Request and response layouts for a ring that follows the 64-bit x86
+ * ABI.  Declared with default packing.  With GCC the 64-bit "id" member
+ * is explicitly aligned to 8 bytes; other compilers get an explicit
+ * 4-byte pad in the request instead.
+ */
+struct blkif_x86_64_request {
+ uint8_t operation; /* BLKIF_OP_??? */
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+#if defined(__GNUC__)
+ uint64_t __attribute__((__aligned__(8))) id;
+#else
+ /*
+  * presumably pushes id to offset 8, as the aligned attribute would;
+  * assumes blkif_vdev_t is 16 bits wide -- TODO confirm
+  */
+ uint8_t pad[4];
+ uint64_t id;
+#endif
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+struct blkif_x86_64_response {
+#if defined(__GNUC__)
+ uint64_t __attribute__((__aligned__(8))) id;
+#else
+ /*
+  * NOTE(review): no alignment is forced here; the structure size
+  * depends on the compiler's natural alignment of uint64_t -- confirm
+  * for 32-bit non-GCC builds.
+  */
+ uint64_t id;
+#endif
+ uint8_t operation; /* copied from request */
+ int16_t status; /* BLKIF_RSP_??? */
+};
+typedef struct blkif_x86_64_request blkif_x86_64_request_t;
+typedef struct blkif_x86_64_response blkif_x86_64_response_t;
+
+DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response);
+DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response);
+
+/*
+ * Identifies which request/response layout a ring uses: the layout this
+ * kernel was compiled with (NATIVE), or one of the explicit i386 /
+ * x86_64 layouts declared above.
+ */
+enum blkif_protocol {
+ BLKIF_PROTOCOL_NATIVE = 1,
+ BLKIF_PROTOCOL_X86_32 = 2,
+ BLKIF_PROTOCOL_X86_64 = 3,
+};
+
+#endif /* __XEN_BLKIF_H__ */
diff --git a/usr/src/uts/common/xen/io/xdb.c b/usr/src/uts/common/xen/io/xdb.c
index 33a075ac3d..e4013de7b4 100644
--- a/usr/src/uts/common/xen/io/xdb.c
+++ b/usr/src/uts/common/xen/io/xdb.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -73,11 +73,17 @@
#include <sys/gnttab.h>
#include <sys/lofi.h>
#include <io/xdf.h>
+#include <xen/io/blkif_impl.h>
#include <io/xdb.h>
static xdb_t *xdb_statep;
static int xdb_debug = 0;
+static int xdb_push_response(xdb_t *, uint64_t, uint8_t, uint16_t);
+static int xdb_get_request(xdb_t *, blkif_request_t *);
+static void blkif_get_x86_32_req(blkif_request_t *, blkif_x86_32_request_t *);
+static void blkif_get_x86_64_req(blkif_request_t *, blkif_x86_64_request_t *);
+
#ifdef DEBUG
/*
* debug aid functions
@@ -90,18 +96,18 @@ logva(xdb_t *vdp, uint64_t va)
int i;
page_addrs = vdp->page_addrs;
- for (i = 0; i < XDB_MAX_IO_PAGES; i++) {
+ for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
if (page_addrs[i] == va)
debug_enter("VA remapping found!");
}
- for (i = 0; i < XDB_MAX_IO_PAGES; i++) {
+ for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
if (page_addrs[i] == 0) {
page_addrs[i] = va;
break;
}
}
- ASSERT(i < XDB_MAX_IO_PAGES);
+ ASSERT(i < XDB_MAX_IO_PAGES(vdp));
}
static void
@@ -111,13 +117,13 @@ unlogva(xdb_t *vdp, uint64_t va)
int i;
page_addrs = vdp->page_addrs;
- for (i = 0; i < XDB_MAX_IO_PAGES; i++) {
+ for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
if (page_addrs[i] == va) {
page_addrs[i] = 0;
break;
}
}
- ASSERT(i < XDB_MAX_IO_PAGES);
+ ASSERT(i < XDB_MAX_IO_PAGES(vdp));
}
static void
@@ -434,18 +440,10 @@ xdb_free_req(xdb_request_t *req)
+/*
+ * Send a success or failure response for 'req' back to the frontend,
+ * echoing the request's id and operation, and notify the other end when
+ * the ring push asks for it.
+ */
static void
xdb_response(xdb_t *vdp, blkif_request_t *req, boolean_t ok)
{
- xendev_ring_t *ringp = vdp->xs_ring;
ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;
- blkif_response_t *resp;
-
- resp = xvdi_ring_get_response(ringp);
- ASSERT(resp);
- ddi_put64(acchdl, &resp->id, ddi_get64(acchdl, &req->id));
- ddi_put8(acchdl, &resp->operation, ddi_get8(acchdl, &req->operation));
- ddi_put16(acchdl, (uint16_t *)&resp->status,
- ok ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
- if (xvdi_ring_push_response(ringp))
+ /*
+  * xdb_push_response() takes an error status: zero is reported as
+  * BLKIF_RSP_OKAY and any non-zero value as BLKIF_RSP_ERROR.  Convert
+  * the boolean explicitly; passing 'ok' through unchanged would report
+  * failures as successes and vice versa.
+  */
+ if (xdb_push_response(vdp, ddi_get64(acchdl, &req->id),
+ ddi_get8(acchdl, &req->operation), ok ? 0 : 1))
xvdi_notify_oe(vdp->xs_dip);
}
@@ -454,18 +452,28 @@ xdb_init_ioreqs(xdb_t *vdp)
{
int i;
- for (i = 0; i < BLKIF_RING_SIZE; i++) {
+ ASSERT(vdp->xs_nentry);
+
+ if (vdp->xs_req == NULL)
+ vdp->xs_req = kmem_alloc(vdp->xs_nentry *
+ sizeof (xdb_request_t), KM_SLEEP);
+#ifdef DEBUG
+ if (vdp->page_addrs == NULL)
+ vdp->page_addrs = kmem_zalloc(XDB_MAX_IO_PAGES(vdp) *
+ sizeof (uint64_t), KM_SLEEP);
+#endif
+ for (i = 0; i < vdp->xs_nentry; i++) {
vdp->xs_req[i].xr_idx = i;
vdp->xs_req[i].xr_next = i + 1;
}
- vdp->xs_req[BLKIF_RING_SIZE - 1].xr_next = -1;
+ vdp->xs_req[vdp->xs_nentry - 1].xr_next = -1;
vdp->xs_free_req = 0;
/* alloc va in host dom for io page mapping */
vdp->xs_iopage_va = vmem_xalloc(heap_arena,
- XDB_MAX_IO_PAGES * PAGESIZE, PAGESIZE, 0, 0, 0, 0,
+ XDB_MAX_IO_PAGES(vdp) * PAGESIZE, PAGESIZE, 0, 0, 0, 0,
VM_SLEEP);
- for (i = 0; i < XDB_MAX_IO_PAGES; i++)
+ for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++)
hat_prepare_mapping(kas.a_hat,
vdp->xs_iopage_va + i * PAGESIZE);
}
@@ -475,18 +483,29 @@ xdb_uninit_ioreqs(xdb_t *vdp)
{
int i;
- for (i = 0; i < XDB_MAX_IO_PAGES; i++)
+ for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++)
hat_release_mapping(kas.a_hat,
vdp->xs_iopage_va + i * PAGESIZE);
vmem_xfree(heap_arena, vdp->xs_iopage_va,
- XDB_MAX_IO_PAGES * PAGESIZE);
+ XDB_MAX_IO_PAGES(vdp) * PAGESIZE);
+ if (vdp->xs_req != NULL) {
+ kmem_free(vdp->xs_req, vdp->xs_nentry * sizeof (xdb_request_t));
+ vdp->xs_req = NULL;
+ }
+#ifdef DEBUG
+ if (vdp->page_addrs != NULL) {
+ kmem_free(vdp->page_addrs, XDB_MAX_IO_PAGES(vdp) *
+ sizeof (uint64_t));
+ vdp->page_addrs = NULL;
+ }
+#endif
}
static uint_t
xdb_intr(caddr_t arg)
{
- xendev_ring_t *ringp;
- blkif_request_t *req;
+ blkif_request_t req;
+ blkif_request_t *reqp = &req;
xdb_request_t *xreq;
buf_t *bp;
uint8_t op;
@@ -506,8 +525,6 @@ xdb_intr(caddr_t arg)
return (DDI_INTR_UNCLAIMED);
}
- ringp = vdp->xs_ring;
-
/*
* We'll loop till there is no more request in the ring
* We won't stuck in this loop for ever since the size of ring buffer
@@ -516,16 +533,16 @@ xdb_intr(caddr_t arg)
*/
/* req_event will be increased in xvdi_ring_get_request() */
- while ((req = xvdi_ring_get_request(ringp)) != NULL) {
+ while (xdb_get_request(vdp, reqp)) {
ret = DDI_INTR_CLAIMED;
- op = ddi_get8(vdp->xs_ring_hdl, &req->operation);
+ op = ddi_get8(vdp->xs_ring_hdl, &reqp->operation);
if (op == BLKIF_OP_READ ||
op == BLKIF_OP_WRITE ||
op == BLKIF_OP_WRITE_BARRIER ||
op == BLKIF_OP_FLUSH_DISKCACHE) {
#ifdef DEBUG
- xdb_dump_request_oe(req);
+ xdb_dump_request_oe(reqp);
#endif
xreq = xdb_get_req(vdp);
ASSERT(xreq);
@@ -545,11 +562,11 @@ xdb_intr(caddr_t arg)
}
xreq->xr_curseg = 0; /* start from first segment */
- bp = xdb_get_buf(vdp, req, xreq);
+ bp = xdb_get_buf(vdp, reqp, xreq);
if (bp == NULL) {
/* failed to form a buf */
xdb_free_req(xreq);
- xdb_response(vdp, req, B_FALSE);
+ xdb_response(vdp, reqp, B_FALSE);
continue;
}
bp->av_forw = NULL;
@@ -566,9 +583,8 @@ xdb_intr(caddr_t arg)
vdp->xs_l_iobuf->av_forw = bp;
vdp->xs_l_iobuf = bp;
}
- vdp->xs_ionum++;
} else {
- xdb_response(vdp, req, B_FALSE);
+ xdb_response(vdp, reqp, B_FALSE);
XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
"Unsupported cmd received from dom %d",
ddi_get_name_addr(dip), vdp->xs_peer));
@@ -586,14 +602,11 @@ xdb_intr(caddr_t arg)
static int
xdb_biodone(buf_t *bp)
{
- blkif_response_t *resp;
int i, err, bioerr;
uint8_t segs;
gnttab_unmap_grant_ref_t unmapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
xdb_request_t *xreq = XDB_BP2XREQ(bp);
xdb_t *vdp = xreq->xr_vdp;
- xendev_ring_t *ringp = vdp->xs_ring;
- ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;
buf_t *nbp;
bioerr = geterror(bp);
@@ -663,13 +676,7 @@ xdb_biodone(buf_t *bp)
/* send response back to frontend */
if (vdp->xs_if_status == XDB_CONNECTED) {
- resp = xvdi_ring_get_response(ringp);
- ASSERT(resp);
- ddi_put64(acchdl, &resp->id, xreq->xr_id);
- ddi_put8(acchdl, &resp->operation, xreq->xr_op);
- ddi_put16(acchdl, (uint16_t *)&resp->status,
- bioerr ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY);
- if (xvdi_ring_push_response(ringp))
+ if (xdb_push_response(vdp, xreq->xr_id, xreq->xr_op, bioerr))
xvdi_notify_oe(vdp->xs_dip);
XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
"sent resp back to frontend, id=%llu",
@@ -680,9 +687,10 @@ xdb_biodone(buf_t *bp)
xdb_free_req(xreq);
vdp->xs_ionum--;
- if ((vdp->xs_if_status != XDB_CONNECTED) && (vdp->xs_ionum == 0))
+ if ((vdp->xs_if_status != XDB_CONNECTED) && (vdp->xs_ionum == 0)) {
/* we're closing, someone is waiting for I/O clean-up */
cv_signal(&vdp->xs_ionumcv);
+ }
mutex_exit(&vdp->xs_iomutex);
@@ -697,6 +705,7 @@ xdb_bindto_frontend(xdb_t *vdp)
grant_ref_t gref;
evtchn_port_t evtchn;
dev_info_t *dip = vdp->xs_dip;
+ char protocol[64] = "";
/*
* Gather info from frontend
@@ -713,11 +722,50 @@ xdb_bindto_frontend(xdb_t *vdp)
return (DDI_FAILURE);
}
+ vdp->xs_blk_protocol = BLKIF_PROTOCOL_NATIVE;
+ vdp->xs_nentry = BLKIF_RING_SIZE;
+ vdp->xs_entrysize = sizeof (union blkif_sring_entry);
+
+ err = xenbus_gather(XBT_NULL, oename,
+ "protocol", "%63s", protocol, NULL);
+ if (err)
+ (void) strcpy(protocol, "unspecified, assuming native");
+ else {
+ /*
+ * We must check for NATIVE first, so that the fast path
+ * is taken for copying data from the guest to the host.
+ */
+ if (strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE) != 0) {
+ if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
+ vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_32;
+ vdp->xs_nentry = BLKIF_X86_32_RING_SIZE;
+ vdp->xs_entrysize =
+ sizeof (union blkif_x86_32_sring_entry);
+ } else if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_64) ==
+ 0) {
+ vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_64;
+ vdp->xs_nentry = BLKIF_X86_64_RING_SIZE;
+ vdp->xs_entrysize =
+ sizeof (union blkif_x86_64_sring_entry);
+ } else {
+ xvdi_fatal_error(dip, err, "unknown protocol");
+ return (DDI_FAILURE);
+ }
+ }
+ }
+#ifdef DEBUG
+ cmn_err(CE_NOTE, "xdb@%s: blkif protocol '%s' ",
+ ddi_get_name_addr(dip), protocol);
+#endif
+
/*
* map and init ring
+ *
+ * The ring parameters must match those which have been allocated
+ * in the front end.
*/
- err = xvdi_map_ring(dip, BLKIF_RING_SIZE,
- sizeof (union blkif_sring_entry), gref, &vdp->xs_ring);
+ err = xvdi_map_ring(dip, vdp->xs_nentry, vdp->xs_entrysize,
+ gref, &vdp->xs_ring);
if (err != DDI_SUCCESS)
return (DDI_FAILURE);
/*
@@ -1224,6 +1272,7 @@ xdb_send_buf(void *arg)
while ((bp = vdp->xs_f_iobuf) != NULL) {
vdp->xs_f_iobuf = bp->av_forw;
bp->av_forw = NULL;
+ vdp->xs_ionum++;
mutex_exit(&vdp->xs_iomutex);
if (bp->b_bcount != 0) {
int err = ldi_strategy(vdp->xs_ldi_hdl, bp);
@@ -1473,7 +1522,7 @@ static struct dev_ops xdb_dev_ops = {
*/
static struct modldrv modldrv = {
&mod_driverops, /* Type of module. */
- "vbd backend driver %I%", /* Name of the module */
+ "vbd backend driver 1.4", /* Name of the module */
&xdb_dev_ops /* driver ops */
};
@@ -1511,3 +1560,97 @@ _info(struct modinfo *modinfop)
{
return (mod_info(&xdb_modlinkage, modinfop));
}
+
+/*
+ * Pull the next request off the ring and store it, in native
+ * blkif_request_t form, in the caller-supplied 'req'.
+ *
+ * Returns 0 when xvdi_ring_get_request() has nothing to hand out and 1
+ * once a request has been copied.  An unknown ring protocol panics.
+ */
+static int
+xdb_get_request(xdb_t *vdp, blkif_request_t *req)
+{
+	void *slot;
+
+	slot = xvdi_ring_get_request(vdp->xs_ring);
+	if (slot == NULL)
+		return (0);
+
+	if (vdp->xs_blk_protocol == BLKIF_PROTOCOL_NATIVE) {
+		/* same layout on both sides: a straight copy is enough */
+		(void) memcpy(req, slot, sizeof (*req));
+	} else if (vdp->xs_blk_protocol == BLKIF_PROTOCOL_X86_32) {
+		blkif_get_x86_32_req(req, slot);
+	} else if (vdp->xs_blk_protocol == BLKIF_PROTOCOL_X86_64) {
+		blkif_get_x86_64_req(req, slot);
+	} else {
+		cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d",
+		    ddi_get_name_addr(vdp->xs_dip),
+		    vdp->xs_blk_protocol);
+	}
+
+	return (1);
+}
+
+/*
+ * Fill in the next response slot of the ring, using the layout that
+ * matches the ring's protocol, and push it.
+ *
+ * 'status' is an error indication: zero is written out as
+ * BLKIF_RSP_OKAY, anything else as BLKIF_RSP_ERROR.  The return value is
+ * whatever xvdi_ring_push_response() returns; callers use it to decide
+ * whether to notify the other end.
+ */
+static int
+xdb_push_response(xdb_t *vdp, uint64_t id, uint8_t op, uint16_t status)
+{
+	ddi_acc_handle_t hdl = vdp->xs_ring_hdl;
+	void *slot = xvdi_ring_get_response(vdp->xs_ring);
+	int rsp_status;
+
+	ASSERT(slot);
+
+	if (status == 0)
+		rsp_status = BLKIF_RSP_OKAY;
+	else
+		rsp_status = BLKIF_RSP_ERROR;
+
+	switch (vdp->xs_blk_protocol) {
+	case BLKIF_PROTOCOL_NATIVE: {
+		blkif_response_t *r = (blkif_response_t *)slot;
+
+		ddi_put64(hdl, &r->id, id);
+		ddi_put8(hdl, &r->operation, op);
+		ddi_put16(hdl, (uint16_t *)&r->status, rsp_status);
+		break;
+	}
+	case BLKIF_PROTOCOL_X86_32: {
+		blkif_x86_32_response_t *r = (blkif_x86_32_response_t *)slot;
+
+		ddi_put64(hdl, &r->id, id);
+		ddi_put8(hdl, &r->operation, op);
+		ddi_put16(hdl, (uint16_t *)&r->status, rsp_status);
+		break;
+	}
+	case BLKIF_PROTOCOL_X86_64: {
+		blkif_x86_64_response_t *r = (blkif_x86_64_response_t *)slot;
+
+		ddi_put64(hdl, &r->id, id);
+		ddi_put8(hdl, &r->operation, op);
+		ddi_put16(hdl, (uint16_t *)&r->status, rsp_status);
+		break;
+	}
+	default:
+		cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d",
+		    ddi_get_name_addr(vdp->xs_dip),
+		    vdp->xs_blk_protocol);
+	}
+
+	return (xvdi_ring_push_response(vdp->xs_ring));
+}
+
+/*
+ * Convert a request laid out per the i386 ring ABI ('src') into the
+ * native blkif_request_t ('dst').
+ *
+ * At most BLKIF_MAX_SEGMENTS_PER_REQUEST segments are copied.  Note that
+ * dst->nr_segments is stored as read and is NOT clamped here, so callers
+ * must still range-check it.
+ */
+static void
+blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src)
+{
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+	dst->operation = src->operation;
+	dst->nr_segments = src->nr_segments;
+	dst->handle = src->handle;
+	dst->id = src->id;
+	dst->sector_number = src->sector_number;
+
+	/*
+	 * 'src' is the ring slot handed out by xvdi_ring_get_request(),
+	 * which the frontend can presumably still modify.  Bound the copy
+	 * with the private copy of nr_segments rather than reading the slot
+	 * a second time, so the count stored in 'dst' and the number of
+	 * segments actually copied cannot disagree.
+	 */
+	if (n > dst->nr_segments)
+		n = dst->nr_segments;
+	for (i = 0; i < n; i++)
+		dst->seg[i] = src->seg[i];
+}
+
+/*
+ * Convert a request laid out per the x86_64 ring ABI ('src') into the
+ * native blkif_request_t ('dst').
+ *
+ * At most BLKIF_MAX_SEGMENTS_PER_REQUEST segments are copied.  Note that
+ * dst->nr_segments is stored as read and is NOT clamped here, so callers
+ * must still range-check it.
+ */
+static void
+blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src)
+{
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+	dst->operation = src->operation;
+	dst->nr_segments = src->nr_segments;
+	dst->handle = src->handle;
+	dst->id = src->id;
+	dst->sector_number = src->sector_number;
+
+	/*
+	 * 'src' is the ring slot handed out by xvdi_ring_get_request(),
+	 * which the frontend can presumably still modify.  Bound the copy
+	 * with the private copy of nr_segments rather than reading the slot
+	 * a second time, so the count stored in 'dst' and the number of
+	 * segments actually copied cannot disagree.
+	 */
+	if (n > dst->nr_segments)
+		n = dst->nr_segments;
+	for (i = 0; i < n; i++)
+		dst->seg[i] = src->seg[i];
+}
diff --git a/usr/src/uts/common/xen/io/xdb.h b/usr/src/uts/common/xen/io/xdb.h
index d4d744d2ac..0abd008d0a 100644
--- a/usr/src/uts/common/xen/io/xdb.h
+++ b/usr/src/uts/common/xen/io/xdb.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -101,7 +101,7 @@ enum xdb_fe_state {
#define XDB_INST2MINOR(i) (minor_t)(i)
#define XDB_INST2SOFTS(instance) \
((xdb_t *)ddi_get_soft_state(xdb_statep, (instance)))
-#define XDB_MAX_IO_PAGES BLKIF_RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST
+#define XDB_MAX_IO_PAGES(v) ((v)->xs_nentry * BLKIF_MAX_SEGMENTS_PER_REQUEST)
/* get kva of a mapped-in page corresponding to (xreq-index, seg) pair */
#define XDB_IOPAGE_VA(_pagebase, _xreqidx, _seg) \
((_pagebase) + ((_xreqidx) \
@@ -192,14 +192,17 @@ struct xdb {
/* head of free list of xdb_request_t */
int xs_free_req;
/* pre-allocated xdb_request_t pool */
- xdb_request_t xs_req[BLKIF_RING_SIZE];
+ xdb_request_t *xs_req;
kstat_t *xs_kstats;
uint64_t xs_stat_req_reads;
uint64_t xs_stat_req_writes;
uint64_t xs_stat_req_barriers;
uint64_t xs_stat_req_flushes;
+ enum blkif_protocol xs_blk_protocol;
+ size_t xs_nentry;
+ size_t xs_entrysize;
#ifdef DEBUG
- uint64_t page_addrs[XDB_MAX_IO_PAGES]; /* for debug aid */
+ uint64_t *page_addrs; /* for debug aid */
#endif /* DEBUG */
};
diff --git a/usr/src/uts/common/xen/io/xdf.c b/usr/src/uts/common/xen/io/xdf.c
index 4d695ec992..865eb69230 100644
--- a/usr/src/uts/common/xen/io/xdf.c
+++ b/usr/src/uts/common/xen/io/xdf.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -56,6 +56,7 @@
#include <xen/sys/xendev.h>
#include <sys/gnttab.h>
#include <sys/scsi/generic/inquiry.h>
+#include <xen/io/blkif_impl.h>
#include <io/xdf.h>
#define FLUSH_DISKCACHE 0x1
@@ -325,16 +326,6 @@ xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
ddi_iblock_cookie_t ibc;
ddi_iblock_cookie_t softibc;
int instance;
-#if defined(XPV_HVM_DRIVER) && defined(__i386)
- /* XXX: 6609126 32-bit xdf driver panics on a 64-bit dom0 */
- extern int xen_is_64bit;
-
- if (xen_is_64bit) {
- cmn_err(CE_WARN, "xdf cannot be used in 32-bit domUs on a"
- " 64-bit dom0.");
- return (DDI_FAILURE);
- }
-#endif
xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM,
"xdfdebug", 0);
@@ -1708,6 +1699,19 @@ trans_retry:
goto abort_trans;
}
+ /*
+ * "protocol" is written by the domain builder in the case of PV
+ * domains. However, it is not written for HVM domains, so let's
+ * write it here.
+ */
+ if (rv = xenbus_printf(xbt, xsnode, "protocol", "%s",
+ XEN_IO_PROTO_ABI_NATIVE)) {
+ cmn_err(CE_WARN, "xdf@%s: failed to write protocol",
+ ddi_get_name_addr(dip));
+ xvdi_fatal_error(dip, rv, "writing protocol");
+ goto abort_trans;
+ }
+
if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) {
cmn_err(CE_WARN, "xdf@%s: "
"failed to switch state to XenbusStateInitialised",
diff --git a/usr/src/uts/common/xen/io/xdf.h b/usr/src/uts/common/xen/io/xdf.h
index ea796772dd..4f8b635733 100644
--- a/usr/src/uts/common/xen/io/xdf.h
+++ b/usr/src/uts/common/xen/io/xdf.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -35,7 +35,12 @@ extern "C" {
#endif
-#define BLKIF_RING_SIZE __RING_SIZE((blkif_sring_t *)NULL, PAGESIZE)
+#define BLKIF_RING_SIZE \
+ __RING_SIZE((blkif_sring_t *)NULL, PAGESIZE)
+#define BLKIF_X86_32_RING_SIZE \
+ __RING_SIZE((blkif_x86_32_sring_t *)NULL, PAGESIZE)
+#define BLKIF_X86_64_RING_SIZE \
+ __RING_SIZE((blkif_x86_64_sring_t *)NULL, PAGESIZE)
/*
* VBDs have standard 512 byte blocks
diff --git a/usr/src/uts/common/xen/io/xnb.c b/usr/src/uts/common/xen/io/xnb.c
index fd962b2bcc..e344dbab7a 100644
--- a/usr/src/uts/common/xen/io/xnb.c
+++ b/usr/src/uts/common/xen/io/xnb.c
@@ -114,11 +114,6 @@ int xnb_unmop_hiwat = NET_TX_RING_SIZE - (NET_TX_RING_SIZE >> 2);
boolean_t xnb_hv_copy = B_TRUE;
boolean_t xnb_explicit_pageflip_set = B_FALSE;
-#ifdef XNB_DEBUG
-#define NR_GRANT_ENTRIES \
- (NR_GRANT_FRAMES * PAGESIZE / sizeof (grant_entry_t))
-#endif /* XNB_DEBUG */
-
/* XXPV dme: are these really invalid? */
#define INVALID_GRANT_HANDLE ((grant_handle_t)-1)
#define INVALID_GRANT_REF ((grant_ref_t)-1)
@@ -652,10 +647,6 @@ xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
cmn_err(CE_PANIC, "xnb_to_peer: "
"id %d out of range in request 0x%p",
rxreq->id, (void *)rxreq);
- if (rxreq->gref >= NR_GRANT_ENTRIES)
- cmn_err(CE_PANIC, "xnb_to_peer: "
- "grant ref %d out of range in request 0x%p",
- rxreq->gref, (void *)rxreq);
#endif /* XNB_DEBUG */
/* Assign a pfn and map the new page at the allocated va. */
@@ -995,10 +986,6 @@ xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
cmn_err(CE_PANIC, "xnb_copy_to_peer: "
"id %d out of range in request 0x%p",
rxreq->id, (void *)rxreq);
- if (rxreq->gref >= NR_GRANT_ENTRIES)
- cmn_err(CE_PANIC, "xnb_copy_to_peer: "
- "grant ref %d out of range in request 0x%p",
- rxreq->gref, (void *)rxreq);
#endif /* XNB_DEBUG */
/* 2 */
@@ -1482,8 +1469,6 @@ finished:
rxp->xr_mop.ref =
RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop)->gref;
- ASSERT(rxp->xr_mop.ref < NR_GRANT_ENTRIES);
-
*mop = rxp->xr_mop;
*rxpp = rxp;
}
diff --git a/usr/src/uts/common/xen/io/xnf.c b/usr/src/uts/common/xen/io/xnf.c
index 9fd2712824..597e4cf344 100644
--- a/usr/src/uts/common/xen/io/xnf.c
+++ b/usr/src/uts/common/xen/io/xnf.c
@@ -411,7 +411,8 @@ xnf_setup_rings(xnf_t *xnfp)
gnttab_grant_foreign_access_ref(ref, oeid,
mfn, 0);
} else {
- gnttab_grant_foreign_transfer_ref(ref, oeid);
+ gnttab_grant_foreign_transfer_ref(ref,
+ oeid, 0);
}
rxrp->id = ix;
rxrp->gref = ref;
@@ -1418,7 +1419,7 @@ rx_buffer_hang(xnf_t *xnfp, struct xnf_buffer_desc *bdesc)
gnttab_grant_foreign_access_ref(ref, oeid, mfn, 0);
} else {
- gnttab_grant_foreign_transfer_ref(ref, oeid);
+ gnttab_grant_foreign_transfer_ref(ref, oeid, 0);
}
}
reqp->id = hang_ix;
diff --git a/usr/src/uts/common/xen/os/gnttab.c b/usr/src/uts/common/xen/os/gnttab.c
index 238c45768e..a8e49feec0 100644
--- a/usr/src/uts/common/xen/os/gnttab.c
+++ b/usr/src/uts/common/xen/os/gnttab.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -31,11 +31,14 @@
*
* Granting foreign access to our memory reservation.
*
- * Copyright (c) 2005, Christopher Clark
+ * Copyright (c) 2005-2006, Christopher Clark
* Copyright (c) 2004-2005, K A Fraser
*
- * This file may be distributed separately from the Linux kernel, or
- * incorporated into other software packages, subject to the following license:
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
@@ -90,49 +93,47 @@
#include <sys/bootvfs.h>
#include <sys/bootprops.h>
#include <vm/seg_kmem.h>
+#include <sys/mman.h>
-#define cmpxchg(t, c, n) atomic_cas_16((t), (c), (n))
-
-/* External tools reserve first few grant table entries. */
-#define NR_RESERVED_ENTRIES 8
-
-#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * \
- MMU_PAGESIZE / sizeof (grant_entry_t))
-#define GNTTAB_LIST_END (NR_GRANT_ENTRIES + 1)
-#define VALID_GRANT_REF(r) ((r) < NR_GRANT_ENTRIES)
+/* Globals */
-static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
+static grant_ref_t **gnttab_list;
+static uint_t nr_grant_frames;
static int gnttab_free_count;
static grant_ref_t gnttab_free_head;
static kmutex_t gnttab_list_lock;
-
static grant_entry_t *shared;
-#define GT_PGADDR(i) ((uintptr_t)shared + ((i) << PAGESHIFT))
+static struct gnttab_free_callback *gnttab_free_callback_list;
-static struct gnttab_free_callback *gnttab_free_callback_list = NULL;
+/* Macros */
-static int
-get_free_entries(int count)
+#define GT_PGADDR(i) ((uintptr_t)shared + ((i) << MMU_PAGESHIFT))
+#define VALID_GRANT_REF(r) ((r) < (nr_grant_frames * GREFS_PER_GRANT_FRAME))
+#define RPP (PAGESIZE / sizeof (grant_ref_t))
+#define GNTTAB_ENTRY(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
+#define CMPXCHG(t, c, n) atomic_cas_16((t), (c), (n))
+/* External tools reserve first few grant table entries. */
+#define NR_RESERVED_ENTRIES 8
+#define GNTTAB_LIST_END 0xffffffff
+#define GREFS_PER_GRANT_FRAME (PAGESIZE / sizeof (grant_entry_t))
+
+/* Implementation */
+
+/*
+ * Ask the hypervisor (GNTTABOP_query_size on DOMID_SELF) for the maximum
+ * number of grant table frames.  If the hypercall fails or reports a
+ * status other than GNTST_okay, fall back to 4 frames.
+ */
+static uint_t
+max_nr_grant_frames(void)
{
- int ref;
- grant_ref_t head;
+ struct gnttab_query_size query;
+ int rc;
- mutex_enter(&gnttab_list_lock);
- if (gnttab_free_count < count) {
- mutex_exit(&gnttab_list_lock);
- return (-1);
- }
- ref = head = gnttab_free_head;
- gnttab_free_count -= count;
- while (count-- > 1)
- head = gnttab_list[head];
- gnttab_free_head = gnttab_list[head];
- gnttab_list[head] = GNTTAB_LIST_END;
- mutex_exit(&gnttab_list_lock);
- return (ref);
-}
+ query.dom = DOMID_SELF;
+
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
+ if ((rc < 0) || (query.status != GNTST_okay))
+ return (4); /* Legacy max supported number of frames */
-#define get_free_entry() get_free_entries(1)
+ ASSERT(query.max_nr_frames);
+ return (query.max_nr_frames);
+}
static void
do_free_callbacks(void)
@@ -162,13 +163,79 @@ check_free_callbacks(void)
do_free_callbacks();
}
+/*
+ * Extend the grant reference free list by 'more_frames' grant frames.
+ *
+ * Allocates list storage for the new frames, chains the new references
+ * together, and puts that chain at the front of the existing free list.
+ * The caller must hold gnttab_list_lock.  Always returns 0.
+ */
+static int
+grow_gnttab_list(uint_t more_frames)
+{
+ uint_t new_nr_grant_frames, extra_entries, i;
+
+ ASSERT(MUTEX_HELD(&gnttab_list_lock));
+
+ new_nr_grant_frames = nr_grant_frames + more_frames;
+ extra_entries = more_frames * GREFS_PER_GRANT_FRAME;
+
+ /*
+  * One page of list storage per new grant frame (KM_SLEEP, with the
+  * list lock held).
+  * NOTE(review): GNTTAB_ENTRY() selects the page with RPP, not
+  * GREFS_PER_GRANT_FRAME -- confirm one page per frame is intended.
+  */
+ for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
+ gnttab_list[i] = kmem_alloc(PAGESIZE, KM_SLEEP);
+
+ /* link each new reference to its successor, except the last one */
+ for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
+ i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
+ GNTTAB_ENTRY(i) = i + 1;
+
+ /* the last new reference points at the old head; new chain goes first */
+ GNTTAB_ENTRY(i) = gnttab_free_head;
+ gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
+ gnttab_free_count += extra_entries;
+
+ nr_grant_frames = new_nr_grant_frames;
+
+ /* references just became available: run the free-callback check */
+ check_free_callbacks();
+
+ return (0);
+}
+
+/*
+ * Grow the free list by enough whole grant frames to supply at least
+ * 'req_entries' more references.  The caller must hold gnttab_list_lock.
+ *
+ * Returns -1 without changing anything if that would exceed
+ * max_nr_grant_frames(); otherwise returns grow_gnttab_list()'s result.
+ */
+static int
+gnttab_expand(uint_t req_entries)
+{
+	uint_t frames_needed;
+
+	ASSERT(MUTEX_HELD(&gnttab_list_lock));
+
+	/* round the entry count up to a whole number of grant frames */
+	frames_needed = ((req_entries + (GREFS_PER_GRANT_FRAME - 1)) /
+	    GREFS_PER_GRANT_FRAME);
+
+	if (nr_grant_frames + frames_needed <= max_nr_grant_frames())
+		return (grow_gnttab_list(frames_needed));
+
+	return (-1);
+}
+
+/*
+ * Detach 'count' references from the front of the free list and return
+ * the first one; the detached chain is terminated with GNTTAB_LIST_END.
+ *
+ * If the free list is too short it is expanded first.  A failed
+ * expansion leaves the list untouched and its negative result is
+ * returned.
+ */
+static int
+get_free_entries(int count)
+{
+	int first, rc;
+	grant_ref_t last;
+
+	mutex_enter(&gnttab_list_lock);
+
+	if (gnttab_free_count < count) {
+		rc = gnttab_expand(count - gnttab_free_count);
+		if (rc < 0) {
+			mutex_exit(&gnttab_list_lock);
+			return (rc);
+		}
+	}
+
+	last = gnttab_free_head;
+	first = last;
+	gnttab_free_count -= count;
+
+	/* walk to the count'th entry; it becomes the tail of our chain */
+	for (; count > 1; count--)
+		last = GNTTAB_ENTRY(last);
+
+	gnttab_free_head = GNTTAB_ENTRY(last);
+	GNTTAB_ENTRY(last) = GNTTAB_LIST_END;
+
+	mutex_exit(&gnttab_list_lock);
+	return (first);
+}
+
static void
put_free_entry(grant_ref_t ref)
{
ASSERT(VALID_GRANT_REF(ref));
mutex_enter(&gnttab_list_lock);
- gnttab_list[ref] = gnttab_free_head;
+ GNTTAB_ENTRY(ref) = gnttab_free_head;
gnttab_free_head = ref;
gnttab_free_count++;
check_free_callbacks();
@@ -184,7 +251,7 @@ gnttab_grant_foreign_access(domid_t domid, gnttab_frame_t frame, int readonly)
{
int ref;
- if ((ref = get_free_entry()) == -1)
+ if ((ref = get_free_entries(1)) == -1)
return (-1);
ASSERT(VALID_GRANT_REF(ref));
@@ -236,7 +303,7 @@ gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
cmn_err(CE_WARN, "g.e. still in use!");
return (0);
}
- } while ((nflags = cmpxchg(&shared[ref].flags, flags, 0)) != flags);
+ } while ((nflags = CMPXCHG(&shared[ref].flags, flags, 0)) != flags);
return (1);
}
@@ -266,29 +333,26 @@ gnttab_end_foreign_access(grant_ref_t ref, int readonly, gnttab_frame_t page)
}
int
-gnttab_grant_foreign_transfer(domid_t domid)
+gnttab_grant_foreign_transfer(domid_t domid, pfn_t pfn)
{
int ref;
- if ((ref = get_free_entry()) == -1)
+ if ((ref = get_free_entries(1)) == -1)
return (-1);
ASSERT(VALID_GRANT_REF(ref));
- shared[ref].frame = 0;
- shared[ref].domid = domid;
- membar_producer();
- shared[ref].flags = GTF_accept_transfer;
+ gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
return (ref);
}
void
-gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid)
+gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, pfn_t pfn)
{
ASSERT(VALID_GRANT_REF(ref));
- shared[ref].frame = 0;
+ shared[ref].frame = pfn;
shared[ref].domid = domid;
membar_producer();
shared[ref].flags = GTF_accept_transfer;
@@ -307,7 +371,7 @@ gnttab_end_foreign_transfer_ref(grant_ref_t ref)
* reference and return failure (== 0).
*/
while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
- if (cmpxchg(&shared[ref].flags, flags, 0) == flags)
+ if (CMPXCHG(&shared[ref].flags, flags, 0) == flags)
return (0);
(void) HYPERVISOR_yield();
}
@@ -356,11 +420,11 @@ gnttab_free_grant_references(grant_ref_t head)
return;
mutex_enter(&gnttab_list_lock);
ref = head;
- while (gnttab_list[ref] != GNTTAB_LIST_END) {
- ref = gnttab_list[ref];
+ while (GNTTAB_ENTRY(ref) != GNTTAB_LIST_END) {
+ ref = GNTTAB_ENTRY(ref);
count++;
}
- gnttab_list[ref] = gnttab_free_head;
+ GNTTAB_ENTRY(ref) = gnttab_free_head;
gnttab_free_head = head;
gnttab_free_count += count;
check_free_callbacks();
@@ -381,13 +445,19 @@ gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head)
}
int
+gnttab_empty_grant_references(const grant_ref_t *private_head)
+{ /* Tests whether the caller's private reference list is empty. */
+ return (*private_head == GNTTAB_LIST_END); /* 1 if empty, else 0 */
+}
+
+int
gnttab_claim_grant_reference(grant_ref_t *private_head)
{
grant_ref_t g = *private_head;
if (g == GNTTAB_LIST_END)
return (-1);
- *private_head = gnttab_list[g];
+ *private_head = GNTTAB_ENTRY(g);
return (g);
}
@@ -396,7 +466,7 @@ gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release)
{
ASSERT(VALID_GRANT_REF(release));
- gnttab_list[release] = *private_head;
+ GNTTAB_ENTRY(release) = *private_head;
*private_head = release;
}
@@ -417,8 +487,45 @@ out:
mutex_exit(&gnttab_list_lock);
}
-#ifdef XPV_HVM_DRIVER
+void
+gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
+{
+ struct gnttab_free_callback **pcb;
+ /*
+ * Unlink `callback' from gnttab_free_callback_list if it is queued
+ * there; otherwise do nothing. pcb always points at the link (the
+ * list head or a node's next field) that refers to the node being
+ * examined, so removing the first element needs no special case.
+ * The walk is done under gnttab_list_lock.
+ */
+ mutex_enter(&gnttab_list_lock);
+ for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
+ if (*pcb == callback) {
+ *pcb = callback->next;
+ break;
+ }
+ }
+ mutex_exit(&gnttab_list_lock);
+}
+/*
+ * Issue GNTTABOP_setup_table for the request described by *pset.
+ *
+ * The callers in this file fill in pset->dom and pset->nr_frames before
+ * calling. This routine allocates an array of pset->nr_frames
+ * gnttab_frame_t entries, installs it as pset->frame_list and makes the
+ * hypercall. A hypercall failure or a nonzero pset->status is reported
+ * through cmn_err(CE_PANIC, ...).
+ *
+ * Returns the allocated array, which the callers then read as the grant
+ * table's frame numbers. It is not freed here; the callers release it
+ * with kmem_free(frames, nr_frames * sizeof (gnttab_frame_t)).
+ */
+static gnttab_frame_t *
+gnttab_setup(gnttab_setup_table_t *pset)
+{
+ gnttab_frame_t *frames;
+
+ frames = kmem_alloc(pset->nr_frames * sizeof (gnttab_frame_t),
+ KM_SLEEP);
+
+ /*LINTED: constant in conditional context*/
+ set_xen_guest_handle(pset->frame_list, frames);
+
+ /*
+ * Take pset->nr_frames pages of grant table space from
+ * the hypervisor and map it
+ */
+ if ((HYPERVISOR_grant_table_op(GNTTABOP_setup_table, pset, 1) != 0) ||
+ (pset->status != 0)) {
+ cmn_err(CE_PANIC, "Grant Table setup failed");
+ }
+
+ return (frames);
+}
+#ifdef XPV_HVM_DRIVER
static void
gnttab_map(void)
{
@@ -428,114 +535,117 @@ gnttab_map(void)
int i;
va = (caddr_t)shared;
- for (i = 0; i < NR_GRANT_FRAMES; i++) {
- pfn = hat_getpfnum(kas.a_hat, va);
+ for (i = 0; i < max_nr_grant_frames(); i++) {
+ if ((pfn = hat_getpfnum(kas.a_hat, va)) == PFN_INVALID)
+ cmn_err(CE_PANIC, "gnttab_map: Invalid pfn");
xatp.domid = DOMID_SELF;
xatp.idx = i;
xatp.space = XENMAPSPACE_grant_table;
xatp.gpfn = pfn;
hat_unload(kas.a_hat, va, MMU_PAGESIZE, HAT_UNLOAD);
+ /*
+ * This call replaces the existing machine page backing
+ * the given gpfn with the page from the allocated grant
+ * table at index idx. The existing machine page is
+ * returned to the free list.
+ */
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) != 0)
panic("Couldn't map grant table");
-
hat_devload(kas.a_hat, va, MMU_PAGESIZE, pfn,
PROT_READ | PROT_WRITE,
HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
-
va += MMU_PAGESIZE;
}
}
+#endif /* XPV_HVM_DRIVER */
void
gnttab_init(void)
{
+ gnttab_setup_table_t set;
int i;
+ uint_t nr_init_grefs, max_nr_glist_frames;
+ gnttab_frame_t *frames;
- shared = (grant_entry_t *)xen_alloc_pages(NR_GRANT_FRAMES);
+ /*
+ * gnttab_init() should only be invoked once.
+ */
+ mutex_enter(&gnttab_list_lock);
+ ASSERT(nr_grant_frames == 0);
+ nr_grant_frames = 1;
+ mutex_exit(&gnttab_list_lock);
- gnttab_map();
+ max_nr_glist_frames = (max_nr_grant_frames() *
+ GREFS_PER_GRANT_FRAME / (PAGESIZE / sizeof (grant_ref_t)));
- for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++)
- gnttab_list[i] = i + 1;
- gnttab_free_count = NR_GRANT_ENTRIES - NR_RESERVED_ENTRIES;
- gnttab_free_head = NR_RESERVED_ENTRIES;
+ set.dom = DOMID_SELF;
+ set.nr_frames = max_nr_grant_frames();
+ frames = gnttab_setup(&set);
- mutex_init(&gnttab_list_lock, NULL, MUTEX_DEFAULT, NULL);
-}
+#ifdef XPV_HVM_DRIVER
+ shared = (grant_entry_t *)xen_alloc_pages(set.nr_frames);
-void
-gnttab_resume(void)
-{
gnttab_map();
-}
-
#else /* XPV_HVM_DRIVER */
+ shared = vmem_xalloc(heap_arena, set.nr_frames * MMU_PAGESIZE,
+ MMU_PAGESIZE, 0, 0, 0, 0, VM_SLEEP);
+ for (i = 0; i < set.nr_frames; i++) {
+ hat_devload(kas.a_hat, (caddr_t)GT_PGADDR(i), PAGESIZE,
+ xen_assign_pfn(frames[i]), PROT_READ | PROT_WRITE,
+ HAT_LOAD_LOCK);
+ }
+#endif
-void
-gnttab_init(void)
-{
- gnttab_setup_table_t set;
- gnttab_frame_t frames[NR_GRANT_FRAMES];
- int i;
-
- set.dom = DOMID_SELF;
- set.nr_frames = NR_GRANT_FRAMES;
- /*LINTED: constant in conditional context*/
- set_xen_guest_handle(set.frame_list, frames);
+ gnttab_list = kmem_alloc(max_nr_glist_frames * sizeof (grant_ref_t *),
+ KM_SLEEP);
- /*
- * Take 4 pages of grant table space from the hypervisor and map it
- */
- if ((HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &set, 1) != 0) ||
- (set.status != 0)) {
- cmn_err(CE_PANIC, "Grant Table setup failed");
+ for (i = 0; i < nr_grant_frames; i++) {
+ gnttab_list[i] = kmem_alloc(PAGESIZE, KM_SLEEP);
}
- shared = vmem_xalloc(heap_arena, NR_GRANT_FRAMES * MMU_PAGESIZE,
- MMU_PAGESIZE, 0, 0, 0, 0, VM_SLEEP);
+ kmem_free(frames, set.nr_frames * sizeof (gnttab_frame_t));
- for (i = 0; i < NR_GRANT_FRAMES; i++)
- kbm_map_ma(FRAME_TO_MA(frames[i]), GT_PGADDR(i), 0);
+ nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
- for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++)
- gnttab_list[i] = i + 1;
- gnttab_free_count = NR_GRANT_ENTRIES - NR_RESERVED_ENTRIES;
- gnttab_free_head = NR_RESERVED_ENTRIES;
+ for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
+ GNTTAB_ENTRY(i) = i + 1;
- mutex_init(&gnttab_list_lock, NULL, MUTEX_DEFAULT, NULL);
+ GNTTAB_ENTRY(nr_init_grefs - 1) = GNTTAB_LIST_END;
+ gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
+ gnttab_free_head = NR_RESERVED_ENTRIES;
}
void
gnttab_resume(void)
{
gnttab_setup_table_t set;
- gnttab_frame_t frames[NR_GRANT_FRAMES];
int i;
+ gnttab_frame_t *frames;
+ uint_t available_frames = max_nr_grant_frames();
- set.dom = DOMID_SELF;
- set.nr_frames = NR_GRANT_FRAMES;
- /*LINTED: constant in conditional context*/
- set_xen_guest_handle(set.frame_list, frames);
-
- /*
- * Take NR_GRANT_FRAMES pages of grant table space from the
- * hypervisor and map it
- */
- if ((HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &set, 1) != 0) ||
- (set.status != 0)) {
- cmn_err(CE_PANIC, "Grant Table setup failed");
+ if (available_frames < nr_grant_frames) {
+ cmn_err(CE_PANIC, "Hypervisor does not have enough grant "
+ "frames: required(%u), available(%u)", nr_grant_frames,
+ available_frames);
}
- for (i = 0; i < NR_GRANT_FRAMES; i++) {
+#ifdef XPV_HVM_DRIVER
+ gnttab_map();
+#endif /* XPV_HVM_DRIVER */
+
+ set.dom = DOMID_SELF;
+ set.nr_frames = available_frames;
+ frames = gnttab_setup(&set);
+
+ for (i = 0; i < available_frames; i++) {
(void) HYPERVISOR_update_va_mapping(GT_PGADDR(i),
FRAME_TO_MA(frames[i]) | PT_VALID | PT_WRITABLE,
UVMF_INVLPG | UVMF_ALL);
}
+ kmem_free(frames, set.nr_frames * sizeof (gnttab_frame_t));
}
-#endif /* XPV_HVM_DRIVER */
-
void
gnttab_suspend(void)
{
@@ -544,7 +654,7 @@ gnttab_suspend(void)
/*
* clear grant table mappings before suspending
*/
- for (i = 0; i < NR_GRANT_FRAMES; i++) {
+ for (i = 0; i < max_nr_grant_frames(); i++) {
(void) HYPERVISOR_update_va_mapping(GT_PGADDR(i),
0, UVMF_INVLPG);
}
diff --git a/usr/src/uts/common/xen/os/hypercall.c b/usr/src/uts/common/xen/os/hypercall.c
index fae533dfbf..564c5d2fd6 100644
--- a/usr/src/uts/common/xen/os/hypercall.c
+++ b/usr/src/uts/common/xen/os/hypercall.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -300,9 +300,9 @@ HYPERVISOR_mmuext_op(struct mmuext_op *req, int count, uint_t *success_count,
}
long
-HYPERVISOR_acm_op(int cmd, void *arg)
+HYPERVISOR_acm_op(struct xen_acmctl *arg)
{
- return (__hypercall2(__HYPERVISOR_acm_op, (long)cmd, (ulong_t)arg));
+ return (__hypercall1(__HYPERVISOR_acm_op, (ulong_t)arg));
}
long
diff --git a/usr/src/uts/common/xen/public/acm.h b/usr/src/uts/common/xen/public/acm.h
index 23078837fb..ef62da0201 100644
--- a/usr/src/uts/common/xen/public/acm.h
+++ b/usr/src/uts/common/xen/public/acm.h
@@ -56,6 +56,19 @@
#define ACM_ACCESS_DENIED -111
#define ACM_NULL_POINTER_ERROR -200
+/*
+ Error codes reported when trying to test for a new policy.
+ These error codes are reported in an array of tuples where
+ each error code is followed by a parameter describing the error
+ more closely, such as a domain id.
+*/
+#define ACM_EVTCHN_SHARING_VIOLATION 0x100
+#define ACM_GNTTAB_SHARING_VIOLATION 0x101
+#define ACM_DOMAIN_LOOKUP 0x102
+#define ACM_CHWALL_CONFLICT 0x103
+#define ACM_SSIDREF_IN_USE 0x104
+
+
/* primary policy in lower 4 bits */
#define ACM_NULL_POLICY 0
#define ACM_CHINESE_WALL_POLICY 1
@@ -78,7 +91,7 @@
* whenever the interpretation of the related
* policy's data structure changes
*/
-#define ACM_POLICY_VERSION 2
+#define ACM_POLICY_VERSION 3
#define ACM_CHWALL_VERSION 1
#define ACM_STE_VERSION 1
@@ -119,6 +132,14 @@ typedef uint16_t domaintype_t;
/* each offset in bytes from start of the struct they
* are part of */
+/* V3 of the policy buffer added a version structure (major/minor pair) */
+struct acm_policy_version
+{
+ uint32_t major;
+ uint32_t minor;
+};
+
+
/* each buffer consists of all policy information for
* the respective policy given in the policy code
*
@@ -136,8 +157,10 @@ struct acm_policy_buffer {
uint32_t primary_buffer_offset;
uint32_t secondary_policy_code;
uint32_t secondary_buffer_offset;
+ struct acm_policy_version xml_pol_version; /* add in V3 */
};
+
struct acm_policy_reference_buffer {
uint32_t len;
};
diff --git a/usr/src/uts/common/xen/public/acm_ops.h b/usr/src/uts/common/xen/public/acm_ops.h
index 5e103dca7b..27a88720a7 100644
--- a/usr/src/uts/common/xen/public/acm_ops.h
+++ b/usr/src/uts/common/xen/public/acm_ops.h
@@ -34,7 +34,7 @@
* This makes sure that old versions of acm tools will stop working in a
* well-defined way (rather than crashing the machine, for instance).
*/
-#define ACM_INTERFACE_VERSION 0xAAAA0008
+#define ACM_INTERFACE_VERSION 0xAAAA000A
/************************************************************************/
@@ -49,8 +49,7 @@
#define ACMOP_setpolicy 1
struct acm_setpolicy {
/* IN */
- uint32_t interface_version;
- XEN_GUEST_HANDLE(void) pushcache;
+ XEN_GUEST_HANDLE_64(void) pushcache;
uint32_t pushcache_size;
};
@@ -58,8 +57,7 @@ struct acm_setpolicy {
#define ACMOP_getpolicy 2
struct acm_getpolicy {
/* IN */
- uint32_t interface_version;
- XEN_GUEST_HANDLE(void) pullcache;
+ XEN_GUEST_HANDLE_64(void) pullcache;
uint32_t pullcache_size;
};
@@ -67,8 +65,7 @@ struct acm_getpolicy {
#define ACMOP_dumpstats 3
struct acm_dumpstats {
/* IN */
- uint32_t interface_version;
- XEN_GUEST_HANDLE(void) pullcache;
+ XEN_GUEST_HANDLE_64(void) pullcache;
uint32_t pullcache_size;
};
@@ -78,20 +75,18 @@ struct acm_dumpstats {
#define ACM_GETBY_domainid 2
struct acm_getssid {
/* IN */
- uint32_t interface_version;
uint32_t get_ssid_by; /* ACM_GETBY_* */
union {
domaintype_t domainid;
ssidref_t ssidref;
} id;
- XEN_GUEST_HANDLE(void) ssidbuf;
+ XEN_GUEST_HANDLE_64(void) ssidbuf;
uint32_t ssidbuf_size;
};
#define ACMOP_getdecision 5
struct acm_getdecision {
/* IN */
- uint32_t interface_version;
uint32_t get_decision_by1; /* ACM_GETBY_* */
uint32_t get_decision_by2; /* ACM_GETBY_* */
union {
@@ -107,6 +102,50 @@ struct acm_getdecision {
uint32_t acm_decision;
};
+/*
+ * Argument block for ACMOP_chgpolicy. Per the IN/OUT markers below, the
+ * caller supplies three buffers (policy_pushcache, del_array, chg_array)
+ * plus an err_array buffer that receives error codes. Each buffer is a
+ * 64-bit guest handle paired with a uint32_t size field.
+ * NOTE(review): whether the size fields count bytes or elements is not
+ * stated here -- confirm against the hypervisor's ACM implementation.
+ */
+#define ACMOP_chgpolicy 6
+struct acm_change_policy {
+ /* IN */
+ XEN_GUEST_HANDLE_64(void) policy_pushcache;
+ uint32_t policy_pushcache_size;
+ XEN_GUEST_HANDLE_64(void) del_array;
+ uint32_t delarray_size;
+ XEN_GUEST_HANDLE_64(void) chg_array;
+ uint32_t chgarray_size;
+ /* OUT */
+ /* array with error code */
+ XEN_GUEST_HANDLE_64(void) err_array;
+ uint32_t errarray_size;
+};
+/*
+ * Argument block for ACMOP_relabeldoms: one input buffer (relabel_map)
+ * and one output buffer (err_array), each a 64-bit guest handle paired
+ * with a uint32_t size field.
+ * NOTE(review): the layout of relabel_map entries is not described in
+ * this header -- confirm against the hypervisor's ACM implementation.
+ */
+#define ACMOP_relabeldoms 7
+struct acm_relabel_doms {
+ /* IN */
+ XEN_GUEST_HANDLE_64(void) relabel_map;
+ uint32_t relabel_map_size;
+ /* OUT */
+ XEN_GUEST_HANDLE_64(void) err_array;
+ uint32_t errarray_size;
+};
+
+/*
+ * future interface to Xen
+ *
+ * One structure carrying every ACM operation: a command word, the
+ * interface version, and a union holding the per-operation argument
+ * block.
+ * NOTE(review): cmd presumably holds one of the ACMOP_* values and
+ * selects the matching member of u, and interface_version presumably
+ * carries ACM_INTERFACE_VERSION -- confirm against the hypervisor.
+ */
+struct xen_acmctl {
+ uint32_t cmd;
+ uint32_t interface_version;
+ union {
+ struct acm_setpolicy setpolicy;
+ struct acm_getpolicy getpolicy;
+ struct acm_dumpstats dumpstats;
+ struct acm_getssid getssid;
+ struct acm_getdecision getdecision;
+ struct acm_change_policy change_policy;
+ struct acm_relabel_doms relabel_doms;
+ } u;
+};
+
+typedef struct xen_acmctl xen_acmctl_t;
+DEFINE_XEN_GUEST_HANDLE(xen_acmctl_t);
+
#endif /* __XEN_PUBLIC_ACM_OPS_H__ */
/*
diff --git a/usr/src/uts/common/xen/public/arch-x86/xen-x86_32.h b/usr/src/uts/common/xen/public/arch-x86/xen-x86_32.h
index 8e508f2f57..01f9f982fe 100644
--- a/usr/src/uts/common/xen/public/arch-x86/xen-x86_32.h
+++ b/usr/src/uts/common/xen/public/arch-x86/xen-x86_32.h
@@ -21,7 +21,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
- * Copyright (c) 2004-2006, K A Fraser
+ * Copyright (c) 2004-2007, K A Fraser
*/
#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
@@ -115,6 +115,32 @@
#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START)
#endif
+/*
+ * 32-/64-bit invariability for control interfaces (domctl/sysctl).
+ *
+ * __DEFINE_XEN_GUEST_HANDLE is redefined so that each use produces two
+ * types: the native __guest_handle_<name>, and __guest_handle_64_<name>
+ * in which the pointer is overlaid with a uint64_aligned_t. With GCC
+ * the overlay is an anonymous union (pointer reached as .p) and
+ * uint64_aligned_t carries an aligned(8) attribute; otherwise the union
+ * is named `u' (pointer reached as .u.p) and uint64_aligned_t is plain
+ * uint64_t.
+ *
+ * uint64_aligned_t is #defined after the macro that mentions it; that
+ * works because the name is only expanded where the handle macro is
+ * used.
+ *
+ * set_xen_guest_handle is redefined to zero the whole object first when
+ * the handle is 8 bytes wide, then store the pointer through .p.
+ * NOTE(review): in the non-GCC variant the 64-bit handle has no .p
+ * member, so this macro presumably cannot be applied to it there --
+ * confirm which setter non-GCC callers are expected to use.
+ */
+#undef __DEFINE_XEN_GUEST_HANDLE
+
+#ifdef __GNUC__
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
+ typedef struct { type *p; } \
+ __guest_handle_ ## name; \
+ typedef struct { union { type *p; uint64_aligned_t q; }; } \
+ __guest_handle_64_ ## name
+#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
+#else
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
+ typedef struct { type *p; } \
+ __guest_handle_ ## name; \
+ typedef struct { union { type *p; uint64_aligned_t q; }u; } \
+ __guest_handle_64_ ## name
+#define uint64_aligned_t uint64_t
+#endif
+
+#undef set_xen_guest_handle
+#define set_xen_guest_handle(hnd, val) \
+ do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \
+ (hnd).p = val; \
+ } while ( 0 )
+#define XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name
+
#ifndef __ASSEMBLY__
struct cpu_user_regs {
diff --git a/usr/src/uts/common/xen/public/arch-x86/xen-x86_64.h b/usr/src/uts/common/xen/public/arch-x86/xen-x86_64.h
index f011db9073..2a63318904 100644
--- a/usr/src/uts/common/xen/public/arch-x86/xen-x86_64.h
+++ b/usr/src/uts/common/xen/public/arch-x86/xen-x86_64.h
@@ -55,7 +55,7 @@
* XXPV HACK, we don't support the hypercall page yet.
* #endif
*/
-
+
/*
* 64-bit segment selectors
* These flat segments are in the Xen-private section of every GDT. Since these
@@ -151,7 +151,10 @@ struct iret_context {
#ifdef __GNUC__
/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
-#define __DECL_REG(name) union { uint64_t r ## name, e ## name; }
+#define __DECL_REG(name) union { \
+ uint64_t r ## name, e ## name; \
+ uint32_t _e ## name; \
+}
#else
/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */
#define __DECL_REG(name) uint64_t r ## name
diff --git a/usr/src/uts/common/xen/public/arch-x86/xen.h b/usr/src/uts/common/xen/public/arch-x86/xen.h
index df53dbe9d2..cbecb15eef 100644
--- a/usr/src/uts/common/xen/public/arch-x86/xen.h
+++ b/usr/src/uts/common/xen/public/arch-x86/xen.h
@@ -37,16 +37,23 @@
#endif
#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
-#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name
+#define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name
+#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name)
+#if !defined(__GNUC__) && defined(__i386__)
+#define set_xen_guest_handle_u(hnd, val) do { (hnd).u.p = val; } while (0)
+#define get_xen_guest_handle_u(val, hnd) do { val = (hnd).u.p; } while (0)
+#else
+#define set_xen_guest_handle_u(hnd, val) do { (hnd).p = val; } while (0)
+#define get_xen_guest_handle_u(val, hnd) do { val = (hnd).p; } while (0)
+#endif
#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0)
-/*
- * XXPV - we need get in privcmd
- * #ifdef __XEN_TOOLS__
- */
#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
-/*
- * #endif
- */
+
+#if defined(__i386__)
+#include "xen-x86_32.h"
+#elif defined(__x86_64__)
+#include "xen-x86_64.h"
+#endif
#ifndef __ASSEMBLY__
/* Guest handles for primitive C types. */
@@ -60,12 +67,7 @@ DEFINE_XEN_GUEST_HANDLE(void);
typedef unsigned long xen_pfn_t;
DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
-#endif
-
-#if defined(__i386__)
-#include "xen-x86_32.h"
-#elif defined(__x86_64__)
-#include "xen-x86_64.h"
+#define PRI_xen_pfn "lx"
#endif
/*
@@ -130,12 +132,15 @@ struct vcpu_guest_context {
#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events)
#define _VGCF_syscall_disables_events 4
#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events)
+#define _VGCF_online 5
+#define VGCF_online (1<<_VGCF_online)
unsigned long flags; /* VGCF_* flags */
struct cpu_user_regs user_regs; /* User-level CPU registers */
struct trap_info trap_ctxt[256]; /* Virtual IDT */
unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
+ /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */
unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
#ifdef __i386__
@@ -146,8 +151,18 @@ struct vcpu_guest_context {
#else
unsigned long event_callback_eip;
unsigned long failsafe_callback_eip;
+#ifdef __XEN__
+ union {
+ unsigned long syscall_callback_eip;
+ struct {
+ unsigned int event_callback_cs; /* compat CS of event cb */
+ unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */
+ };
+ };
+#else
unsigned long syscall_callback_eip;
#endif
+#endif
unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
#ifdef __x86_64__
/* Segment base addresses. */
@@ -176,6 +191,8 @@ struct panic_info {
char *pi_panicstr; /* panic message */
void *pi_ram_start; /* Start of all-RAM mapping region */
void *pi_ram_end; /* End of all-RAM mapping region */
+ void *pi_xen_start; /* Start of Xen's text/heap */
+ void *pi_xen_end; /* End of Xen's text/heap */
void *pi_stktop; /* Top of current Xen stack */
struct domain *pi_domain; /* Panicking domain */
struct vcpu *pi_vcpu; /* Panicking vcpu */
@@ -187,7 +204,7 @@ struct panic_frame {
unsigned long pf_pc;
};
-#define PANIC_INFO_VERSION 1
+#define PANIC_INFO_VERSION 2
#endif /* !__ASSEMBLY__ */
diff --git a/usr/src/uts/common/xen/public/domctl.h b/usr/src/uts/common/xen/public/domctl.h
index 8f176f5412..d061a9785f 100644
--- a/usr/src/uts/common/xen/public/domctl.h
+++ b/usr/src/uts/common/xen/public/domctl.h
@@ -42,11 +42,12 @@
#include "xen.h"
-#define XEN_DOMCTL_INTERFACE_VERSION 0x00000004
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000005
struct xenctl_cpumap {
- XEN_GUEST_HANDLE(uint8_t) bitmap;
+ XEN_GUEST_HANDLE_64(uint8_t) bitmap;
uint32_t nr_cpus;
+ uint8_t pad[4];
};
/*
@@ -69,6 +70,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t);
#define XEN_DOMCTL_destroydomain 2
#define XEN_DOMCTL_pausedomain 3
#define XEN_DOMCTL_unpausedomain 4
+#define XEN_DOMCTL_resumedomain 27
#define XEN_DOMCTL_getdomaininfo 5
struct xen_domctl_getdomaininfo {
@@ -92,6 +94,9 @@ struct xen_domctl_getdomaininfo {
/* Domain is currently running. */
#define _XEN_DOMINF_running 5
#define XEN_DOMINF_running (1U<<_XEN_DOMINF_running)
+ /* Being debugged. */
+#define _XEN_DOMINF_debugged 6
+#define XEN_DOMINF_debugged (1U<<_XEN_DOMINF_debugged)
/* CPU to which this domain is bound. */
#define XEN_DOMINF_cpumask 255
#define XEN_DOMINF_cpushift 8
@@ -99,14 +104,15 @@ struct xen_domctl_getdomaininfo {
#define XEN_DOMINF_shutdownmask 255
#define XEN_DOMINF_shutdownshift 16
uint32_t flags; /* XEN_DOMINF_* */
- uint64_t tot_pages;
- uint64_t max_pages;
- uint64_t shared_info_frame; /* GMFN of shared_info struct */
- uint64_t cpu_time;
+ uint64_aligned_t tot_pages;
+ uint64_aligned_t max_pages;
+ uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */
+ uint64_aligned_t cpu_time;
uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */
uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */
uint32_t ssidref;
xen_domain_handle_t handle;
+ uint8_t pad[4];
};
typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
@@ -116,12 +122,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
struct xen_domctl_getmemlist {
/* IN variables. */
/* Max entries to write to output buffer. */
- uint64_t max_pfns;
+ uint64_aligned_t max_pfns;
/* Start index in guest's page list. */
- uint64_t start_pfn;
- XEN_GUEST_HANDLE(xen_pfn_t) buffer;
+ uint64_aligned_t start_pfn;
+ XEN_GUEST_HANDLE_64(uint64_t) buffer;
/* OUT variables. */
- uint64_t num_pfns;
+ uint64_aligned_t num_pfns;
};
typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t);
@@ -130,22 +136,23 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t);
#define XEN_DOMCTL_getpageframeinfo 7
#define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28
-#define XEN_DOMCTL_PFINFO_NOTAB (0x0<<28)
-#define XEN_DOMCTL_PFINFO_L1TAB (0x1<<28)
-#define XEN_DOMCTL_PFINFO_L2TAB (0x2<<28)
-#define XEN_DOMCTL_PFINFO_L3TAB (0x3<<28)
-#define XEN_DOMCTL_PFINFO_L4TAB (0x4<<28)
-#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7<<28)
-#define XEN_DOMCTL_PFINFO_LPINTAB (0x1<<31)
-#define XEN_DOMCTL_PFINFO_XTAB (0xf<<28) /* invalid page */
-#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xf<<28)
+#define XEN_DOMCTL_PFINFO_NOTAB (0x0U<<28)
+#define XEN_DOMCTL_PFINFO_L1TAB (0x1U<<28)
+#define XEN_DOMCTL_PFINFO_L2TAB (0x2U<<28)
+#define XEN_DOMCTL_PFINFO_L3TAB (0x3U<<28)
+#define XEN_DOMCTL_PFINFO_L4TAB (0x4U<<28)
+#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28)
+#define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
+#define XEN_DOMCTL_PFINFO_XTAB (0xfU<<28) /* invalid page */
+#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
struct xen_domctl_getpageframeinfo {
/* IN variables. */
- uint64_t gmfn; /* GMFN to query */
+ uint64_aligned_t gmfn; /* GMFN to query */
/* OUT variables. */
/* Is the page PINNED to a type? */
uint32_t type; /* see above type defs */
+ uint8_t pad[4];
};
typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t);
@@ -154,9 +161,9 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t);
#define XEN_DOMCTL_getpageframeinfo2 8
struct xen_domctl_getpageframeinfo2 {
/* IN variables. */
- uint64_t num;
+ uint64_aligned_t num;
/* IN/OUT variables. */
- XEN_GUEST_HANDLE(ulong) array;
+ XEN_GUEST_HANDLE_64(uint32_t) array;
};
typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t);
@@ -228,10 +235,11 @@ struct xen_domctl_shadow_op {
/* OP_GET_ALLOCATION / OP_SET_ALLOCATION */
uint32_t mb; /* Shadow memory allocation in MB */
+ uint8_t pad[4];
/* OP_PEEK / OP_CLEAN */
- XEN_GUEST_HANDLE(ulong) dirty_bitmap;
- uint64_t pages; /* Size of buffer. Updated with actual size. */
+ XEN_GUEST_HANDLE_64(uint8_t) dirty_bitmap;
+ uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */
struct xen_domctl_shadow_op_stats stats;
};
typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t;
@@ -241,7 +249,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t);
#define XEN_DOMCTL_max_mem 11
struct xen_domctl_max_mem {
/* IN variables. */
- uint64_t max_memkb;
+ uint64_aligned_t max_memkb;
};
typedef struct xen_domctl_max_mem xen_domctl_max_mem_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t);
@@ -251,7 +259,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t);
#define XEN_DOMCTL_getvcpucontext 13
struct xen_domctl_vcpucontext {
uint32_t vcpu; /* IN */
- XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt; /* IN/OUT */
+ uint8_t pad[4];
+ XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */
};
typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t);
@@ -265,8 +274,10 @@ struct xen_domctl_getvcpuinfo {
uint8_t online; /* currently online (not hotplugged)? */
uint8_t blocked; /* blocked waiting for an event? */
uint8_t running; /* currently scheduled on its CPU? */
- uint64_t cpu_time; /* total cpu time consumed (ns) */
+ uint8_t pad1;
+ uint64_aligned_t cpu_time; /* total cpu time consumed (ns) */
uint32_t cpu; /* current mapping */
+ uint8_t pad2[4];
};
typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t);
@@ -277,6 +288,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t);
#define XEN_DOMCTL_getvcpuaffinity 25
struct xen_domctl_vcpuaffinity {
uint32_t vcpu; /* IN */
+ uint8_t pad[4];
struct xenctl_cpumap cpumap; /* IN/OUT */
};
typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t;
@@ -303,9 +315,9 @@ struct xen_domctl_scheduler_op {
uint32_t cmd; /* XEN_DOMCTL_SCHEDOP_* */
union {
struct xen_domctl_sched_sedf {
- uint64_t period;
- uint64_t slice;
- uint64_t latency;
+ uint64_aligned_t period;
+ uint64_aligned_t slice;
+ uint64_aligned_t latency;
uint32_t extratime;
uint32_t weight;
} sedf;
@@ -346,9 +358,9 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t);
#define XEN_DOMCTL_iomem_permission 20
struct xen_domctl_iomem_permission {
- uint64_t first_mfn; /* first page (physical page number) in range */
- uint64_t nr_mfns; /* number of pages in range (>0) */
- uint8_t allow_access; /* allow (!0) or deny (0) access to range? */
+ uint64_aligned_t first_mfn;/* first page (physical page number) in range */
+ uint64_aligned_t nr_mfns; /* number of pages in range (>0) */
+ uint8_t allow_access; /* allow (!0) or deny (0) access to range? */
};
typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t);
@@ -359,33 +371,37 @@ struct xen_domctl_ioport_permission {
uint32_t first_port; /* first port int range */
uint32_t nr_ports; /* size of port range */
uint8_t allow_access; /* allow or deny access to range? */
+ uint8_t pad[3];
};
typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t);
+
#define XEN_DOMCTL_hypercall_init 22
struct xen_domctl_hypercall_init {
- uint64_t gmfn; /* GMFN to be initialised */
+ uint64_aligned_t gmfn; /* GMFN to be initialised */
};
typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t);
+
#define XEN_DOMCTL_arch_setup 23
#define _XEN_DOMAINSETUP_hvm_guest 0
#define XEN_DOMAINSETUP_hvm_guest (1UL<<_XEN_DOMAINSETUP_hvm_guest)
#define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save) */
#define XEN_DOMAINSETUP_query (1UL<<_XEN_DOMAINSETUP_query)
typedef struct xen_domctl_arch_setup {
- uint64_t flags; /* XEN_DOMAINSETUP_* */
+ uint64_aligned_t flags; /* XEN_DOMAINSETUP_* */
#ifdef __ia64__
- uint64_t bp; /* mpaddr of boot param area */
- uint64_t maxmem; /* Highest memory address for MDT. */
- uint64_t xsi_va; /* Xen shared_info area virtual address. */
- uint32_t hypercall_imm; /* Break imm for Xen hypercalls. */
+ uint64_aligned_t bp; /* mpaddr of boot param area */
+ uint64_aligned_t maxmem; /* Highest memory address for MDT. */
+ uint64_aligned_t xsi_va; /* Xen shared_info area virtual address. */
+ uint32_t hypercall_imm; /* Break imm for Xen hypercalls. */
#endif
} xen_domctl_arch_setup_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t);
+
#define XEN_DOMCTL_settimeoffset 24
struct xen_domctl_settimeoffset {
int32_t time_offset_seconds; /* applied to domain wallclock time */
@@ -393,17 +409,53 @@ struct xen_domctl_settimeoffset {
typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t);
-#define XEN_DOMCTL_real_mode_area 26
+/*
+ * Argument shared by XEN_DOMCTL_gethvmcontext and
+ * XEN_DOMCTL_sethvmcontext: a byte buffer, passed as a 64-bit guest
+ * handle, and its size.
+ * NOTE(review): pad[4] presumably keeps the handle 8-byte aligned in
+ * both 32- and 64-bit layouts -- confirm.
+ */
+#define XEN_DOMCTL_gethvmcontext 33
+#define XEN_DOMCTL_sethvmcontext 34
+typedef struct xen_domctl_hvmcontext {
+ uint32_t size; /* IN/OUT: size of buffer / bytes filled */
+ uint8_t pad[4];
+ XEN_GUEST_HANDLE_64(uint8_t) buffer; /* IN/OUT: data, or call
+ * gethvmcontext with NULL
+ * buffer to get size
+ * req'd */
+} xen_domctl_hvmcontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t);
+
+/*
+ * Argument shared by XEN_DOMCTL_set_address_size and
+ * XEN_DOMCTL_get_address_size: a single 32-bit size value.
+ * NOTE(review): the unit of `size' is not stated in this header
+ * (presumably the guest address width in bits) -- confirm.
+ */
+#define XEN_DOMCTL_set_address_size 35
+#define XEN_DOMCTL_get_address_size 36
+typedef struct xen_domctl_address_size {
+ uint32_t size;
+} xen_domctl_address_size_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t);
+
+
+#define XEN_DOMCTL_real_mode_area 26
struct xen_domctl_real_mode_area {
uint32_t log; /* log2 of Real Mode Area size */
};
typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t);
+/*
+ * Argument for XEN_DOMCTL_sendtrigger. Both fields are inputs.
+ * NOTE(review): `trigger' presumably takes one of the
+ * XEN_DOMCTL_SENDTRIGGER_* values defined here and `vcpu' names the
+ * target virtual CPU -- confirm against the hypervisor.
+ */
+#define XEN_DOMCTL_sendtrigger 28
+#define XEN_DOMCTL_SENDTRIGGER_NMI 0
+#define XEN_DOMCTL_SENDTRIGGER_RESET 1
+#define XEN_DOMCTL_SENDTRIGGER_INIT 2
+struct xen_domctl_sendtrigger {
+ uint32_t trigger; /* IN */
+ uint32_t vcpu; /* IN */
+};
+typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t);
+
+
struct xen_domctl {
uint32_t cmd;
uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
domid_t domain;
+ uint8_t pad[6];
union {
struct xen_domctl_createdomain createdomain;
struct xen_domctl_getdomaininfo getdomaininfo;
@@ -426,6 +478,9 @@ struct xen_domctl {
struct xen_domctl_arch_setup arch_setup;
struct xen_domctl_settimeoffset settimeoffset;
struct xen_domctl_real_mode_area real_mode_area;
+ struct xen_domctl_hvmcontext hvmcontext;
+ struct xen_domctl_address_size address_size;
+ struct xen_domctl_sendtrigger sendtrigger;
uint8_t pad[128];
} u;
};
diff --git a/usr/src/uts/common/xen/public/elfnote.h b/usr/src/uts/common/xen/public/elfnote.h
index 4924767d30..77be41bb4b 100644
--- a/usr/src/uts/common/xen/public/elfnote.h
+++ b/usr/src/uts/common/xen/public/elfnote.h
@@ -28,7 +28,7 @@
#define __XEN_PUBLIC_ELFNOTE_H__
/*
- * The notes should live in a SHT_NOTE segment and have "Xen" in the
+ * The notes should live in a PT_NOTE segment and have "Xen" in the
* name field.
*
* Numeric types are either 4 or 8 bytes depending on the content of
@@ -40,8 +40,6 @@
/*
* NAME=VALUE pair (string).
- *
- * LEGACY: FEATURES and PAE
*/
#define XEN_ELFNOTE_INFO 0
@@ -108,7 +106,12 @@
#define XEN_ELFNOTE_LOADER 8
/*
- * The kernel supports PAE (x86/32 only, string = "yes" or "no").
+ * The kernel supports PAE (x86/32 only, string = "yes", "no" or
+ * "bimodal").
+ *
+ * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting
+ * may be given as "yes,bimodal" which will cause older Xen to treat
+ * this kernel as PAE.
*
* LEGACY: PAE (n.b. The legacy interface included a provision to
* indicate 'extended-cr3' support allowing L3 page tables to be
@@ -148,6 +151,22 @@
#define XEN_ELFNOTE_HV_START_LOW 12
/*
+ * List of maddr_t-sized mask/value pairs describing how to recognize
+ * (non-present) L1 page table entries carrying valid MFNs (numeric).
+ */
+#define XEN_ELFNOTE_L1_MFN_VALID 13
+
+/*
+ * Whether or not the guest supports cooperative suspend cancellation.
+ */
+#define XEN_ELFNOTE_SUSPEND_CANCEL 14
+
+/*
+ * The number of the highest elfnote defined.
+ */
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUSPEND_CANCEL
+
+/*
* System information exported through crash notes.
*
* The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO
@@ -166,6 +185,41 @@
*/
#define XEN_ELFNOTE_CRASH_REGS 0x1000002
+
+/*
+ * xen dump-core none note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE
+ * in its dump file to indicate that the file is xen dump-core
+ * file. This note doesn't have any other information.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000
+
+/*
+ * xen dump-core header note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER
+ * in its dump file.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001
+
+/*
+ * xen dump-core xen version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION
+ * in its dump file. It contains the xen version obtained via the
+ * XENVER hypercall.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002
+
+/*
+ * xen dump-core format version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION
+ * in its dump file. It contains a format version identifier.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003
+
#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
/*
diff --git a/usr/src/uts/common/xen/public/event_channel.h b/usr/src/uts/common/xen/public/event_channel.h
index 62cf764040..d35cce53e4 100644
--- a/usr/src/uts/common/xen/public/event_channel.h
+++ b/usr/src/uts/common/xen/public/event_channel.h
@@ -217,6 +217,19 @@ struct evtchn_unmask {
typedef struct evtchn_unmask evtchn_unmask_t;
/*
+ * EVTCHNOP_reset: Close all event channels associated with specified domain.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF.
+ */
+#define EVTCHNOP_reset 10
+struct evtchn_reset {
+ /* IN parameters. */
+ domid_t dom;
+};
+typedef struct evtchn_reset evtchn_reset_t;
+
+/*
* Argument to event_channel_op_compat() hypercall. Superceded by new
* event_channel_op() hypercall since 0x00030202.
*/
diff --git a/usr/src/uts/common/xen/public/grant_table.h b/usr/src/uts/common/xen/public/grant_table.h
index 9622b56d02..222ac37b86 100644
--- a/usr/src/uts/common/xen/public/grant_table.h
+++ b/usr/src/uts/common/xen/public/grant_table.h
@@ -309,6 +309,25 @@ typedef struct gnttab_copy {
} gnttab_copy_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t);
+/*
+ * GNTTABOP_query_size: Query the current and maximum sizes of the shared
+ * grant table.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_query_size 6
+struct gnttab_query_size {
+ /* IN parameters. */
+ domid_t dom;
+ /* OUT parameters. */
+ uint32_t nr_frames;
+ uint32_t max_nr_frames;
+ int16_t status; /* GNTST_* */
+};
+typedef struct gnttab_query_size gnttab_query_size_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
+
/*
* Bitfield values for update_pin_status.flags.
diff --git a/usr/src/uts/common/xen/public/hvm/hvm_op.h b/usr/src/uts/common/xen/public/hvm/hvm_op.h
index 8322f32ee2..b21b0f7abe 100644
--- a/usr/src/uts/common/xen/public/hvm/hvm_op.h
+++ b/usr/src/uts/common/xen/public/hvm/hvm_op.h
@@ -1,3 +1,23 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
#define __XEN_PUBLIC_HVM_HVM_OP_H__
@@ -50,4 +70,7 @@ struct xen_hvm_set_pci_link_route {
typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t;
DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t);
+/* Flushes all VCPU TLBs: @arg must be NULL. */
+#define HVMOP_flush_tlbs 5
+
#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
diff --git a/usr/src/uts/common/xen/public/hvm/params.h b/usr/src/uts/common/xen/public/hvm/params.h
index caa1f1f545..9657654870 100644
--- a/usr/src/uts/common/xen/public/hvm/params.h
+++ b/usr/src/uts/common/xen/public/hvm/params.h
@@ -1,4 +1,3 @@
-
/*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
@@ -24,13 +23,38 @@
#include "hvm_op.h"
-/* Parameter space for HVMOP_{set,get}_param. */
+/*
+ * Parameter space for HVMOP_{set,get}_param.
+ */
+
+/*
+ * How should CPU0 event-channel notifications be delivered?
+ * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt).
+ * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows:
+ * Domain = val[47:32], Bus = val[31:16],
+ * DevFn = val[15: 8], IntX = val[ 1: 0]
+ * If val == 0 then CPU0 event-channel notifications are not delivered.
+ */
#define HVM_PARAM_CALLBACK_IRQ 0
+
+/*
+ * These are not used by Xen. They are here for convenience of HVM-guest
+ * xenbus implementations.
+ */
#define HVM_PARAM_STORE_PFN 1
#define HVM_PARAM_STORE_EVTCHN 2
+
#define HVM_PARAM_PAE_ENABLED 4
+
#define HVM_PARAM_IOREQ_PFN 5
+
#define HVM_PARAM_BUFIOREQ_PFN 6
+
+#ifdef __ia64__
+#define HVM_PARAM_NVRAM_FD 7
+#define HVM_NR_PARAMS 8
+#else
#define HVM_NR_PARAMS 7
+#endif
#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
diff --git a/usr/src/uts/common/xen/public/io/blkif.h b/usr/src/uts/common/xen/public/io/blkif.h
index 4d33926f1e..fde78b9dbb 100644
--- a/usr/src/uts/common/xen/public/io/blkif.h
+++ b/usr/src/uts/common/xen/public/io/blkif.h
@@ -73,18 +73,20 @@
*/
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+struct blkif_request_segment {
+ grant_ref_t gref; /* reference to I/O buffer frame */
+ /* @first_sect: first sector in frame to transfer (inclusive). */
+ /* @last_sect: last sector in frame to transfer (inclusive). */
+ uint8_t first_sect, last_sect;
+};
+
struct blkif_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
- struct blkif_request_segment {
- grant_ref_t gref; /* reference to I/O buffer frame */
- /* @first_sect: first sector in frame to transfer (inclusive). */
- /* @last_sect: last sector in frame to transfer (inclusive). */
- uint8_t first_sect, last_sect;
- } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
typedef struct blkif_request blkif_request_t;
diff --git a/usr/src/uts/common/xen/public/io/protocols.h b/usr/src/uts/common/xen/public/io/protocols.h
new file mode 100644
index 0000000000..0e78452c81
--- /dev/null
+++ b/usr/src/uts/common/xen/public/io/protocols.h
@@ -0,0 +1,42 @@
+/******************************************************************************
+ * protocols.h
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __XEN_PROTOCOLS_H__
+#define __XEN_PROTOCOLS_H__
+
+#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi"
+#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi"
+#define XEN_IO_PROTO_ABI_IA64 "ia64-abi"
+#define XEN_IO_PROTO_ABI_POWERPC64 "powerpc64-abi"
+
+#if defined(__i386__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
+#elif defined(__x86_64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
+#elif defined(__ia64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64
+#elif defined(__powerpc64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64
+#else
+# error arch fixup needed here
+#endif
+
+#endif
diff --git a/usr/src/uts/common/xen/public/io/xs_wire.h b/usr/src/uts/common/xen/public/io/xs_wire.h
index 825ea45144..8b841bcfea 100644
--- a/usr/src/uts/common/xen/public/io/xs_wire.h
+++ b/usr/src/uts/common/xen/public/io/xs_wire.h
@@ -45,7 +45,8 @@ enum xsd_sockmsg_type
XS_SET_PERMS,
XS_WATCH_EVENT,
XS_ERROR,
- XS_IS_DOMAIN_INTRODUCED
+ XS_IS_DOMAIN_INTRODUCED,
+ XS_RESUME
};
#define XS_WRITE_NONE "NONE"
@@ -59,26 +60,12 @@ struct xsd_errors
const char *errstring;
};
#define XSD_ERROR(x) { x, #x }
-#if !defined(__GNUC__)
/* LINTED: static unused */
-static struct xsd_errors xsd_errors[] = {
- XSD_ERROR(EINVAL),
- XSD_ERROR(EACCES),
- XSD_ERROR(EEXIST),
- XSD_ERROR(EISDIR),
- XSD_ERROR(ENOENT),
- XSD_ERROR(ENOMEM),
- XSD_ERROR(ENOSPC),
- XSD_ERROR(EIO),
- XSD_ERROR(ENOTEMPTY),
- XSD_ERROR(ENOSYS),
- XSD_ERROR(EROFS),
- XSD_ERROR(EBUSY),
- XSD_ERROR(EAGAIN),
- XSD_ERROR(EISCONN)
-};
-#else
-static struct xsd_errors xsd_errors[] __attribute__((unused)) = {
+static struct xsd_errors xsd_errors[]
+#if defined(__GNUC__)
+__attribute__((unused))
+#endif
+ = {
XSD_ERROR(EINVAL),
XSD_ERROR(EACCES),
XSD_ERROR(EEXIST),
@@ -94,7 +81,6 @@ static struct xsd_errors xsd_errors[] __attribute__((unused)) = {
XSD_ERROR(EAGAIN),
XSD_ERROR(EISCONN)
};
-#endif
struct xsd_sockmsg
{
diff --git a/usr/src/uts/common/xen/public/memory.h b/usr/src/uts/common/xen/public/memory.h
index 97a6bbfea2..7b1ef44d32 100644
--- a/usr/src/uts/common/xen/public/memory.h
+++ b/usr/src/uts/common/xen/public/memory.h
@@ -129,6 +129,11 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t);
#define XENMEM_maximum_reservation 4
/*
+ * Returns the maximum GPFN in use by the guest, or -ve errcode on failure.
+ */
+#define XENMEM_maximum_gpfn 14
+
+/*
* Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
* mapping table. Architectures which do not have a m2p table do not implement
* this command.
diff --git a/usr/src/uts/common/xen/public/platform.h b/usr/src/uts/common/xen/public/platform.h
index ec9bd604c5..d8de4bef60 100644
--- a/usr/src/uts/common/xen/public/platform.h
+++ b/usr/src/uts/common/xen/public/platform.h
@@ -114,6 +114,45 @@ struct xenpf_platform_quirk {
typedef struct xenpf_platform_quirk xenpf_platform_quirk_t;
DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t);
+#define XENPF_firmware_info 50
+#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */
+#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
+#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */
+struct xenpf_firmware_info {
+ /* IN variables. */
+ uint32_t type;
+ uint32_t index;
+ /* OUT variables. */
+ union {
+ struct {
+ /* Int13, Fn41: Check Extensions Present. */
+ uint8_t device; /* %dl: bios device number */
+ uint8_t version; /* %ah: major version */
+ uint16_t interface_support; /* %cx: support bitmap */
+ /* Int13, Fn08: Legacy Get Device Parameters. */
+ uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */
+ uint8_t legacy_max_head; /* %dh: max head # */
+ uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */
+ /* Int13, Fn48: Get Device Parameters (as filled into %ds:%esi). */
+ /* NB. First uint16_t of buffer must be set to buffer size. */
+ XEN_GUEST_HANDLE(void) edd_params;
+ } disk_info; /* XEN_FW_DISK_INFO */
+ struct {
+ uint8_t device; /* bios device number */
+ uint32_t mbr_signature; /* offset 0x1b8 in mbr */
+ } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */
+ struct {
+ /* Int10, AX=4F15: Get EDID info. */
+ uint8_t capabilities;
+ uint8_t edid_transfer_time;
+ /* must refer to 128-byte buffer */
+ XEN_GUEST_HANDLE(uint8_t) edid;
+ } vbeddc_info; /* XEN_FW_VBEDDC_INFO */
+ } u;
+};
+typedef struct xenpf_firmware_info xenpf_firmware_info_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t);
+
#define XENPF_panic_init 40
struct xenpf_panic_init {
unsigned long panic_addr;
@@ -131,6 +170,7 @@ struct xen_platform_op {
struct xenpf_read_memtype read_memtype;
struct xenpf_microcode_update microcode;
struct xenpf_platform_quirk platform_quirk;
+ struct xenpf_firmware_info firmware_info;
struct xenpf_panic_init panic_init;
uint8_t pad[128];
} u;
diff --git a/usr/src/uts/common/xen/public/sysctl.h b/usr/src/uts/common/xen/public/sysctl.h
index db2091cbcf..5064b261e3 100644
--- a/usr/src/uts/common/xen/public/sysctl.h
+++ b/usr/src/uts/common/xen/public/sysctl.h
@@ -41,7 +41,7 @@
#include "xen.h"
#include "domctl.h"
-#define XEN_SYSCTL_INTERFACE_VERSION 0x00000002
+#define XEN_SYSCTL_INTERFACE_VERSION 0x00000003
/*
* Read console content from Xen buffer ring.
@@ -50,9 +50,11 @@
struct xen_sysctl_readconsole {
/* IN variables. */
uint32_t clear; /* Non-zero -> clear after reading. */
- XEN_GUEST_HANDLE(char) buffer; /* Buffer start */
+ uint8_t pad1[4];
+ XEN_GUEST_HANDLE_64(char) buffer; /* Buffer start */
/* IN/OUT variables. */
uint32_t count; /* In: Buffer size; Out: Used buffer size */
+ uint8_t pad2[4];
};
typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t);
@@ -68,11 +70,12 @@ struct xen_sysctl_tbuf_op {
#define XEN_SYSCTL_TBUFOP_enable 4
#define XEN_SYSCTL_TBUFOP_disable 5
uint32_t cmd;
+ uint8_t pad[4];
/* IN/OUT variables */
struct xenctl_cpumap cpu_mask;
uint32_t evt_mask;
/* OUT variables */
- uint64_t buffer_mfn;
+ uint64_aligned_t buffer_mfn;
uint32_t size;
};
typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t;
@@ -88,9 +91,10 @@ struct xen_sysctl_physinfo {
uint32_t sockets_per_node;
uint32_t nr_nodes;
uint32_t cpu_khz;
- uint64_t total_pages;
- uint64_t free_pages;
- uint64_t scrub_pages;
+ uint8_t pad[4];
+ uint64_aligned_t total_pages;
+ uint64_aligned_t free_pages;
+ uint64_aligned_t scrub_pages;
uint32_t hw_cap[8];
};
typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t;
@@ -127,10 +131,11 @@ struct xen_sysctl_perfc_op {
/* OUT variables. */
uint32_t nr_counters; /* number of counters description */
uint32_t nr_vals; /* number of values */
+ uint8_t pad[4];
/* counter information (or NULL) */
- XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc;
+ XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc;
/* counter values (or NULL) */
- XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t) val;
+ XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val;
};
typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t);
@@ -139,14 +144,46 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t);
struct xen_sysctl_getdomaininfolist {
/* IN variables. */
domid_t first_domain;
+ uint8_t pad1[2];
uint32_t max_domains;
- XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t) buffer;
+ XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer;
/* OUT variables. */
uint32_t num_domains;
+ uint8_t pad2[4];
};
typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t);
+/* Inject debug keys into Xen. */
+#define XEN_SYSCTL_debug_keys 7
+struct xen_sysctl_debug_keys {
+ /* IN variables. */
+ XEN_GUEST_HANDLE_64(char) keys;
+ uint32_t nr_keys;
+ uint8_t pad[4];
+};
+typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t);
+
+/* Get physical CPU information. */
+#define XEN_SYSCTL_getcpuinfo 8
+struct xen_sysctl_cpuinfo {
+ uint64_t idletime;
+};
+typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t);
+struct xen_sysctl_getcpuinfo {
+ /* IN variables. */
+ uint32_t max_cpus;
+ uint8_t pad1[4];
+ XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info;
+ /* OUT variables. */
+ uint32_t nr_cpus;
+ uint8_t pad2[4];
+};
+typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t);
+
struct xen_sysctl {
uint32_t cmd;
uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
@@ -157,6 +194,8 @@ struct xen_sysctl {
struct xen_sysctl_sched_id sched_id;
struct xen_sysctl_perfc_op perfc_op;
struct xen_sysctl_getdomaininfolist getdomaininfolist;
+ struct xen_sysctl_debug_keys debug_keys;
+ struct xen_sysctl_getcpuinfo getcpuinfo;
uint8_t pad[128];
} u;
};
diff --git a/usr/src/uts/common/xen/public/vcpu.h b/usr/src/uts/common/xen/public/vcpu.h
index 12df6dbe15..a84eb51310 100644
--- a/usr/src/uts/common/xen/public/vcpu.h
+++ b/usr/src/uts/common/xen/public/vcpu.h
@@ -42,13 +42,13 @@
* @extra_arg == pointer to vcpu_guest_context structure containing initial
* state for the VCPU.
*/
-#define VCPUOP_initialise 0
+#define VCPUOP_initialise 0
/*
* Bring up a VCPU. This makes the VCPU runnable. This operation will fail
* if the VCPU has not been initialised (VCPUOP_initialise).
*/
-#define VCPUOP_up 1
+#define VCPUOP_up 1
/*
* Bring down a VCPU (i.e., make it non-runnable).
@@ -64,16 +64,16 @@
* practise to move a VCPU onto an 'idle' or default page table, LDT and
* GDT before bringing it down.
*/
-#define VCPUOP_down 2
+#define VCPUOP_down 2
/* Returns 1 if the given VCPU is up. */
-#define VCPUOP_is_up 3
+#define VCPUOP_is_up 3
/*
* Return information about the state and running time of a VCPU.
* @extra_arg == pointer to vcpu_runstate_info structure.
*/
-#define VCPUOP_get_runstate_info 4
+#define VCPUOP_get_runstate_info 4
struct vcpu_runstate_info {
/* VCPU's current state (RUNSTATE_*). */
int state;
@@ -128,6 +128,56 @@ struct vcpu_register_runstate_memory_area {
} addr;
};
typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t);
+
+/*
+ * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer
+ * which can be set via these commands. Periods smaller than one millisecond
+ * may not be supported.
+ */
+#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */
+#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */
+struct vcpu_set_periodic_timer {
+ uint64_t period_ns;
+};
+typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t);
+
+/*
+ * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot
+ * timer which can be set via these commands.
+ */
+#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */
+#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */
+struct vcpu_set_singleshot_timer {
+ uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */
+ uint32_t flags; /* VCPU_SSHOTTMR_??? */
+};
+typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t);
+
+/* Flags to VCPUOP_set_singleshot_timer. */
+ /* Require the timeout to be in the future (return -ETIME if it's passed). */
+#define _VCPU_SSHOTTMR_future (0)
+#define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future)
+
+/*
+ * Register a memory location in the guest address space for the
+ * vcpu_info structure. This allows the guest to place the vcpu_info
+ * structure in a convenient place, such as in a per-cpu data area.
+ * The pointer need not be page aligned, but the structure must not
+ * cross a page boundary.
+ *
+ * This may be called only once per vcpu.
+ */
+#define VCPUOP_register_vcpu_info 10 /* arg == vcpu_register_vcpu_info_t */
+struct vcpu_register_vcpu_info {
+ uint64_t mfn; /* mfn of page to place vcpu_info */
+ uint32_t offset; /* offset within page */
+ uint32_t rsvd; /* unused */
+};
+typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t);
#endif /* __XEN_PUBLIC_VCPU_H__ */
diff --git a/usr/src/uts/common/xen/public/xen.h b/usr/src/uts/common/xen/public/xen.h
index 319f65ef08..b1b910c798 100644
--- a/usr/src/uts/common/xen/public/xen.h
+++ b/usr/src/uts/common/xen/public/xen.h
@@ -30,7 +30,7 @@
#include "xen-compat.h"
#if defined(__i386) && !defined(__i386__)
-#define __i386__
+#define __i386__ /* foo */
#endif
#if defined(__amd64) && !defined(__x86_64__)
@@ -143,6 +143,7 @@
#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */
#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */
#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */
+#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */
/* Architecture-specific VIRQ definitions. */
#define VIRQ_ARCH_0 16
@@ -421,7 +422,9 @@ struct vcpu_info {
struct arch_vcpu_info arch;
struct vcpu_time_info time;
}; /* 64 bytes (x86) */
+#ifndef __XEN__
typedef struct vcpu_info vcpu_info_t;
+#endif
/*
* Xen/kernel shared data -- pointer provided in start_info.
@@ -479,29 +482,29 @@ struct shared_info {
struct arch_shared_info arch;
};
+#ifndef __XEN__
typedef struct shared_info shared_info_t;
+#endif
/*
- * Start-of-day memory layout for the initial domain (DOM0):
+ * Start-of-day memory layout:
* 1. The domain is started within contiguous virtual-memory region.
- * 2. The contiguous region begins and ends on an aligned 4MB boundary.
- * 3. The region start corresponds to the load address of the OS image.
- * If the load address is not 4MB aligned then the address is rounded down.
- * 4. This the order of bootstrap elements in the initial virtual region:
+ * 2. The contiguous region ends on an aligned 4MB boundary.
+ * 3. This is the order of bootstrap elements in the initial virtual region:
* a. relocated kernel image
* b. initial ram disk [mod_start, mod_len]
* c. list of allocated page frames [mfn_list, nr_pages]
* d. start_info_t structure [register ESI (x86)]
* e. bootstrap page tables [pt_base, CR3 (x86)]
* f. bootstrap stack [register ESP (x86)]
- * 5. Bootstrap elements are packed together, but each is 4kB-aligned.
- * 6. The initial ram disk may be omitted.
- * 7. The list of page frames forms a contiguous 'pseudo-physical' memory
+ * 4. Bootstrap elements are packed together, but each is 4kB-aligned.
+ * 5. The initial ram disk may be omitted.
+ * 6. The list of page frames forms a contiguous 'pseudo-physical' memory
* layout for the domain. In particular, the bootstrap virtual-memory
* region is a 1:1 mapping to the first section of the pseudo-physical map.
- * 8. All bootstrap elements are mapped read-writable for the guest OS. The
+ * 7. All bootstrap elements are mapped read-writable for the guest OS. The
* only exception is the bootstrap page table, which is mapped read-only.
- * 9. There is guaranteed to be at least 512kB padding after the final
+ * 8. There is guaranteed to be at least 512kB padding after the final
* bootstrap element. If necessary, the bootstrap virtual region is
* extended by an extra 4MB to ensure this.
*/
@@ -583,6 +586,8 @@ typedef struct dom0_vga_console_info {
} vesa_lfb;
} u;
} dom0_vga_console_info_t;
+#define xen_vga_console_info dom0_vga_console_info
+#define xen_vga_console_info_t dom0_vga_console_info_t
typedef uint8_t xen_domain_handle_t[16];
@@ -602,6 +607,21 @@ DEFINE_XEN_GUEST_HANDLE(uint64_t);
#endif /* !__ASSEMBLY__ */
+/* Default definitions for macros used by domctl/sysctl. */
+/*
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+*/
+#ifndef uint64_aligned_t
+#define uint64_aligned_t uint64_t
+#endif
+#ifndef XEN_GUEST_HANDLE_64
+#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name)
+#endif
+/*
+#endif
+*/
+
+
#endif /* __XEN_PUBLIC_XEN_H__ */
/*
diff --git a/usr/src/uts/common/xen/sys/gnttab.h b/usr/src/uts/common/xen/sys/gnttab.h
index 7066ae3243..eee9c27fbe 100644
--- a/usr/src/uts/common/xen/sys/gnttab.h
+++ b/usr/src/uts/common/xen/sys/gnttab.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -40,8 +40,11 @@
* Copyright (c) 2004-2005, K A Fraser
* Copyright (c) 2005, Christopher Clark
*
- * This file may be distributed separately from the Linux kernel, or
- * incorporated into other software packages, subject to the following license:
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
@@ -64,18 +67,12 @@
#include <sys/hypervisor.h>
#include <xen/public/grant_table.h>
+#include <xen/public/features.h>
#ifdef __cplusplus
extern "C" {
#endif
-/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
-#ifdef __ia64__
-#define NR_GRANT_FRAMES 1
-#else
-#define NR_GRANT_FRAMES 4
-#endif
-
struct gnttab_free_callback {
struct gnttab_free_callback *next;
void (*fn)(void *);
@@ -107,7 +104,7 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly);
void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
gnttab_frame_t page);
-int gnttab_grant_foreign_transfer(domid_t domid);
+int gnttab_grant_foreign_transfer(domid_t domid, pfn_t pfn);
gnttab_frame_t gnttab_end_foreign_transfer_ref(grant_ref_t ref);
gnttab_frame_t gnttab_end_foreign_transfer(grant_ref_t ref);
@@ -123,6 +120,8 @@ void gnttab_free_grant_reference(grant_ref_t ref);
void gnttab_free_grant_references(grant_ref_t head);
+int gnttab_empty_grant_references(const grant_ref_t *pprivate_head);
+
int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);
void gnttab_release_grant_reference(grant_ref_t *private_head,
@@ -131,10 +130,13 @@ void gnttab_release_grant_reference(grant_ref_t *private_head,
void gnttab_request_free_callback(struct gnttab_free_callback *callback,
void (*fn)(void *), void *arg, uint16_t count);
+void gnttab_cancel_free_callback(struct gnttab_free_callback *callback);
+
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
gnttab_frame_t frame, int readonly);
-void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid);
+void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
+ pfn_t pfn);
#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
diff --git a/usr/src/uts/i86xpv/io/privcmd.c b/usr/src/uts/i86xpv/io/privcmd.c
index 5660a2cdcb..7a3672e5d7 100644
--- a/usr/src/uts/i86xpv/io/privcmd.c
+++ b/usr/src/uts/i86xpv/io/privcmd.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -235,8 +235,13 @@ do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr)
}
if (mfn == MFN_INVALID) {
- error = EINVAL;
- break;
+ /*
+ * This mfn is invalid and should not be added to
+ * segmf, as we'd only cause an immediate EFAULT when
+ * we tried to fault it in.
+ */
+ mfn |= XEN_DOMCTL_PFINFO_XTAB;
+ continue;
}
if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0)
@@ -246,7 +251,7 @@ do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr)
* Tell the process that this MFN could not be mapped, so it
* won't later try to access it.
*/
- mfn |= 0xf0000000;
+ mfn |= XEN_DOMCTL_PFINFO_XTAB;
if (sulword(ulp, mfn) != 0) {
error = EFAULT;
break;
diff --git a/usr/src/uts/i86xpv/io/privcmd_hcall.c b/usr/src/uts/i86xpv/io/privcmd_hcall.c
index 2259756c30..6de5a69788 100644
--- a/usr/src/uts/i86xpv/io/privcmd_hcall.c
+++ b/usr/src/uts/i86xpv/io/privcmd_hcall.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -81,12 +81,16 @@ import_buffer(import_export_t *iep, void *uaddr, void *kaddr, size_t size,
iep->ie_flags |= IE_FREE;
} else {
iep->ie_kaddr = kaddr;
+ iep->ie_flags &= ~IE_FREE;
}
if ((flags & IE_IMPORT) &&
(ddi_copyin(uaddr, iep->ie_kaddr, size, 0) != 0)) {
- if (iep->ie_flags & IE_FREE)
+ if (iep->ie_flags & IE_FREE) {
kmem_free(iep->ie_kaddr, iep->ie_size);
+ iep->ie_kaddr = NULL;
+ iep->ie_flags = 0;
+ }
return (-X_EFAULT);
}
@@ -109,8 +113,11 @@ export_buffer(import_export_t *iep, int *error)
if ((iep->ie_flags & IE_EXPORT) && (*error >= 0) &&
(ddi_copyout(iep->ie_kaddr, iep->ie_uaddr, iep->ie_size, 0) != 0))
copy_err = -X_EFAULT;
- if (iep->ie_flags & IE_FREE)
+ if (iep->ie_flags & IE_FREE) {
kmem_free(iep->ie_kaddr, iep->ie_size);
+ iep->ie_kaddr = NULL;
+ iep->ie_flags = 0;
+ }
if (copy_err != 0 && *error >= 0)
*error = copy_err;
@@ -135,8 +142,10 @@ import_handle(import_export_t *iep, void *field, size_t size, int flags)
/*LINTED: constant in conditional context*/
get_xen_guest_handle(ptr, (*hdl));
err = import_buffer(iep, ptr, NULL, size, (flags));
- /*LINTED: constant in conditional context*/
- set_xen_guest_handle((*hdl), (void *)((iep)->ie_kaddr));
+ if (err == 0) {
+ /*LINTED: constant in conditional context*/
+ set_xen_guest_handle((*hdl), (void *)((iep)->ie_kaddr));
+ }
return (err);
}
@@ -189,6 +198,10 @@ privcmd_HYPERVISOR_domctl(xen_domctl_t *opp)
* Check this first because our wrapper will forcibly overwrite it.
*/
if (op.interface_version != XEN_DOMCTL_INTERFACE_VERSION) {
+#ifdef DEBUG
+ printf("domctl vers mismatch (cmd %d, found 0x%x, need 0x%x)\n",
+ op.cmd, op.interface_version, XEN_DOMCTL_INTERFACE_VERSION);
+#endif /* DEBUG: report why the domctl is rejected with -X_EACCES */
error = -X_EACCES;
export_buffer(&op_ie, &error);
return (error);
@@ -240,8 +253,8 @@ privcmd_HYPERVISOR_domctl(xen_domctl_t *opp)
size = roundup(howmany(op.u.shadow_op.pages, NBBY),
sizeof (ulong_t));
- error = import_handle(&sub_ie, &op.u.shadow_op.dirty_bitmap,
- size, IE_IMPEXP);
+ error = import_handle(&sub_ie,
+ &op.u.shadow_op.dirty_bitmap, size, IE_IMPEXP);
break;
}
@@ -254,7 +267,7 @@ privcmd_HYPERVISOR_domctl(xen_domctl_t *opp)
sizeof (vcpu_guest_context_t), IE_IMPORT);
if (error == -X_EFAULT)
/*LINTED: constant in conditional context*/
- get_xen_guest_handle(taddr, op.u.vcpucontext.ctxt);
+ get_xen_guest_handle_u(taddr, op.u.vcpucontext.ctxt);
else
taddr = sub_ie.ie_kaddr;
DTRACE_XPV2(setvcpucontext__start, domid_t, op.domain,
@@ -268,6 +281,25 @@ privcmd_HYPERVISOR_domctl(xen_domctl_t *opp)
break;
}
+
+ case XEN_DOMCTL_sethvmcontext: {
+ error = import_handle(&sub_ie, &op.u.hvmcontext.buffer,
+ op.u.hvmcontext.size, IE_IMPORT);
+ break;
+ }
+
+ case XEN_DOMCTL_gethvmcontext: {
+#if !defined(__GNUC__) && defined(__i386__)
+ if (op.u.hvmcontext.buffer.u.p != NULL)
+#else
+ if (op.u.hvmcontext.buffer.p != NULL)
+#endif
+ error = import_handle(&sub_ie, &op.u.hvmcontext.buffer,
+ op.u.hvmcontext.size, IE_EXPORT);
+ break;
+ }
+
+ case XEN_DOMCTL_resumedomain:
case XEN_DOMCTL_getvcpuinfo:
case XEN_DOMCTL_setvcpuaffinity:
case XEN_DOMCTL_getvcpuaffinity:
@@ -282,6 +314,8 @@ privcmd_HYPERVISOR_domctl(xen_domctl_t *opp)
case XEN_DOMCTL_arch_setup:
case XEN_DOMCTL_settimeoffset:
case XEN_DOMCTL_real_mode_area:
+ case XEN_DOMCTL_set_address_size:
+ case XEN_DOMCTL_sendtrigger:
break;
default:
@@ -348,6 +382,12 @@ privcmd_HYPERVISOR_sysctl(xen_sysctl_t *opp)
break;
}
+ case XEN_SYSCTL_debug_keys: {
+ error = import_handle(&sub_ie, &op.u.debug_keys.keys,
+ op.u.debug_keys.nr_keys, IE_IMPORT);
+ break;
+ }
+
case XEN_SYSCTL_tbuf_op:
case XEN_SYSCTL_physinfo:
case XEN_SYSCTL_sched_id:
@@ -362,7 +402,7 @@ privcmd_HYPERVISOR_sysctl(xen_sysctl_t *opp)
* before wiring down the output buffer appropriately.
*/
/*LINTED: constant in conditional context*/
- get_xen_guest_handle(scdp, op.u.perfc_op.desc);
+ get_xen_guest_handle_u(scdp, op.u.perfc_op.desc);
if (scdp != NULL) {
static int numcounters = -1;
@@ -394,6 +434,11 @@ privcmd_HYPERVISOR_sysctl(xen_sysctl_t *opp)
break;
}
+ case XEN_SYSCTL_getcpuinfo:
+ error = import_handle(&sub_ie, &op.u.getcpuinfo.info,
+ op.u.getcpuinfo.max_cpus *
+ sizeof (xen_sysctl_cpuinfo_t), IE_EXPORT);
+ break;
default:
#ifdef DEBUG
printf("unrecognized HYPERVISOR_sysctl %d\n", op.cmd);
@@ -532,6 +577,7 @@ privcmd_HYPERVISOR_memory_op(int cmd, void *arg)
case XENMEM_current_reservation:
case XENMEM_maximum_reservation:
+ case XENMEM_maximum_gpfn:
if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.domid),
IE_IMPEXP) != 0)
return (-X_EFAULT);
@@ -693,6 +739,10 @@ privcmd_HYPERVISOR_event_channel_op(int cmd, void *arg)
size = sizeof (evtchn_unmask_t);
flags = IE_IMPORT;
break;
+ case EVTCHNOP_reset:
+ size = sizeof (evtchn_reset_t);
+ flags = IE_IMPORT;
+ break;
default:
#ifdef DEBUG
@@ -771,44 +821,48 @@ privcmd_HYPERVISOR_xen_version(int cmd, void *arg)
}
static int
-privcmd_HYPERVISOR_acm_op(int cmd, void *arg)
+privcmd_HYPERVISOR_acm_op(void *uacmctl)
{
int error;
- int size = 0;
+ struct xen_acmctl *acmctl;
import_export_t op_ie;
- uint32_t flags;
- switch (cmd) {
+ error = import_buffer(&op_ie, uacmctl, NULL, sizeof (*acmctl),
+ IE_IMPEXP);
+ if (error != 0)
+ return (error);
+
+ acmctl = op_ie.ie_kaddr;
+
+ if (acmctl->interface_version != ACM_INTERFACE_VERSION) {
+#ifdef DEBUG
+ printf("acm vers mismatch (cmd %d, found 0x%x, need 0x%x\n",
+ acmctl->cmd, acmctl->interface_version,
+ ACM_INTERFACE_VERSION);
+#endif
+ error = -X_EACCES;
+ export_buffer(&op_ie, &error);
+ return (error);
+ }
+
+ switch (acmctl->cmd) {
case ACMOP_setpolicy:
- size = sizeof (struct acm_setpolicy);
- flags = IE_IMPORT;
- break;
case ACMOP_getpolicy:
- size = sizeof (struct acm_getpolicy);
- flags = IE_IMPORT;
- break;
case ACMOP_dumpstats:
- size = sizeof (struct acm_dumpstats);
- flags = IE_IMPORT;
- break;
case ACMOP_getssid:
- size = sizeof (struct acm_getssid);
- flags = IE_IMPORT;
- break;
case ACMOP_getdecision:
- size = sizeof (struct acm_getdecision);
- flags = IE_IMPEXP;
+ case ACMOP_chgpolicy:
+ case ACMOP_relabeldoms:
break;
default:
#ifdef DEBUG
- printf("unrecognized HYPERVISOR_acm_op op %d\n", cmd);
+ printf("unrecognized HYPERVISOR_acm_op op %d\n", acmctl->cmd);
#endif
return (-X_EINVAL);
}
- error = import_buffer(&op_ie, arg, NULL, size, flags);
if (error == 0)
- error = HYPERVISOR_acm_op(cmd, op_ie.ie_kaddr);
+ error = HYPERVISOR_acm_op(acmctl);
export_buffer(&op_ie, &error);
return (error);
@@ -966,8 +1020,7 @@ do_privcmd_hypercall(void *uarg, int mode, cred_t *cr, int *rval)
(uint_t *)hc->arg[2], (domid_t)hc->arg[3]);
break;
case __HYPERVISOR_acm_op:
- error = privcmd_HYPERVISOR_acm_op(
- (int)hc->arg[0], (void *)hc->arg[1]);
+ error = privcmd_HYPERVISOR_acm_op((void *)hc->arg[0]);
break;
case __HYPERVISOR_hvm_op:
error = privcmd_HYPERVISOR_hvm_op(
diff --git a/usr/src/uts/i86xpv/ml/xenguest.s b/usr/src/uts/i86xpv/ml/xenguest.s
index 187abcb52f..2d68e640a9 100644
--- a/usr/src/uts/i86xpv/ml/xenguest.s
+++ b/usr/src/uts/i86xpv/ml/xenguest.s
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -61,15 +61,7 @@ int __lint_xen_guest;
NOTE("Xen", XEN_ELFNOTE_VIRT_BASE, .4byte, 0x40000000)
NOTE("Xen", XEN_ELFNOTE_PADDR_OFFSET, .4byte, 0x40000000)
#if defined(__i386)
- /*
- * NB: If you want to build a kernel that works on a non-PAE
- * hypervisor, just comment out the next line and rebuild Solaris.
- * It'll just work, the kernel figures everything out dynamically.
- */
NOTE("Xen", XEN_ELFNOTE_PAE_MODE, .string, "yes,bimodal")
- /*
- * XXPV: implement XEN_ELFNOTE_HV_START_LOW
- */
#endif
#endif /* __lint */
diff --git a/usr/src/uts/i86xpv/os/xpv_panic.c b/usr/src/uts/i86xpv/os/xpv_panic.c
index 191485ffcd..713279246a 100644
--- a/usr/src/uts/i86xpv/os/xpv_panic.c
+++ b/usr/src/uts/i86xpv/os/xpv_panic.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -86,6 +86,20 @@ static pfn_t ptable_pfn[MAX_NUM_LEVEL];
static int xpv_dump_pages;
/*
+ * There are up to two large swathes of RAM that we don't want to include
+ * in the dump: those that comprise the Xen version of segkpm. On 32-bit
+ * systems there is no such region of memory. On 64-bit systems, there
+ * should be just a single contiguous region that corresponds to all of
+ * physical memory. The tricky bit is that Xen's heap sometimes lives in
+ * the middle of its segkpm, and is mapped using only kpm-like addresses.
+ * In that case, we need to skip the swathes before and after Xen's heap.
+ */
+uintptr_t kpm1_low = 0;
+uintptr_t kpm1_high = 0;
+uintptr_t kpm2_low = 0;
+uintptr_t kpm2_high = 0;
+
+/*
* Some commonly used values that we don't want to recompute over and over.
*/
static int xpv_panic_nptes[MAX_NUM_LEVEL];
@@ -214,15 +228,6 @@ xpv_va_walk(uintptr_t *vaddr)
idx++;
scan_va += mmu.level_size[l];
}
- va = scan_va;
-
- /*
- * See if we've hit the end of the range.
- */
- if (scan_va >= xpv_end || scan_va < *vaddr) {
- va = scan_va;
- break;
- }
/*
* If there are no valid mappings in this table, we
@@ -233,6 +238,13 @@ xpv_va_walk(uintptr_t *vaddr)
break;
}
+ va = scan_va;
+ /*
+ * See if we've hit the end of the range.
+ */
+ if (va >= xpv_end || va < *vaddr)
+ break;
+
/*
* If this mapping is for a pagetable, we drop down
* to the next level in the hierarchy and look for
@@ -252,10 +264,16 @@ xpv_va_walk(uintptr_t *vaddr)
break;
}
- /* We also want to skip the Xen version of KPM */
- if (va >= (uintptr_t)xpv_panic_info->pi_ram_start &&
- va < (uintptr_t)xpv_panic_info->pi_ram_end) {
- va = (uintptr_t)xpv_panic_info->pi_ram_end;
+ /*
+ * See if the address is within one of the two
+ * kpm-like regions we want to skip.
+ */
+ if (va >= kpm1_low && va < kpm1_high) {
+ va = kpm1_high;
+ break;
+ }
+ if (va >= kpm2_low && va < kpm2_high) {
+ va = kpm2_high;
break;
}
@@ -691,6 +709,17 @@ xpv_do_panic(void *arg)
xpv_panic_info = pip;
+#if defined(__amd64)
+ kpm1_low = (uintptr_t)xpv_panic_info->pi_ram_start;
+ if (xpv_panic_info->pi_xen_start == NULL) {
+ kpm1_high = (uintptr_t)xpv_panic_info->pi_ram_end;
+ } else {
+ kpm1_high = (uintptr_t)xpv_panic_info->pi_xen_start;
+ kpm2_low = (uintptr_t)xpv_panic_info->pi_xen_end;
+ kpm2_high = (uintptr_t)xpv_panic_info->pi_ram_end;
+ }
+#endif
+
/*
* Make sure we are running on the Solaris %gs. The Xen panic code
* should already have set up the GDT properly.
diff --git a/usr/src/uts/intel/os/driver_aliases b/usr/src/uts/intel/os/driver_aliases
index 0fe52eb075..2b2c09cced 100644
--- a/usr/src/uts/intel/os/driver_aliases
+++ b/usr/src/uts/intel/os/driver_aliases
@@ -33,6 +33,7 @@ cpudrv "cpu"
xnbe "xnb,ioemu"
xnbo "xnb,SUNW_mac"
xnbu "xnb,netfront"
+xnbo xnb
pit_beep "SUNW,pit_beep"
intel_nb5000 "pci8086,25d8"
intel_nb5000 "pci8086,25d4"
diff --git a/usr/src/uts/intel/sys/hypervisor.h b/usr/src/uts/intel/sys/hypervisor.h
index 9f5aadd499..5e013abd15 100644
--- a/usr/src/uts/intel/sys/hypervisor.h
+++ b/usr/src/uts/intel/sys/hypervisor.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -206,7 +206,7 @@ extern long HYPERVISOR_vcpu_op(int, int, void *);
extern long HYPERVISOR_set_segment_base(int, ulong_t);
#endif /* __amd64 */
extern int HYPERVISOR_mmuext_op(struct mmuext_op *, int, uint_t *, domid_t);
-extern long HYPERVISOR_acm_op(int cmd, void *);
+extern long HYPERVISOR_acm_op(struct xen_acmctl *);
extern long HYPERVISOR_nmi_op(int cmd, void *);
extern long HYPERVISOR_sched_op(int, void *);
extern long HYPERVISOR_callback_op(int, void *);