diff options
author | rab <none@none> | 2008-03-03 17:05:43 -0800 |
---|---|---|
committer | rab <none@none> | 2008-03-03 17:05:43 -0800 |
commit | a576ab5b6e08c47732b3dedca9eaa8a8cbb85720 (patch) | |
tree | d3c3e79234e0f53a48282500d03af0cb57e0ac5e /usr/src | |
parent | 247f8eaa502d3244b05aa230214295f0f1067c59 (diff) | |
download | illumos-gate-a576ab5b6e08c47732b3dedca9eaa8a8cbb85720.tar.gz |
6602031 move xVM to xen 3.1
6637636 HVM domU live migration support
6663166 xdb needs to sanity check nr_segments
Diffstat (limited to 'usr/src')
42 files changed, 1960 insertions, 595 deletions
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_fdio.c b/usr/src/cmd/mdb/common/mdb/mdb_fdio.c index 4bcac3e687..6fb9a7e1b0 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_fdio.c +++ b/usr/src/cmd/mdb/common/mdb/mdb_fdio.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -323,3 +323,10 @@ mdb_fdio_create_named(int fd, const char *name) return (io); } + +int +mdb_fdio_fileno(mdb_io_t *io) +{ + fd_data_t *fdp = io->io_data; + return (fdp->fd_fd); +} diff --git a/usr/src/cmd/mdb/common/mdb/mdb_gelf.c b/usr/src/cmd/mdb/common/mdb/mdb_gelf.c index f334a018e0..ade3da21cf 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_gelf.c +++ b/usr/src/cmd/mdb/common/mdb/mdb_gelf.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -134,8 +133,8 @@ gelf_sect_init(mdb_gelf_file_t *gf) return (gf); } -static void * -gelf_sect_load(mdb_gelf_file_t *gf, mdb_gelf_sect_t *gsp) +void * +mdb_gelf_sect_load(mdb_gelf_file_t *gf, mdb_gelf_sect_t *gsp) { ssize_t nbytes; @@ -1124,10 +1123,10 @@ mdb_gelf_symtab_create_file_by_name(mdb_gelf_file_t *gf, if (gst->gst_dsect == NULL || gst->gst_ssect == NULL) goto err; /* Failed to locate data or string section */ - if (gelf_sect_load(gf, gst->gst_dsect) == NULL) + if (mdb_gelf_sect_load(gf, gst->gst_dsect) == NULL) goto err; /* Failed to load data section */ - if (gelf_sect_load(gf, gst->gst_ssect) == NULL) + if (mdb_gelf_sect_load(gf, gst->gst_ssect) == NULL) goto err; /* Failed to load string section */ if (gf->gf_ehdr.e_ident[EI_CLASS] == ELFCLASS32) @@ -1278,10 +1277,10 @@ mdb_gelf_symtab_create_dynamic(mdb_gelf_file_t *gf, uint_t tabid) gst->gst_ssect->gs_shdr.sh_size = dt_strsz; gst->gst_ssect->gs_shdr.sh_entsize = 0; - if (gelf_sect_load(gf, gst->gst_dsect) == NULL) + if (mdb_gelf_sect_load(gf, gst->gst_dsect) == NULL) goto err; - if (gelf_sect_load(gf, gst->gst_ssect) == NULL) + if (mdb_gelf_sect_load(gf, gst->gst_ssect) == NULL) goto err; if (gf->gf_ehdr.e_ident[EI_CLASS] == ELFCLASS32) @@ -1876,3 +1875,16 @@ mdb_gelf_rw(mdb_gelf_file_t *gf, void *buf, size_t nbytes, uintptr_t addr, return (nbytes - resid); } + +mdb_gelf_sect_t * +mdb_gelf_sect_by_name(mdb_gelf_file_t *gf, const char *name) +{ + int i; + + for (i = 0; i < gf->gf_shnum; i++) { + if (strcmp(gf->gf_sects[i].gs_name, name) == 0) + return (&gf->gf_sects[i]); + } + + return (NULL); +} diff --git a/usr/src/cmd/mdb/common/mdb/mdb_gelf.h b/usr/src/cmd/mdb/common/mdb/mdb_gelf.h index 5f7629cfcc..4f9d5a95d4 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_gelf.h +++ b/usr/src/cmd/mdb/common/mdb/mdb_gelf.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -139,6 +138,11 @@ extern void mdb_gelf_symtab_insert(mdb_gelf_symtab_t *, extern void mdb_gelf_symtab_delete(mdb_gelf_symtab_t *, const char *, GElf_Sym *); +extern mdb_gelf_sect_t *mdb_gelf_sect_by_name(mdb_gelf_file_t *, + const char *); + +extern void *mdb_gelf_sect_load(mdb_gelf_file_t *, mdb_gelf_sect_t *); + #endif /* _MDB */ #ifdef __cplusplus diff --git a/usr/src/cmd/mdb/common/mdb/mdb_io.h b/usr/src/cmd/mdb/common/mdb/mdb_io.h index 70ede3807e..2ef4677db4 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_io.h +++ b/usr/src/cmd/mdb/common/mdb/mdb_io.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -182,6 +182,8 @@ extern void mdb_table_print(uint_t, const char *, ...); extern int mdb_setupterm(const char *, mdb_io_t *, int *); +extern int mdb_fdio_fileno(mdb_io_t *); + #endif /* _MDB */ #ifdef __cplusplus diff --git a/usr/src/cmd/mdb/common/mdb/mdb_kvm.c b/usr/src/cmd/mdb/common/mdb/mdb_kvm.c index caeadde0e5..0c1cc673fa 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_kvm.c +++ b/usr/src/cmd/mdb/common/mdb/mdb_kvm.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -1398,6 +1398,17 @@ mdb_kvm_tgt_create(mdb_tgt_t *t, int argc, const char *argv[]) getops = (mdb_kb_ops_t *(*)())dlsym(RTLD_NEXT, "mdb_kb_ops"); + /* + * Load mdb_kb if it's not already loaded during + * identification. + */ + if (getops == NULL) { + (void) mdb_module_load("mdb_kb", + MDB_MOD_GLOBAL | MDB_MOD_SILENT); + getops = (mdb_kb_ops_t *(*)()) + dlsym(RTLD_NEXT, "mdb_kb_ops"); + } + if (getops == NULL || (kt->k_kb_ops = getops()) == NULL) { warn("failed to load KVM backend ops\n"); goto err; diff --git a/usr/src/cmd/mdb/common/mdb/mdb_main.c b/usr/src/cmd/mdb/common/mdb/mdb_main.c index a3b5f67a54..f72ead52e5 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_main.c +++ b/usr/src/cmd/mdb/common/mdb/mdb_main.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -367,6 +367,38 @@ mdb_scf_console_term(void) return (term); } +/* + * Unpleasant hack: we might be debugging a hypervisor domain dump. + * Earlier versions use a non-ELF file. Later versions are ELF, but are + * /always/ ELF64, so our standard ehdr check isn't good enough. Since + * we don't want to know too much about the file format, we'll ask + * mdb_kb. + */ +#ifdef __x86 +static int +identify_xvm_file(const char *file, int *longmode) +{ + int (*identify)(const char *, int *); + + if (mdb_module_load("mdb_kb", MDB_MOD_GLOBAL | MDB_MOD_SILENT) != 0) + return (0); + + identify = (int (*)())dlsym(RTLD_NEXT, "xkb_identify"); + + if (identify == NULL) + return (0); + + return (identify(file, longmode)); +} +#else +/*ARGSUSED*/ +static int +identify_xvm_file(const char *file, int *longmode) +{ + return (0); +} +#endif /* __x86 */ + int main(int argc, char *argv[], char *envp[]) { @@ -385,6 +417,7 @@ main(int argc, char *argv[], char *envp[]) int fflag = 0, Kflag = 0, Rflag = 0, Sflag = 0, Oflag = 0, Uflag = 0; int ttylike; + int longmode = 0; stack_t sigstack; @@ -687,6 +720,33 @@ main(int argc, char *argv[], char *envp[]) if ((mdb.m_shell = getenv("SHELL")) == NULL) mdb.m_shell = "/bin/sh"; + /* + * If the debugger state is to be inherited from a previous instance, + * restore it now prior to path evaluation so that %R is updated. + */ + if ((p = getenv(MDB_CONFIG_ENV_VAR)) != NULL) { + mdb_set_config(p); + (void) unsetenv(MDB_CONFIG_ENV_VAR); + } + + /* + * Path evaluation part 1: Create the initial module path to allow + * the target constructor to load a support module. Then expand + * any command-line arguments that modify the paths. + */ + if (Iflag != NULL) + mdb_set_ipath(Iflag); + else + mdb_set_ipath(MDB_DEF_IPATH); + + if (Lflag != NULL) + mdb_set_lpath(Lflag); + else + mdb_set_lpath(MDB_DEF_LPATH); + + if (mdb_get_prompt() == NULL && !(mdb.m_flags & MDB_FL_ADB)) + (void) mdb_set_prompt(MDB_DEF_PROMPT); + if (tgt_ctor == mdb_kvm_tgt_create) { if (pidarg != NULL) { warn("-p and -k options are mutually exclusive\n"); @@ -791,6 +851,19 @@ main(int argc, char *argv[], char *envp[]) mdb_io_destroy(io); + if (identify_xvm_file(tgt_argv[0], &longmode) == 1 && + !fflag) { +#ifdef _LP64 + if (!longmode) + goto reexec; +#else + if (longmode) + goto reexec; +#endif + tgt_ctor = mdb_kvm_tgt_create; + goto tcreate; + } + if (tgt_ctor == mdb_rawfile_tgt_create) goto tcreate; /* skip re-exec and just create target */ @@ -845,62 +918,6 @@ tcreate: if (tgt_ctor == NULL) tgt_ctor = mdb_proc_tgt_create; - /* - * If the debugger state is to be inherited from a previous instance, - * restore it now prior to path evaluation so that %R is updated. - */ - if ((p = getenv(MDB_CONFIG_ENV_VAR)) != NULL) { - mdb_set_config(p); - (void) unsetenv(MDB_CONFIG_ENV_VAR); - } - - /* - * Path evaluation part 1: Create the initial module path to allow - * the target constructor to load a support module. Then expand - * any command-line arguments that modify the paths. - */ - if (Iflag != NULL) - mdb_set_ipath(Iflag); - else - mdb_set_ipath(MDB_DEF_IPATH); - - if (Lflag != NULL) - mdb_set_lpath(Lflag); - else - mdb_set_lpath(MDB_DEF_LPATH); - - if (mdb_get_prompt() == NULL && !(mdb.m_flags & MDB_FL_ADB)) - (void) mdb_set_prompt(MDB_DEF_PROMPT); - -#ifdef __x86 - /* - * Unpleasant hack: we might be debugging a hypervisor domain dump, - * which can be a non-ELF file in earlier versions. Since we need to - * know some unpleasant details about the format of the file, we ask - * mdb_kb to identify the file if it can, and switch targets based on - * its response. - */ - if (tgt_ctor == mdb_rawfile_tgt_create && !fflag) { - int (*identify)(const char *, int *); - int longmode; - - if (mdb_module_load("mdb_kb", - MDB_MOD_GLOBAL | MDB_MOD_SILENT) == 0 && - (identify = (int (*)())dlsym(RTLD_NEXT, "xkb_identify")) - != NULL && identify(tgt_argv[0], &longmode) == 1) { - tgt_ctor = mdb_kvm_tgt_create; -#ifdef _LP64 - if (!longmode) - goto reexec; -#else - if (longmode) - goto reexec; -#endif - } - } -#endif /* __x86 */ - - tgt = mdb_tgt_create(tgt_ctor, mdb.m_tgtflags, tgt_argc, tgt_argv); if (tgt == NULL) { diff --git a/usr/src/cmd/mdb/intel/modules/mdb_kb/mdb_kb.c b/usr/src/cmd/mdb/intel/modules/mdb_kb/mdb_kb.c index 0dd529dd32..79b0a1c7fa 100644 --- a/usr/src/cmd/mdb/intel/modules/mdb_kb/mdb_kb.c +++ b/usr/src/cmd/mdb/intel/modules/mdb_kb/mdb_kb.c @@ -19,17 +19,25 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* - * KVM backend for hypervisor domain dumps. We don't use libkvm for such - * dumps, since they do not have a namelist file or the typical dump structures - * we expect to aid bootstrapping. Instead, we bootstrap based upon a - * debug_info structure at a known VA, using the guest's own page tables to - * resolve to physical addresses, and construct the namelist in a manner - * similar to ksyms_snapshot(). + * KVM backend for hypervisor domain dumps. We don't use libkvm for + * such dumps, since they do not have a namelist file or the typical + * dump structures we expect to aid bootstrapping. Instead, we + * bootstrap based upon a debug_info structure at a known VA, using the + * guest's own page tables to resolve to physical addresses, and + * construct the namelist in a manner similar to ksyms_snapshot(). + * + * Note that there are two formats understood by this module: the older, + * ad hoc format, which we call 'core' within this file, and an + * ELF-based format, known as 'elf'. + * + * We only support the older format generated on Solaris dom0: before we + * fixed it, core dump files were broken whenever a PFN didn't map a + * real MFN (!). */ #pragma ident "%Z%%M% %I% %E% SMI" @@ -62,13 +70,8 @@ #include <mdb/mdb_target_impl.h> #include <xen/public/xen.h> - -#if defined(__i386) -#define DEF_DEBUG_INFO_VA 0xfb3ff000 -#define PAE_DEBUG_INFO_VA 0xf4bff000 -#elif defined(__amd64) -#define DEF_DEBUG_INFO_VA 0xfffffffffb7ff000 -#endif +#include <xen/public/version.h> +#include <xen/public/elfnote.h> #define XKB_SHDR_NULL 0 #define XKB_SHDR_SYMTAB 1 @@ -81,18 +84,20 @@ #define XKB_WALK_STR 0x4 #define XKB_WALK_ALL (XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR) +#if defined(__i386) +#define DEBUG_INFO 0xf4bff000 +#elif defined(__amd64) +#define DEBUG_INFO 0xfffffffffb7ff000 +#endif + #define PAGE_SIZE 0x1000 #define PAGE_SHIFT 12 #define PAGE_OFFSET(a) ((a) & (PAGE_SIZE - 1)) #define PAGE_MASK(a) ((a) & ~(PAGE_SIZE - 1)) +#define PAGE_ALIGNED(a) (((a) & (PAGE_SIZE -1)) == 0) #define PT_PADDR 0x000ffffffffff000ull #define PT_VALID 0x1 -/* - * Once the headers are available easily from within ON, we can use those, but - * until then these definitions are duplicates. - */ - #define XC_CORE_MAGIC 0xF00FEBED #define XC_CORE_MAGIC_HVM 0xF00FEBEE @@ -107,6 +112,33 @@ typedef struct xc_core_header { unsigned int xch_pages_offset; } xc_core_header_t; +struct xc_elf_header { + uint64_t xeh_magic; + uint64_t xeh_nr_vcpus; + uint64_t xeh_nr_pages; + uint64_t xeh_page_size; +}; + +struct xc_elf_version { + uint64_t xev_major; + uint64_t xev_minor; + xen_extraversion_t xev_extra; + xen_compile_info_t xev_compile_info; + xen_capabilities_info_t xev_capabilities; + xen_changeset_info_t xev_changeset; + xen_platform_parameters_t xev_platform_parameters; + uint64_t xev_pagesize; +}; + +/* + * Either an old-style (3.0.4) core format, or the ELF format. + */ +typedef enum { + XKB_FORMAT_UNKNOWN = 0, + XKB_FORMAT_CORE = 1, + XKB_FORMAT_ELF = 2 +} xkb_type_t; + typedef struct mfn_map { mfn_t mm_mfn; char *mm_map; @@ -119,22 +151,46 @@ typedef struct mmu_info { size_t mi_ptesize; } mmu_info_t; +typedef struct xkb_core { + xc_core_header_t xc_hdr; + void *xc_p2m_buf; +} xkb_core_t; + +typedef struct xkb_elf { + mdb_gelf_file_t *xe_gelf; + size_t *xe_off; + struct xc_elf_header xe_hdr; + struct xc_elf_version xe_version; +} xkb_elf_t; + typedef struct xkb { char *xkb_path; int xkb_fd; - xc_core_header_t xkb_hdr; - char *xkb_namelist; - size_t xkb_namesize; - struct vcpu_guest_context *xkb_ctxts; + + xkb_type_t xkb_type; + xkb_core_t xkb_core; + xkb_elf_t xkb_elf; + + size_t xkb_nr_vcpus; + size_t xkb_nr_pages; + size_t xkb_pages_off; + xen_pfn_t xkb_max_pfn; mfn_t xkb_max_mfn; + int xkb_is_pae; + mmu_info_t xkb_mmu; + debug_info_t xkb_info; + + struct vcpu_guest_context *xkb_vcpus; + char *xkb_pages; mfn_t *xkb_p2m; - void *xkb_p2m_buf; xen_pfn_t *xkb_m2p; - debug_info_t xkb_info; mfn_map_t xkb_pt_map[4]; mfn_map_t xkb_map; + + char *xkb_namelist; + size_t xkb_namesize; } xkb_t; static const char xkb_shstrtab[] = "\0.symtab\0.strtab\0.shstrtab\0"; @@ -155,10 +211,20 @@ static int xkb_read_word(xkb_t *, uintptr_t, uintptr_t *); static char *xkb_map_mfn(xkb_t *, mfn_t, mfn_map_t *); static int xkb_close(xkb_t *); +/* + * Jump through the hoops we need to to correctly identify a core file + * of either the old or new format. + */ int xkb_identify(const char *file, int *longmode) { xc_core_header_t header; + mdb_gelf_file_t *gf = NULL; + mdb_gelf_sect_t *sect = NULL; + mdb_io_t *io = NULL; + char *notes = NULL; + char *pos; + int ret = 0; size_t sz; int fd; @@ -172,24 +238,82 @@ xkb_identify(const char *file, int *longmode) (void) close(fd); - if (header.xch_magic != XC_CORE_MAGIC) - return (0); - - *longmode = 0; + if (header.xch_magic == XC_CORE_MAGIC) { + *longmode = 0; - /* - * Indeed. - */ - sz = header.xch_index_offset - header.xch_ctxt_offset; + /* + * Indeed. + */ + sz = header.xch_index_offset - header.xch_ctxt_offset; #ifdef _LP64 - if (sizeof (struct vcpu_guest_context) * header.xch_nr_vcpus == sz) - *longmode = 1; + if (sizeof (struct vcpu_guest_context) * + header.xch_nr_vcpus == sz) + *longmode = 1; #else - if (sizeof (struct vcpu_guest_context) * header.xch_nr_vcpus != sz) - *longmode = 1; + if (sizeof (struct vcpu_guest_context) * + header.xch_nr_vcpus != sz) + *longmode = 1; #endif /* _LP64 */ - return (1); + return (1); + } + + if ((io = mdb_fdio_create_path(NULL, file, O_RDONLY, 0)) == NULL) + return (-1); + + if ((gf = mdb_gelf_create(io, ET_NONE, GF_FILE)) == NULL) + goto out; + + if ((sect = mdb_gelf_sect_by_name(gf, ".note.Xen")) == NULL) + goto out; + + if ((notes = mdb_gelf_sect_load(gf, sect)) == NULL) + goto out; + + for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) { + struct xc_elf_version *vers; + /* LINTED - alignment */ + Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos; + char *desc; + char *name; + + name = pos + sizeof (*nhdr); + desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4); + + pos = desc + nhdr->n_descsz; + + if (nhdr->n_type != XEN_ELFNOTE_DUMPCORE_XEN_VERSION) + continue; + + /* + * The contents of this struct differ between 32 and 64 + * bit; however, not until past the 'xev_capabilities' + * member, so we can just about get away with this. + */ + + /* LINTED - alignment */ + vers = (struct xc_elf_version *)desc; + + if (strstr(vers->xev_capabilities, "x86_64")) { + *longmode = 1; + } else if (strstr(vers->xev_capabilities, "x86_32") || + strstr(vers->xev_capabilities, "x86_32p")) { + *longmode = 0; + } else { + mdb_warn("couldn't derive word size of dump; " + "assuming 64-bit"); + *longmode = 1; + } + } + + ret = 1; + +out: + if (gf != NULL) + mdb_gelf_destroy(gf); + else if (io != NULL) + mdb_io_destroy(io); + return (ret); } static void * @@ -205,6 +329,9 @@ xkb_fail(xkb_t *xkb, const char *msg, ...) va_end(args); if (xkb != NULL) (void) xkb_close(xkb); + + errno = ENOEXEC; + return (NULL); } @@ -213,7 +340,7 @@ xkb_build_m2p(xkb_t *xkb) { size_t i; - for (i = 0; i < xkb->xkb_hdr.xch_nr_pages; i++) { + for (i = 0; i <= xkb->xkb_max_pfn; i++) { if (xkb->xkb_p2m[i] != MFN_INVALID && xkb->xkb_p2m[i] > xkb->xkb_max_mfn) xkb->xkb_max_mfn = xkb->xkb_p2m[i]; @@ -225,7 +352,7 @@ xkb_build_m2p(xkb_t *xkb) for (i = 0; i <= xkb->xkb_max_mfn; i++) xkb->xkb_m2p[i] = PFN_INVALID; - for (i = 0; i < xkb->xkb_hdr.xch_nr_pages; i++) { + for (i = 0; i <= xkb->xkb_max_pfn; i++) { if (xkb->xkb_p2m[i] != MFN_INVALID) xkb->xkb_m2p[xkb->xkb_p2m[i]] = i; } @@ -234,37 +361,93 @@ xkb_build_m2p(xkb_t *xkb) } /* - * Just to make things jolly fun, they've not page-aligned the p2m table. + * With FORMAT_CORE, we can use the table in the dump file directly. + * Just to make things fun, they've not page-aligned the p2m table. */ static int xkb_map_p2m(xkb_t *xkb) { offset_t off; size_t size; - size_t count = xkb->xkb_hdr.xch_nr_pages; - size_t boff = xkb->xkb_hdr.xch_index_offset; + xkb_core_t *xc = &xkb->xkb_core; + size_t count = xkb->xkb_nr_pages; + size_t boff = xc->xc_hdr.xch_index_offset; - size = sizeof (mfn_t) * count + (PAGE_SIZE) * 2; + size = (sizeof (mfn_t) * count) + (PAGE_SIZE * 2); size = PAGE_MASK(size); off = PAGE_MASK(boff); /* LINTED - alignment */ - xkb->xkb_p2m_buf = (mfn_t *)mmap(NULL, size, PROT_READ, + xc->xc_p2m_buf = (mfn_t *)mmap(NULL, size, PROT_READ, MAP_SHARED, xkb->xkb_fd, off); - if (xkb->xkb_p2m_buf == (xen_pfn_t *)MAP_FAILED) { + if (xc->xc_p2m_buf == (xen_pfn_t *)MAP_FAILED) { (void) xkb_fail(xkb, "cannot map p2m table"); return (0); } /* LINTED - alignment */ - xkb->xkb_p2m = (mfn_t *)((char *)xkb->xkb_p2m_buf + + xkb->xkb_p2m = (mfn_t *)((char *)xc->xc_p2m_buf + PAGE_OFFSET(boff)); return (1); } /* + * With FORMAT_ELF, we have a set of <pfn,mfn> pairs, which we convert + * into a linear array indexed by pfn for convenience. We also need to + * track the mapping between mfn and the offset in the file: a pfn with + * no mfn will not appear in the core file. + */ +static int +xkb_build_p2m(xkb_t *xkb) +{ + xkb_elf_t *xe = &xkb->xkb_elf; + mdb_gelf_sect_t *sect; + size_t size; + size_t i; + + struct elf_p2m { + uint64_t pfn; + uint64_t gmfn; + } *p2m; + + sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_p2m"); + + if (sect == NULL) { + (void) xkb_fail(xkb, "cannot find section .xen_p2m"); + return (0); + } + + if ((p2m = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) { + (void) xkb_fail(xkb, "couldn't read .xen_p2m"); + return (0); + } + + for (i = 0; i < xkb->xkb_nr_pages; i++) { + if (p2m[i].pfn > xkb->xkb_max_pfn) + xkb->xkb_max_pfn = p2m[i].pfn; + } + + size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1); + xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP); + size = sizeof (size_t) * (xkb->xkb_max_pfn + 1); + xe->xe_off = mdb_alloc(size, UM_SLEEP); + + for (i = 0; i <= xkb->xkb_max_pfn; i++) { + xkb->xkb_p2m[i] = PFN_INVALID; + xe->xe_off[i] = (size_t)-1; + } + + for (i = 0; i < xkb->xkb_nr_pages; i++) { + xkb->xkb_p2m[p2m[i].pfn] = p2m[i].gmfn; + xe->xe_off[p2m[i].pfn] = i; + } + + return (1); +} + +/* * Return the MFN of the top-level page table for the given as. */ static mfn_t @@ -284,7 +467,7 @@ xkb_as_to_mfn(xkb_t *xkb, struct as *as) &pfn)) return (MFN_INVALID); - if (pfn >= xkb->xkb_hdr.xch_nr_pages) + if (pfn > xkb->xkb_max_pfn) return (MFN_INVALID); return (xkb->xkb_p2m[pfn]); @@ -295,8 +478,8 @@ xkb_read_helper(xkb_t *xkb, struct as *as, int phys, uint64_t addr, void *buf, size_t size) { size_t left = size; - int windowed = xkb->xkb_pages == NULL; - mfn_t tlmfn = xen_cr3_to_pfn(xkb->xkb_ctxts[0].ctrlreg[3]); + int windowed = (xkb->xkb_pages == NULL); + mfn_t tlmfn = xen_cr3_to_pfn(xkb->xkb_vcpus[0].ctrlreg[3]); if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID) return (-1); @@ -314,7 +497,7 @@ xkb_read_helper(xkb_t *xkb, struct as *as, int phys, uint64_t addr, return (-1); } else { xen_pfn_t pfn = pos >> PAGE_SHIFT; - if (pfn >= xkb->xkb_hdr.xch_nr_pages) + if (pfn > xkb->xkb_max_pfn) return (-1); mfn = xkb->xkb_p2m[pfn]; if (mfn == MFN_INVALID) @@ -405,6 +588,18 @@ xkb_readstr(xkb_t *xkb, uintptr_t addr) } static offset_t +xkb_pfn_to_off(xkb_t *xkb, xen_pfn_t pfn) +{ + if (pfn == PFN_INVALID || pfn > xkb->xkb_max_pfn) + return (-1ULL); + + if (xkb->xkb_type == XKB_FORMAT_CORE) + return (PAGE_SIZE * pfn); + + return (PAGE_SIZE * (xkb->xkb_elf.xe_off[pfn])); +} + +static offset_t xkb_mfn_to_offset(xkb_t *xkb, mfn_t mfn) { xen_pfn_t pfn; @@ -417,13 +612,13 @@ xkb_mfn_to_offset(xkb_t *xkb, mfn_t mfn) if (pfn == PFN_INVALID) return (-1ULL); - return (xkb->xkb_hdr.xch_pages_offset + (PAGE_SIZE * pfn)); + return (xkb->xkb_pages_off + xkb_pfn_to_off(xkb, pfn)); } static char * xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm) { - int windowed = xkb->xkb_pages == NULL; + int windowed = (xkb->xkb_pages == NULL); offset_t off; if (mm->mm_mfn == mfn) @@ -458,7 +653,7 @@ xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm) if (pfn == PFN_INVALID) return (NULL); - mm->mm_map = xkb->xkb_pages + (PAGE_SIZE * pfn); + mm->mm_map = xkb->xkb_pages + xkb_pfn_to_off(xkb, pfn); } return (mm->mm_map); @@ -467,10 +662,12 @@ xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm) static mfn_t xkb_pte_to_mfn(mmu_info_t *mmu, char *ptep) { - /* LINTED - alignment */ - uint64_t pte = *((uint64_t *)ptep); + uint64_t pte = 0; - if (mmu->mi_ptesize == 4) { + if (mmu->mi_ptesize == 8) { + /* LINTED - alignment */ + pte = *((uint64_t *)ptep); + } else { /* LINTED - alignment */ pte = *((uint32_t *)ptep); } @@ -759,70 +956,223 @@ xkb_build_ksyms(xkb_t *xkb) return (1); } -/*ARGSUSED*/ -xkb_t * -xkb_open(const char *namelist, const char *corefile, const char *swapfile, - int flag, const char *err) +static xkb_t * +xkb_open_core(xkb_t *xkb) { - struct stat64 corestat; - uintptr_t debug_va = DEF_DEBUG_INFO_VA; + xkb_core_t *xc = &xkb->xkb_core; size_t sz; - size_t i; - xkb_t *xkb = NULL; - if (stat64(corefile, &corestat) == -1) - return (xkb_fail(xkb, "cannot stat %s", corefile)); + xkb->xkb_type = XKB_FORMAT_CORE; - if (flag != O_RDONLY) - return (xkb_fail(xkb, "invalid open flags")); + if ((xkb->xkb_fd = open64(xkb->xkb_path, O_RDONLY)) == -1) + return (xkb_fail(xkb, "cannot open %s", xkb->xkb_path)); - xkb = mdb_zalloc(sizeof (*xkb), UM_SLEEP); + if (pread64(xkb->xkb_fd, &xc->xc_hdr, sizeof (xc->xc_hdr), 0) != + sizeof (xc->xc_hdr)) + return (xkb_fail(xkb, "invalid dump file")); - for (i = 0; i < 4; i++) - xkb->xkb_pt_map[i].mm_map = (char *)MAP_FAILED; + if (xc->xc_hdr.xch_magic == XC_CORE_MAGIC_HVM) + return (xkb_fail(xkb, "cannot process HVM images")); - xkb->xkb_map.mm_map = (char *)MAP_FAILED; - xkb->xkb_p2m_buf = (char *)MAP_FAILED; + if (xc->xc_hdr.xch_magic != XC_CORE_MAGIC) { + return (xkb_fail(xkb, "invalid magic %d", + xc->xc_hdr.xch_magic)); + } - xkb->xkb_path = strdup(corefile); + /* + * With FORMAT_CORE, all pages are in the dump (non-existing + * ones are zeroed out). + */ + xkb->xkb_nr_pages = xc->xc_hdr.xch_nr_pages; + xkb->xkb_pages_off = xc->xc_hdr.xch_pages_offset; + xkb->xkb_max_pfn = xc->xc_hdr.xch_nr_pages - 1; + xkb->xkb_nr_vcpus = xc->xc_hdr.xch_nr_vcpus; - if ((xkb->xkb_fd = open64(corefile, O_RDONLY)) == -1) - return (xkb_fail(xkb, "cannot open %s", corefile)); + sz = xkb->xkb_nr_vcpus * sizeof (*xkb->xkb_vcpus); - if (pread64(xkb->xkb_fd, &xkb->xkb_hdr, sizeof (xkb->xkb_hdr), 0) != - sizeof (xkb->xkb_hdr)) - return (xkb_fail(xkb, "invalid dump file")); + xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP); - if (xkb->xkb_hdr.xch_magic == XC_CORE_MAGIC_HVM) + if (pread64(xkb->xkb_fd, xkb->xkb_vcpus, sz, + xc->xc_hdr.xch_ctxt_offset) != sz) + return (xkb_fail(xkb, "cannot read VCPU contexts")); + + if (xkb->xkb_vcpus[0].flags & VGCF_HVM_GUEST) return (xkb_fail(xkb, "cannot process HVM images")); - if (xkb->xkb_hdr.xch_magic != XC_CORE_MAGIC) { - return (xkb_fail(xkb, "invalid magic %d", - xkb->xkb_hdr.xch_magic)); + /* + * Try to map all the data pages. If we can't, fall back to the + * window/pread() approach, which is significantly slower. + */ + xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages, + PROT_READ, MAP_SHARED, xkb->xkb_fd, xc->xc_hdr.xch_pages_offset); + + if (xkb->xkb_pages == (char *)MAP_FAILED) + xkb->xkb_pages = NULL; + + /* + * We'd like to adapt for correctness' sake, but we have no way of + * detecting a PAE guest, since cr4 writes are disallowed. + */ + xkb->xkb_is_pae = 1; + + if (!xkb_map_p2m(xkb)) + return (NULL); + + return (xkb); +} + +static xkb_t * +xkb_open_elf(xkb_t *xkb) +{ + xkb_elf_t *xe = &xkb->xkb_elf; + mdb_gelf_sect_t *sect; + char *notes; + char *pos; + mdb_io_t *io; + + if ((io = mdb_fdio_create_path(NULL, xkb->xkb_path, + O_RDONLY, 0)) == NULL) + return (xkb_fail(xkb, "failed to open")); + + xe->xe_gelf = mdb_gelf_create(io, ET_NONE, GF_FILE); + + if (xe->xe_gelf == NULL) { + mdb_io_destroy(io); + return (xkb); } - sz = xkb->xkb_hdr.xch_nr_vcpus * sizeof (*xkb->xkb_ctxts); + xkb->xkb_fd = mdb_fdio_fileno(io); - xkb->xkb_ctxts = mdb_alloc(sz, UM_SLEEP); + sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".note.Xen"); - if (pread64(xkb->xkb_fd, xkb->xkb_ctxts, sz, - xkb->xkb_hdr.xch_ctxt_offset) != sz) - return (xkb_fail(xkb, "cannot read VCPU contexts")); + if (sect == NULL) + return (xkb); + + if ((notes = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) + return (xkb); + + /* + * Now we know this is indeed a hypervisor core dump, even if + * it's corrupted. + */ + xkb->xkb_type = XKB_FORMAT_ELF; + + for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) { + /* LINTED - alignment */ + Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos; + uint64_t vers; + char *desc; + char *name; + + name = pos + sizeof (*nhdr); + desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4); - if (xkb->xkb_ctxts[0].flags & VGCF_HVM_GUEST) + pos = desc + nhdr->n_descsz; + + switch (nhdr->n_type) { + case XEN_ELFNOTE_DUMPCORE_NONE: + break; + + case XEN_ELFNOTE_DUMPCORE_HEADER: + if (nhdr->n_descsz != sizeof (struct xc_elf_header)) { + return (xkb_fail(xkb, "invalid ELF note " + "XEN_ELFNOTE_DUMPCORE_HEADER\n")); + } + + bcopy(desc, &xe->xe_hdr, + sizeof (struct xc_elf_header)); + break; + + case XEN_ELFNOTE_DUMPCORE_XEN_VERSION: + if (nhdr->n_descsz != sizeof (struct xc_elf_version)) { + return (xkb_fail(xkb, "invalid ELF note " + "XEN_ELFNOTE_DUMPCORE_XEN_VERSION\n")); + } + + bcopy(desc, &xe->xe_version, + sizeof (struct xc_elf_version)); + break; + + case XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION: + /* LINTED - alignment */ + vers = *((uint64_t *)desc); + if ((vers >> 32) != 0) { + return (xkb_fail(xkb, "unknown major " + "version %d (expected 0)\n", + (int)(vers >> 32))); + } + + if ((vers & 0xffffffff) != 1) { + mdb_warn("unexpected dump minor number " + "version %d (expected 1)\n", + (int)(vers & 0xffffffff)); + } + break; + + default: + mdb_warn("unknown ELF note %d(%s)\n", + nhdr->n_type, name); + break; + } + } + + if (xe->xe_hdr.xeh_magic == XC_CORE_MAGIC_HVM) return (xkb_fail(xkb, "cannot process HVM images")); + if (xe->xe_hdr.xeh_magic != XC_CORE_MAGIC) { + return (xkb_fail(xkb, "invalid magic %d", + xe->xe_hdr.xeh_magic)); + } + + xkb->xkb_nr_pages = xe->xe_hdr.xeh_nr_pages; + xkb->xkb_is_pae = (strstr(xe->xe_version.xev_capabilities, + "x86_32p") != NULL); + + sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_prstatus"); + + if (sect == NULL) + return (xkb_fail(xkb, "cannot find section .xen_prstatus")); + + if (sect->gs_shdr.sh_entsize != sizeof (vcpu_guest_context_t)) + return (xkb_fail(xkb, "invalid section .xen_prstatus")); + + xkb->xkb_nr_vcpus = sect->gs_shdr.sh_size / sect->gs_shdr.sh_entsize; + + if ((xkb->xkb_vcpus = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) + return (xkb_fail(xkb, "cannot load section .xen_prstatus")); + + sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pages"); + + if (sect == NULL) + return (xkb_fail(xkb, "cannot find section .xen_pages")); + + if (!PAGE_ALIGNED(sect->gs_shdr.sh_offset)) + return (xkb_fail(xkb, ".xen_pages is not page aligned")); + + if (sect->gs_shdr.sh_entsize != PAGE_SIZE) + return (xkb_fail(xkb, "invalid section .xen_pages")); + + xkb->xkb_pages_off = sect->gs_shdr.sh_offset; + /* * Try to map all the data pages. If we can't, fall back to the * window/pread() approach, which is significantly slower. */ - xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_hdr.xch_nr_pages, - PROT_READ, MAP_SHARED, xkb->xkb_fd, - xkb->xkb_hdr.xch_pages_offset); + xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages, + PROT_READ, MAP_SHARED, xkb->xkb_fd, xkb->xkb_pages_off); if (xkb->xkb_pages == (char *)MAP_FAILED) xkb->xkb_pages = NULL; + if (!xkb_build_p2m(xkb)) + return (NULL); + + return (xkb); +} + +static void +xkb_init_mmu(xkb_t *xkb) +{ #if defined(__amd64) xkb->xkb_mmu.mi_max = 3; xkb->xkb_mmu.mi_shift[0] = 12; @@ -832,26 +1182,64 @@ xkb_open(const char *namelist, const char *corefile, const char *swapfile, xkb->xkb_mmu.mi_ptes = 512; xkb->xkb_mmu.mi_ptesize = 8; #elif defined(__i386) - /* - * We'd like to adapt for correctness' sake, but we have no way of - * detecting a PAE guest, since cr4 writes are disallowed. - */ - debug_va = PAE_DEBUG_INFO_VA; - xkb->xkb_mmu.mi_max = 2; - xkb->xkb_mmu.mi_shift[0] = 12; - xkb->xkb_mmu.mi_shift[1] = 21; - xkb->xkb_mmu.mi_shift[2] = 30; - xkb->xkb_mmu.mi_ptes = 512; - xkb->xkb_mmu.mi_ptesize = 8; + if (xkb->xkb_is_pae) { + xkb->xkb_mmu.mi_max = 2; + xkb->xkb_mmu.mi_shift[0] = 12; + xkb->xkb_mmu.mi_shift[1] = 21; + xkb->xkb_mmu.mi_shift[2] = 30; + xkb->xkb_mmu.mi_ptes = 512; + xkb->xkb_mmu.mi_ptesize = 8; + } else { + xkb->xkb_mmu.mi_max = 1; + xkb->xkb_mmu.mi_shift[0] = 12; + xkb->xkb_mmu.mi_shift[1] = 22; + xkb->xkb_mmu.mi_ptes = 1024; + xkb->xkb_mmu.mi_ptesize = 4; + } #endif +} - if (!xkb_map_p2m(xkb)) +/*ARGSUSED*/ +xkb_t * +xkb_open(const char *namelist, const char *corefile, const char *swapfile, + int flag, const char *err) +{ + struct stat64 corestat; + xkb_t *xkb = NULL; + size_t i; + + if (stat64(corefile, &corestat) == -1) + return (xkb_fail(xkb, "cannot stat %s", corefile)); + + if (flag != O_RDONLY) + return (xkb_fail(xkb, "invalid open flags")); + + xkb = mdb_zalloc(sizeof (*xkb), UM_SLEEP); + + for (i = 0; i < 4; i++) + xkb->xkb_pt_map[i].mm_map = (char *)MAP_FAILED; + + xkb->xkb_type = XKB_FORMAT_UNKNOWN; + xkb->xkb_map.mm_map = (char *)MAP_FAILED; + xkb->xkb_core.xc_p2m_buf = (char *)MAP_FAILED; + xkb->xkb_fd = -1; + + xkb->xkb_path = strdup(corefile); + + if ((xkb = xkb_open_elf(xkb)) == NULL) return (NULL); + if (xkb->xkb_type == XKB_FORMAT_UNKNOWN) { + if (!xkb_open_core(xkb)) + return (NULL); + } + + xkb_init_mmu(xkb); + if (!xkb_build_m2p(xkb)) return (NULL); - if (xkb_read(xkb, debug_va, &xkb->xkb_info, + if (xkb_read(xkb, DEBUG_INFO, &xkb->xkb_info, sizeof (xkb->xkb_info)) != sizeof (xkb->xkb_info)) return (xkb_fail(xkb, "cannot read debug_info")); @@ -874,7 +1262,6 @@ xkb_open(const char *namelist, const char *corefile, const char *swapfile, int xkb_close(xkb_t *xkb) { - size_t sz; size_t i; if (xkb == NULL) @@ -885,14 +1272,9 @@ xkb_close(xkb_t *xkb) (xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t)); } - sz = sizeof (xen_pfn_t) * xkb->xkb_hdr.xch_nr_pages; - - if (xkb->xkb_p2m_buf != (xen_pfn_t *)MAP_FAILED) - (void) munmap(xkb->xkb_p2m_buf, sz); - if (xkb->xkb_pages != NULL) { (void) munmap((void *)xkb->xkb_pages, - PAGE_SIZE * xkb->xkb_hdr.xch_nr_pages); + PAGE_SIZE * xkb->xkb_nr_pages); } else { for (i = 0; i < 4; i++) { char *addr = xkb->xkb_pt_map[i].mm_map; @@ -905,16 +1287,44 @@ xkb_close(xkb_t *xkb) } } - if (xkb->xkb_ctxts != NULL) { - mdb_free(xkb->xkb_ctxts, sizeof (struct vcpu_guest_context) * - xkb->xkb_hdr.xch_nr_vcpus); - } - if (xkb->xkb_namelist != NULL) mdb_free(xkb->xkb_namelist, xkb->xkb_namesize); - if (xkb->xkb_fd != -1) - (void) close(xkb->xkb_fd); + if (xkb->xkb_type == XKB_FORMAT_ELF) { + xkb_elf_t *xe = &xkb->xkb_elf; + size_t sz; + + if (xe->xe_gelf != NULL) + mdb_gelf_destroy(xe->xe_gelf); + + sz = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1); + + if (xkb->xkb_p2m != NULL) + mdb_free(xkb->xkb_p2m, sz); + + sz = sizeof (size_t) * (xkb->xkb_max_pfn + 1); + + if (xe->xe_off != NULL) + mdb_free(xe->xe_off, sz); + } else if (xkb->xkb_type == XKB_FORMAT_CORE) { + xkb_core_t *xc = &xkb->xkb_core; + size_t sz; + + if (xkb->xkb_fd != -1) + (void) close(xkb->xkb_fd); + + sz = (xkb->xkb_nr_pages * sizeof (mfn_t)) + (PAGE_SIZE * 2); + sz = PAGE_MASK(sz); + + if (xc->xc_p2m_buf != (xen_pfn_t *)MAP_FAILED) + (void) munmap(xc->xc_p2m_buf, sz); + + if (xkb->xkb_vcpus != NULL) { + sz = sizeof (struct vcpu_guest_context) * + xkb->xkb_nr_vcpus; + mdb_free(xkb->xkb_vcpus, sz); + } + } free(xkb->xkb_path); @@ -937,7 +1347,7 @@ xkb_sym_io(xkb_t *xkb, const char *symfile) uint64_t xkb_vtop(xkb_t *xkb, struct as *as, uintptr_t addr) { - mfn_t tlmfn = xen_cr3_to_pfn(xkb->xkb_ctxts[0].ctrlreg[3]); + mfn_t tlmfn = xen_cr3_to_pfn(xkb->xkb_vcpus[0].ctrlreg[3]); mfn_t mfn; if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID) @@ -959,14 +1369,14 @@ xkb_getmregs(xkb_t *xkb, uint_t cpu, struct privmregs *mregs) struct cpu_user_regs *ur; struct regs *regs; - if (cpu >= xkb->xkb_hdr.xch_nr_vcpus) { + if (cpu >= xkb->xkb_nr_vcpus) { errno = EINVAL; return (-1); } bzero(mregs, sizeof (*mregs)); - vcpu = &xkb->xkb_ctxts[cpu]; + vcpu = &xkb->xkb_vcpus[cpu]; ur = &vcpu->user_regs; regs = &mregs->pm_gregs; diff --git a/usr/src/uts/common/xen/io/blkif_impl.h b/usr/src/uts/common/xen/io/blkif_impl.h new file mode 100644 index 0000000000..2684b81767 --- /dev/null +++ b/usr/src/uts/common/xen/io/blkif_impl.h @@ -0,0 +1,89 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef __XEN_BLKIF_H__ +#define __XEN_BLKIF_H__ + +#include <public/io/ring.h> +#include <public/io/blkif.h> +#include <public/io/protocols.h> + +/* Not a real protocol. Used to generate ring structs which contain + * the elements common to all protocols only. This way we get a + * compiler-checkable way to use common struct elements, so we can + * avoid using switch(protocol) in a number of places. */ + +/* i386 protocol version */ + +#pragma pack(4) + +struct blkif_x86_32_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_32_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_x86_32_request blkif_x86_32_request_t; +typedef struct blkif_x86_32_response blkif_x86_32_response_t; + +#pragma pack() + +/* x86_64 protocol version */ +struct blkif_x86_64_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ +#if defined(__GNUC__) + uint64_t __attribute__((__aligned__(8))) id; +#else + uint8_t pad[4]; + uint64_t id; +#endif + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_64_response { +#if defined(__GNUC__) + uint64_t __attribute__((__aligned__(8))) id; +#else + uint64_t id; +#endif + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_x86_64_request blkif_x86_64_request_t; +typedef struct blkif_x86_64_response blkif_x86_64_response_t; + +DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response); +DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response); + +enum blkif_protocol { + BLKIF_PROTOCOL_NATIVE = 1, + BLKIF_PROTOCOL_X86_32 = 2, + BLKIF_PROTOCOL_X86_64 = 3, +}; + +#endif /* __XEN_BLKIF_H__ */ diff --git a/usr/src/uts/common/xen/io/xdb.c b/usr/src/uts/common/xen/io/xdb.c index 33a075ac3d..e4013de7b4 100644 --- a/usr/src/uts/common/xen/io/xdb.c +++ b/usr/src/uts/common/xen/io/xdb.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -73,11 +73,17 @@ #include <sys/gnttab.h> #include <sys/lofi.h> #include <io/xdf.h> +#include <xen/io/blkif_impl.h> #include <io/xdb.h> static xdb_t *xdb_statep; static int xdb_debug = 0; +static int xdb_push_response(xdb_t *, uint64_t, uint8_t, uint16_t); +static int xdb_get_request(xdb_t *, blkif_request_t *); +static void blkif_get_x86_32_req(blkif_request_t *, blkif_x86_32_request_t *); +static void blkif_get_x86_64_req(blkif_request_t *, blkif_x86_64_request_t *); + #ifdef DEBUG /* * debug aid functions @@ -90,18 +96,18 @@ logva(xdb_t *vdp, uint64_t va) int i; page_addrs = vdp->page_addrs; - for (i = 0; i < XDB_MAX_IO_PAGES; i++) { + for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) { if (page_addrs[i] == va) debug_enter("VA remapping found!"); } - for (i = 0; i < XDB_MAX_IO_PAGES; i++) { + for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) { if (page_addrs[i] == 0) { page_addrs[i] = va; break; } } - ASSERT(i < XDB_MAX_IO_PAGES); + ASSERT(i < XDB_MAX_IO_PAGES(vdp)); } static void @@ -111,13 +117,13 @@ unlogva(xdb_t *vdp, uint64_t va) int i; page_addrs = vdp->page_addrs; - for (i = 0; i < XDB_MAX_IO_PAGES; i++) { + for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) { if (page_addrs[i] == va) { page_addrs[i] = 0; break; } } - ASSERT(i < XDB_MAX_IO_PAGES); + ASSERT(i < XDB_MAX_IO_PAGES(vdp)); } static void @@ -434,18 +440,10 @@ xdb_free_req(xdb_request_t *req) static void xdb_response(xdb_t *vdp, blkif_request_t *req, boolean_t ok) { - xendev_ring_t *ringp = vdp->xs_ring; ddi_acc_handle_t acchdl = vdp->xs_ring_hdl; - blkif_response_t *resp; - - resp = xvdi_ring_get_response(ringp); - ASSERT(resp); - ddi_put64(acchdl, &resp->id, ddi_get64(acchdl, &req->id)); - ddi_put8(acchdl, &resp->operation, ddi_get8(acchdl, &req->operation)); - ddi_put16(acchdl, (uint16_t *)&resp->status, - ok ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR); - if (xvdi_ring_push_response(ringp)) + if (xdb_push_response(vdp, ddi_get64(acchdl, &req->id), + ddi_get8(acchdl, &req->operation), ok)) xvdi_notify_oe(vdp->xs_dip); } @@ -454,18 +452,28 @@ xdb_init_ioreqs(xdb_t *vdp) { int i; - for (i = 0; i < BLKIF_RING_SIZE; i++) { + ASSERT(vdp->xs_nentry); + + if (vdp->xs_req == NULL) + vdp->xs_req = kmem_alloc(vdp->xs_nentry * + sizeof (xdb_request_t), KM_SLEEP); +#ifdef DEBUG + if (vdp->page_addrs == NULL) + vdp->page_addrs = kmem_zalloc(XDB_MAX_IO_PAGES(vdp) * + sizeof (uint64_t), KM_SLEEP); +#endif + for (i = 0; i < vdp->xs_nentry; i++) { vdp->xs_req[i].xr_idx = i; vdp->xs_req[i].xr_next = i + 1; } - vdp->xs_req[BLKIF_RING_SIZE - 1].xr_next = -1; + vdp->xs_req[vdp->xs_nentry - 1].xr_next = -1; vdp->xs_free_req = 0; /* alloc va in host dom for io page mapping */ vdp->xs_iopage_va = vmem_xalloc(heap_arena, - XDB_MAX_IO_PAGES * PAGESIZE, PAGESIZE, 0, 0, 0, 0, + XDB_MAX_IO_PAGES(vdp) * PAGESIZE, PAGESIZE, 0, 0, 0, 0, VM_SLEEP); - for (i = 0; i < XDB_MAX_IO_PAGES; i++) + for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) hat_prepare_mapping(kas.a_hat, vdp->xs_iopage_va + i * PAGESIZE); } @@ -475,18 +483,29 @@ xdb_uninit_ioreqs(xdb_t *vdp) { int i; - for (i = 0; i < XDB_MAX_IO_PAGES; i++) + for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) hat_release_mapping(kas.a_hat, vdp->xs_iopage_va + i * PAGESIZE); vmem_xfree(heap_arena, vdp->xs_iopage_va, - XDB_MAX_IO_PAGES * PAGESIZE); + XDB_MAX_IO_PAGES(vdp) * PAGESIZE); + if (vdp->xs_req != NULL) { + kmem_free(vdp->xs_req, vdp->xs_nentry * sizeof (xdb_request_t)); + vdp->xs_req = NULL; + } +#ifdef DEBUG + if (vdp->page_addrs != NULL) { + kmem_free(vdp->page_addrs, XDB_MAX_IO_PAGES(vdp) * + sizeof (uint64_t)); + vdp->page_addrs = NULL; + } +#endif } static uint_t xdb_intr(caddr_t arg) { - xendev_ring_t *ringp; - blkif_request_t *req; + blkif_request_t req; + blkif_request_t *reqp = &req; xdb_request_t *xreq; buf_t *bp; uint8_t op; @@ -506,8 +525,6 @@ xdb_intr(caddr_t arg) return (DDI_INTR_UNCLAIMED); } - ringp = vdp->xs_ring; - /* * We'll loop till there is no more request in the ring * We won't stuck in this loop for ever since the size of ring buffer @@ -516,16 +533,16 @@ xdb_intr(caddr_t arg) */ /* req_event will be increased in xvdi_ring_get_request() */ - while ((req = xvdi_ring_get_request(ringp)) != NULL) { + while (xdb_get_request(vdp, reqp)) { ret = DDI_INTR_CLAIMED; - op = ddi_get8(vdp->xs_ring_hdl, &req->operation); + op = ddi_get8(vdp->xs_ring_hdl, &reqp->operation); if (op == BLKIF_OP_READ || op == BLKIF_OP_WRITE || op == BLKIF_OP_WRITE_BARRIER || op == BLKIF_OP_FLUSH_DISKCACHE) { #ifdef DEBUG - xdb_dump_request_oe(req); + xdb_dump_request_oe(reqp); #endif xreq = xdb_get_req(vdp); ASSERT(xreq); @@ -545,11 +562,11 @@ xdb_intr(caddr_t arg) } xreq->xr_curseg = 0; /* start from first segment */ - bp = xdb_get_buf(vdp, req, xreq); + bp = xdb_get_buf(vdp, reqp, xreq); if (bp == NULL) { /* failed to form a buf */ xdb_free_req(xreq); - xdb_response(vdp, req, B_FALSE); + xdb_response(vdp, reqp, B_FALSE); continue; } bp->av_forw = NULL; @@ -566,9 +583,8 @@ xdb_intr(caddr_t arg) vdp->xs_l_iobuf->av_forw = bp; vdp->xs_l_iobuf = bp; } - vdp->xs_ionum++; } else { - xdb_response(vdp, req, B_FALSE); + xdb_response(vdp, reqp, B_FALSE); XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: " "Unsupported cmd received from dom %d", ddi_get_name_addr(dip), vdp->xs_peer)); @@ -586,14 +602,11 @@ xdb_intr(caddr_t arg) static int xdb_biodone(buf_t *bp) { - blkif_response_t *resp; int i, err, bioerr; uint8_t segs; gnttab_unmap_grant_ref_t unmapops[BLKIF_MAX_SEGMENTS_PER_REQUEST]; xdb_request_t *xreq = XDB_BP2XREQ(bp); xdb_t *vdp = xreq->xr_vdp; - xendev_ring_t *ringp = vdp->xs_ring; - ddi_acc_handle_t acchdl = vdp->xs_ring_hdl; buf_t *nbp; bioerr = geterror(bp); @@ -663,13 +676,7 @@ xdb_biodone(buf_t *bp) /* send response back to frontend */ if (vdp->xs_if_status == XDB_CONNECTED) { - resp = xvdi_ring_get_response(ringp); - ASSERT(resp); - ddi_put64(acchdl, &resp->id, xreq->xr_id); - ddi_put8(acchdl, &resp->operation, xreq->xr_op); - ddi_put16(acchdl, (uint16_t *)&resp->status, - bioerr ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY); - if (xvdi_ring_push_response(ringp)) + if (xdb_push_response(vdp, xreq->xr_id, xreq->xr_op, bioerr)) xvdi_notify_oe(vdp->xs_dip); XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "sent resp back to frontend, id=%llu", @@ -680,9 +687,10 @@ xdb_biodone(buf_t *bp) xdb_free_req(xreq); vdp->xs_ionum--; - if ((vdp->xs_if_status != XDB_CONNECTED) && (vdp->xs_ionum == 0)) + if ((vdp->xs_if_status != XDB_CONNECTED) && (vdp->xs_ionum == 0)) { /* we're closing, someone is waiting for I/O clean-up */ cv_signal(&vdp->xs_ionumcv); + } mutex_exit(&vdp->xs_iomutex); @@ -697,6 +705,7 @@ xdb_bindto_frontend(xdb_t *vdp) grant_ref_t gref; evtchn_port_t evtchn; dev_info_t *dip = vdp->xs_dip; + char protocol[64] = ""; /* * Gather info from frontend @@ -713,11 +722,50 @@ xdb_bindto_frontend(xdb_t *vdp) return (DDI_FAILURE); } + vdp->xs_blk_protocol = BLKIF_PROTOCOL_NATIVE; + vdp->xs_nentry = BLKIF_RING_SIZE; + vdp->xs_entrysize = sizeof (union blkif_sring_entry); + + err = xenbus_gather(XBT_NULL, oename, + "protocol", "%63s", protocol, NULL); + if (err) + (void) strcpy(protocol, "unspecified, assuming native"); + else { + /* + * We must check for NATIVE first, so that the fast path + * is taken for copying data from the guest to the host. + */ + if (strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE) != 0) { + if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_32) == 0) { + vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_32; + vdp->xs_nentry = BLKIF_X86_32_RING_SIZE; + vdp->xs_entrysize = + sizeof (union blkif_x86_32_sring_entry); + } else if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_64) == + 0) { + vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_64; + vdp->xs_nentry = BLKIF_X86_64_RING_SIZE; + vdp->xs_entrysize = + sizeof (union blkif_x86_64_sring_entry); + } else { + xvdi_fatal_error(dip, err, "unknown protocol"); + return (DDI_FAILURE); + } + } + } +#ifdef DEBUG + cmn_err(CE_NOTE, "xdb@%s: blkif protocol '%s' ", + ddi_get_name_addr(dip), protocol); +#endif + /* * map and init ring + * + * The ring parameters must match those which have been allocated + * in the front end. */ - err = xvdi_map_ring(dip, BLKIF_RING_SIZE, - sizeof (union blkif_sring_entry), gref, &vdp->xs_ring); + err = xvdi_map_ring(dip, vdp->xs_nentry, vdp->xs_entrysize, + gref, &vdp->xs_ring); if (err != DDI_SUCCESS) return (DDI_FAILURE); /* @@ -1224,6 +1272,7 @@ xdb_send_buf(void *arg) while ((bp = vdp->xs_f_iobuf) != NULL) { vdp->xs_f_iobuf = bp->av_forw; bp->av_forw = NULL; + vdp->xs_ionum++; mutex_exit(&vdp->xs_iomutex); if (bp->b_bcount != 0) { int err = ldi_strategy(vdp->xs_ldi_hdl, bp); @@ -1473,7 +1522,7 @@ static struct dev_ops xdb_dev_ops = { */ static struct modldrv modldrv = { &mod_driverops, /* Type of module. */ - "vbd backend driver %I%", /* Name of the module */ + "vbd backend driver 1.4", /* Name of the module */ &xdb_dev_ops /* driver ops */ }; @@ -1511,3 +1560,97 @@ _info(struct modinfo *modinfop) { return (mod_info(&xdb_modlinkage, modinfop)); } + +static int +xdb_get_request(xdb_t *vdp, blkif_request_t *req) +{ + void *src = xvdi_ring_get_request(vdp->xs_ring); + + if (src == NULL) + return (0); + + switch (vdp->xs_blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + (void) memcpy(req, src, sizeof (*req)); + break; + case BLKIF_PROTOCOL_X86_32: + blkif_get_x86_32_req(req, src); + break; + case BLKIF_PROTOCOL_X86_64: + blkif_get_x86_64_req(req, src); + break; + default: + cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d", + ddi_get_name_addr(vdp->xs_dip), + vdp->xs_blk_protocol); + } + return (1); +} + +static int +xdb_push_response(xdb_t *vdp, uint64_t id, uint8_t op, uint16_t status) +{ + ddi_acc_handle_t acchdl = vdp->xs_ring_hdl; + blkif_response_t *rsp = xvdi_ring_get_response(vdp->xs_ring); + blkif_x86_32_response_t *rsp_32 = (blkif_x86_32_response_t *)rsp; + blkif_x86_64_response_t *rsp_64 = (blkif_x86_64_response_t *)rsp; + + ASSERT(rsp); + + switch (vdp->xs_blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + ddi_put64(acchdl, &rsp->id, id); + ddi_put8(acchdl, &rsp->operation, op); + ddi_put16(acchdl, (uint16_t *)&rsp->status, + status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR); + break; + case BLKIF_PROTOCOL_X86_32: + ddi_put64(acchdl, &rsp_32->id, id); + ddi_put8(acchdl, &rsp_32->operation, op); + ddi_put16(acchdl, (uint16_t *)&rsp_32->status, + status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR); + break; + case BLKIF_PROTOCOL_X86_64: + ddi_put64(acchdl, &rsp_64->id, id); + ddi_put8(acchdl, &rsp_64->operation, op); + ddi_put16(acchdl, (uint16_t *)&rsp_64->status, + status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR); + break; + default: + cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d", + ddi_get_name_addr(vdp->xs_dip), + vdp->xs_blk_protocol); + } + + return (xvdi_ring_push_response(vdp->xs_ring)); +} + +static void +blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src) +{ + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + dst->operation = src->operation; + dst->nr_segments = src->nr_segments; + dst->handle = src->handle; + dst->id = src->id; + dst->sector_number = src->sector_number; + if (n > src->nr_segments) + n = src->nr_segments; + for (i = 0; i < n; i++) + dst->seg[i] = src->seg[i]; +} + +static void +blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src) +{ + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + dst->operation = src->operation; + dst->nr_segments = src->nr_segments; + dst->handle = src->handle; + dst->id = src->id; + dst->sector_number = src->sector_number; + if (n > src->nr_segments) + n = src->nr_segments; + for (i = 0; i < n; i++) + dst->seg[i] = src->seg[i]; +} diff --git a/usr/src/uts/common/xen/io/xdb.h b/usr/src/uts/common/xen/io/xdb.h index d4d744d2ac..0abd008d0a 100644 --- a/usr/src/uts/common/xen/io/xdb.h +++ b/usr/src/uts/common/xen/io/xdb.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -101,7 +101,7 @@ enum xdb_fe_state { #define XDB_INST2MINOR(i) (minor_t)(i) #define XDB_INST2SOFTS(instance) \ ((xdb_t *)ddi_get_soft_state(xdb_statep, (instance))) -#define XDB_MAX_IO_PAGES BLKIF_RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST +#define XDB_MAX_IO_PAGES(v) ((v)->xs_nentry * BLKIF_MAX_SEGMENTS_PER_REQUEST) /* get kva of a mapped-in page coresponding to (xreq-index, seg) pair */ #define XDB_IOPAGE_VA(_pagebase, _xreqidx, _seg) \ ((_pagebase) + ((_xreqidx) \ @@ -192,14 +192,17 @@ struct xdb { /* head of free list of xdb_request_t */ int xs_free_req; /* pre-allocated xdb_request_t pool */ - xdb_request_t xs_req[BLKIF_RING_SIZE]; + xdb_request_t *xs_req; kstat_t *xs_kstats; uint64_t xs_stat_req_reads; uint64_t xs_stat_req_writes; uint64_t xs_stat_req_barriers; uint64_t xs_stat_req_flushes; + enum blkif_protocol xs_blk_protocol; + size_t xs_nentry; + size_t xs_entrysize; #ifdef DEBUG - uint64_t page_addrs[XDB_MAX_IO_PAGES]; /* for debug aid */ + uint64_t *page_addrs; /* for debug aid */ #endif /* DEBUG */ }; diff --git a/usr/src/uts/common/xen/io/xdf.c b/usr/src/uts/common/xen/io/xdf.c index 4d695ec992..865eb69230 100644 --- a/usr/src/uts/common/xen/io/xdf.c +++ b/usr/src/uts/common/xen/io/xdf.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -56,6 +56,7 @@ #include <xen/sys/xendev.h> #include <sys/gnttab.h> #include <sys/scsi/generic/inquiry.h> +#include <xen/io/blkif_impl.h> #include <io/xdf.h> #define FLUSH_DISKCACHE 0x1 @@ -325,16 +326,6 @@ xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) ddi_iblock_cookie_t ibc; ddi_iblock_cookie_t softibc; int instance; -#if defined(XPV_HVM_DRIVER) && defined(__i386) - /* XXX: 6609126 32-bit xdf driver panics on a 64-bit dom0 */ - extern int xen_is_64bit; - - if (xen_is_64bit) { - cmn_err(CE_WARN, "xdf cannot be used in 32-bit domUs on a" - " 64-bit dom0."); - return (DDI_FAILURE); - } -#endif xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM, "xdfdebug", 0); @@ -1708,6 +1699,19 @@ trans_retry: goto abort_trans; } + /* + * "protocol" is written by the domain builder in the case of PV + * domains. However, it is not written for HVM domains, so let's + * write it here. + */ + if (rv = xenbus_printf(xbt, xsnode, "protocol", "%s", + XEN_IO_PROTO_ABI_NATIVE)) { + cmn_err(CE_WARN, "xdf@%s: failed to write protocol", + ddi_get_name_addr(dip)); + xvdi_fatal_error(dip, rv, "writing protocol"); + goto abort_trans; + } + if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) { cmn_err(CE_WARN, "xdf@%s: " "failed to switch state to XenbusStateInitialised", diff --git a/usr/src/uts/common/xen/io/xdf.h b/usr/src/uts/common/xen/io/xdf.h index ea796772dd..4f8b635733 100644 --- a/usr/src/uts/common/xen/io/xdf.h +++ b/usr/src/uts/common/xen/io/xdf.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,7 +35,12 @@ extern "C" { #endif -#define BLKIF_RING_SIZE __RING_SIZE((blkif_sring_t *)NULL, PAGESIZE) +#define BLKIF_RING_SIZE \ + __RING_SIZE((blkif_sring_t *)NULL, PAGESIZE) +#define BLKIF_X86_32_RING_SIZE \ + __RING_SIZE((blkif_x86_32_sring_t *)NULL, PAGESIZE) +#define BLKIF_X86_64_RING_SIZE \ + __RING_SIZE((blkif_x86_64_sring_t *)NULL, PAGESIZE) /* * VBDs have standard 512 byte blocks diff --git a/usr/src/uts/common/xen/io/xnb.c b/usr/src/uts/common/xen/io/xnb.c index fd962b2bcc..e344dbab7a 100644 --- a/usr/src/uts/common/xen/io/xnb.c +++ b/usr/src/uts/common/xen/io/xnb.c @@ -114,11 +114,6 @@ int xnb_unmop_hiwat = NET_TX_RING_SIZE - (NET_TX_RING_SIZE >> 2); boolean_t xnb_hv_copy = B_TRUE; boolean_t xnb_explicit_pageflip_set = B_FALSE; -#ifdef XNB_DEBUG -#define NR_GRANT_ENTRIES \ - (NR_GRANT_FRAMES * PAGESIZE / sizeof (grant_entry_t)) -#endif /* XNB_DEBUG */ - /* XXPV dme: are these really invalid? */ #define INVALID_GRANT_HANDLE ((grant_handle_t)-1) #define INVALID_GRANT_REF ((grant_ref_t)-1) @@ -652,10 +647,6 @@ xnb_to_peer(xnb_t *xnbp, mblk_t *mp) cmn_err(CE_PANIC, "xnb_to_peer: " "id %d out of range in request 0x%p", rxreq->id, (void *)rxreq); - if (rxreq->gref >= NR_GRANT_ENTRIES) - cmn_err(CE_PANIC, "xnb_to_peer: " - "grant ref %d out of range in request 0x%p", - rxreq->gref, (void *)rxreq); #endif /* XNB_DEBUG */ /* Assign a pfn and map the new page at the allocated va. */ @@ -995,10 +986,6 @@ xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp) cmn_err(CE_PANIC, "xnb_copy_to_peer: " "id %d out of range in request 0x%p", rxreq->id, (void *)rxreq); - if (rxreq->gref >= NR_GRANT_ENTRIES) - cmn_err(CE_PANIC, "xnb_copy_to_peer: " - "grant ref %d out of range in request 0x%p", - rxreq->gref, (void *)rxreq); #endif /* XNB_DEBUG */ /* 2 */ @@ -1482,8 +1469,6 @@ finished: rxp->xr_mop.ref = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop)->gref; - ASSERT(rxp->xr_mop.ref < NR_GRANT_ENTRIES); - *mop = rxp->xr_mop; *rxpp = rxp; } diff --git a/usr/src/uts/common/xen/io/xnf.c b/usr/src/uts/common/xen/io/xnf.c index 9fd2712824..597e4cf344 100644 --- a/usr/src/uts/common/xen/io/xnf.c +++ b/usr/src/uts/common/xen/io/xnf.c @@ -411,7 +411,8 @@ xnf_setup_rings(xnf_t *xnfp) gnttab_grant_foreign_access_ref(ref, oeid, mfn, 0); } else { - gnttab_grant_foreign_transfer_ref(ref, oeid); + gnttab_grant_foreign_transfer_ref(ref, + oeid, 0); } rxrp->id = ix; rxrp->gref = ref; @@ -1418,7 +1419,7 @@ rx_buffer_hang(xnf_t *xnfp, struct xnf_buffer_desc *bdesc) gnttab_grant_foreign_access_ref(ref, oeid, mfn, 0); } else { - gnttab_grant_foreign_transfer_ref(ref, oeid); + gnttab_grant_foreign_transfer_ref(ref, oeid, 0); } } reqp->id = hang_ix; diff --git a/usr/src/uts/common/xen/os/gnttab.c b/usr/src/uts/common/xen/os/gnttab.c index 238c45768e..a8e49feec0 100644 --- a/usr/src/uts/common/xen/os/gnttab.c +++ b/usr/src/uts/common/xen/os/gnttab.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -31,11 +31,14 @@ * * Granting foreign access to our memory reservation. * - * Copyright (c) 2005, Christopher Clark + * Copyright (c) 2005-2006, Christopher Clark * Copyright (c) 2004-2005, K A Fraser * - * This file may be distributed separately from the Linux kernel, or - * incorporated into other software packages, subject to the following license: + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without @@ -90,49 +93,47 @@ #include <sys/bootvfs.h> #include <sys/bootprops.h> #include <vm/seg_kmem.h> +#include <sys/mman.h> -#define cmpxchg(t, c, n) atomic_cas_16((t), (c), (n)) - -/* External tools reserve first few grant table entries. */ -#define NR_RESERVED_ENTRIES 8 - -#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * \ - MMU_PAGESIZE / sizeof (grant_entry_t)) -#define GNTTAB_LIST_END (NR_GRANT_ENTRIES + 1) -#define VALID_GRANT_REF(r) ((r) < NR_GRANT_ENTRIES) +/* Globals */ -static grant_ref_t gnttab_list[NR_GRANT_ENTRIES]; +static grant_ref_t **gnttab_list; +static uint_t nr_grant_frames; static int gnttab_free_count; static grant_ref_t gnttab_free_head; static kmutex_t gnttab_list_lock; - static grant_entry_t *shared; -#define GT_PGADDR(i) ((uintptr_t)shared + ((i) << PAGESHIFT)) +static struct gnttab_free_callback *gnttab_free_callback_list; -static struct gnttab_free_callback *gnttab_free_callback_list = NULL; +/* Macros */ -static int -get_free_entries(int count) +#define GT_PGADDR(i) ((uintptr_t)shared + ((i) << MMU_PAGESHIFT)) +#define VALID_GRANT_REF(r) ((r) < (nr_grant_frames * GREFS_PER_GRANT_FRAME)) +#define RPP (PAGESIZE / sizeof (grant_ref_t)) +#define GNTTAB_ENTRY(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) +#define CMPXCHG(t, c, n) atomic_cas_16((t), (c), (n)) +/* External tools reserve first few grant table entries. */ +#define NR_RESERVED_ENTRIES 8 +#define GNTTAB_LIST_END 0xffffffff +#define GREFS_PER_GRANT_FRAME (PAGESIZE / sizeof (grant_entry_t)) + +/* Implementation */ + +static uint_t +max_nr_grant_frames(void) { - int ref; - grant_ref_t head; + struct gnttab_query_size query; + int rc; - mutex_enter(&gnttab_list_lock); - if (gnttab_free_count < count) { - mutex_exit(&gnttab_list_lock); - return (-1); - } - ref = head = gnttab_free_head; - gnttab_free_count -= count; - while (count-- > 1) - head = gnttab_list[head]; - gnttab_free_head = gnttab_list[head]; - gnttab_list[head] = GNTTAB_LIST_END; - mutex_exit(&gnttab_list_lock); - return (ref); -} + query.dom = DOMID_SELF; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); + if ((rc < 0) || (query.status != GNTST_okay)) + return (4); /* Legacy max supported number of frames */ -#define get_free_entry() get_free_entries(1) + ASSERT(query.max_nr_frames); + return (query.max_nr_frames); +} static void do_free_callbacks(void) @@ -162,13 +163,79 @@ check_free_callbacks(void) do_free_callbacks(); } +static int +grow_gnttab_list(uint_t more_frames) +{ + uint_t new_nr_grant_frames, extra_entries, i; + + ASSERT(MUTEX_HELD(&gnttab_list_lock)); + + new_nr_grant_frames = nr_grant_frames + more_frames; + extra_entries = more_frames * GREFS_PER_GRANT_FRAME; + + for (i = nr_grant_frames; i < new_nr_grant_frames; i++) + gnttab_list[i] = kmem_alloc(PAGESIZE, KM_SLEEP); + + for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; + i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) + GNTTAB_ENTRY(i) = i + 1; + + GNTTAB_ENTRY(i) = gnttab_free_head; + gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; + gnttab_free_count += extra_entries; + + nr_grant_frames = new_nr_grant_frames; + + check_free_callbacks(); + + return (0); +} + +static int +gnttab_expand(uint_t req_entries) +{ + uint_t cur, extra; + + ASSERT(MUTEX_HELD(&gnttab_list_lock)); + + cur = nr_grant_frames; + extra = ((req_entries + (GREFS_PER_GRANT_FRAME - 1)) / + GREFS_PER_GRANT_FRAME); + if (cur + extra > max_nr_grant_frames()) + return (-1); + + return (grow_gnttab_list(extra)); +} + +static int +get_free_entries(int count) +{ + int ref, rc; + grant_ref_t head; + + mutex_enter(&gnttab_list_lock); + if (gnttab_free_count < count && + ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) { + mutex_exit(&gnttab_list_lock); + return (rc); + } + ref = head = gnttab_free_head; + gnttab_free_count -= count; + while (count-- > 1) + head = GNTTAB_ENTRY(head); + gnttab_free_head = GNTTAB_ENTRY(head); + GNTTAB_ENTRY(head) = GNTTAB_LIST_END; + mutex_exit(&gnttab_list_lock); + return (ref); +} + static void put_free_entry(grant_ref_t ref) { ASSERT(VALID_GRANT_REF(ref)); mutex_enter(&gnttab_list_lock); - gnttab_list[ref] = gnttab_free_head; + GNTTAB_ENTRY(ref) = gnttab_free_head; gnttab_free_head = ref; gnttab_free_count++; check_free_callbacks(); @@ -184,7 +251,7 @@ gnttab_grant_foreign_access(domid_t domid, gnttab_frame_t frame, int readonly) { int ref; - if ((ref = get_free_entry()) == -1) + if ((ref = get_free_entries(1)) == -1) return (-1); ASSERT(VALID_GRANT_REF(ref)); @@ -236,7 +303,7 @@ gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) cmn_err(CE_WARN, "g.e. still in use!"); return (0); } - } while ((nflags = cmpxchg(&shared[ref].flags, flags, 0)) != flags); + } while ((nflags = CMPXCHG(&shared[ref].flags, flags, 0)) != flags); return (1); } @@ -266,29 +333,26 @@ gnttab_end_foreign_access(grant_ref_t ref, int readonly, gnttab_frame_t page) } int -gnttab_grant_foreign_transfer(domid_t domid) +gnttab_grant_foreign_transfer(domid_t domid, pfn_t pfn) { int ref; - if ((ref = get_free_entry()) == -1) + if ((ref = get_free_entries(1)) == -1) return (-1); ASSERT(VALID_GRANT_REF(ref)); - shared[ref].frame = 0; - shared[ref].domid = domid; - membar_producer(); - shared[ref].flags = GTF_accept_transfer; + gnttab_grant_foreign_transfer_ref(ref, domid, pfn); return (ref); } void -gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid) +gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, pfn_t pfn) { ASSERT(VALID_GRANT_REF(ref)); - shared[ref].frame = 0; + shared[ref].frame = pfn; shared[ref].domid = domid; membar_producer(); shared[ref].flags = GTF_accept_transfer; @@ -307,7 +371,7 @@ gnttab_end_foreign_transfer_ref(grant_ref_t ref) * reference and return failure (== 0). */ while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { - if (cmpxchg(&shared[ref].flags, flags, 0) == flags) + if (CMPXCHG(&shared[ref].flags, flags, 0) == flags) return (0); (void) HYPERVISOR_yield(); } @@ -356,11 +420,11 @@ gnttab_free_grant_references(grant_ref_t head) return; mutex_enter(&gnttab_list_lock); ref = head; - while (gnttab_list[ref] != GNTTAB_LIST_END) { - ref = gnttab_list[ref]; + while (GNTTAB_ENTRY(ref) != GNTTAB_LIST_END) { + ref = GNTTAB_ENTRY(ref); count++; } - gnttab_list[ref] = gnttab_free_head; + GNTTAB_ENTRY(ref) = gnttab_free_head; gnttab_free_head = head; gnttab_free_count += count; check_free_callbacks(); @@ -381,13 +445,19 @@ gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head) } int +gnttab_empty_grant_references(const grant_ref_t *private_head) +{ + return (*private_head == GNTTAB_LIST_END); +} + +int gnttab_claim_grant_reference(grant_ref_t *private_head) { grant_ref_t g = *private_head; if (g == GNTTAB_LIST_END) return (-1); - *private_head = gnttab_list[g]; + *private_head = GNTTAB_ENTRY(g); return (g); } @@ -396,7 +466,7 @@ gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) { ASSERT(VALID_GRANT_REF(release)); - gnttab_list[release] = *private_head; + GNTTAB_ENTRY(release) = *private_head; *private_head = release; } @@ -417,8 +487,45 @@ out: mutex_exit(&gnttab_list_lock); } -#ifdef XPV_HVM_DRIVER +void +gnttab_cancel_free_callback(struct gnttab_free_callback *callback) +{ + struct gnttab_free_callback **pcb; + + mutex_enter(&gnttab_list_lock); + for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { + if (*pcb == callback) { + *pcb = callback->next; + break; + } + } + mutex_exit(&gnttab_list_lock); +} + +static gnttab_frame_t * +gnttab_setup(gnttab_setup_table_t *pset) +{ + gnttab_frame_t *frames; + + frames = kmem_alloc(pset->nr_frames * sizeof (gnttab_frame_t), + KM_SLEEP); + + /*LINTED: constant in conditional context*/ + set_xen_guest_handle(pset->frame_list, frames); + + /* + * Take pset->nr_frames pages of grant table space from + * the hypervisor and map it + */ + if ((HYPERVISOR_grant_table_op(GNTTABOP_setup_table, pset, 1) != 0) || + (pset->status != 0)) { + cmn_err(CE_PANIC, "Grant Table setup failed"); + } + + return (frames); +} +#ifdef XPV_HVM_DRIVER static void gnttab_map(void) { @@ -428,114 +535,117 @@ gnttab_map(void) int i; va = (caddr_t)shared; - for (i = 0; i < NR_GRANT_FRAMES; i++) { - pfn = hat_getpfnum(kas.a_hat, va); + for (i = 0; i < max_nr_grant_frames(); i++) { + if ((pfn = hat_getpfnum(kas.a_hat, va)) == PFN_INVALID) + cmn_err(CE_PANIC, "gnttab_map: Invalid pfn"); xatp.domid = DOMID_SELF; xatp.idx = i; xatp.space = XENMAPSPACE_grant_table; xatp.gpfn = pfn; hat_unload(kas.a_hat, va, MMU_PAGESIZE, HAT_UNLOAD); + /* + * This call replaces the existing machine page backing + * the given gpfn with the page from the allocated grant + * table at index idx. The existing machine page is + * returned to the free list. + */ if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) != 0) panic("Couldn't map grant table"); - hat_devload(kas.a_hat, va, MMU_PAGESIZE, pfn, PROT_READ | PROT_WRITE, HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); - va += MMU_PAGESIZE; } } +#endif /* XPV_HVM_DRIVER */ void gnttab_init(void) { + gnttab_setup_table_t set; int i; + uint_t nr_init_grefs, max_nr_glist_frames; + gnttab_frame_t *frames; - shared = (grant_entry_t *)xen_alloc_pages(NR_GRANT_FRAMES); + /* + * gnttab_init() should only be invoked once. + */ + mutex_enter(&gnttab_list_lock); + ASSERT(nr_grant_frames == 0); + nr_grant_frames = 1; + mutex_exit(&gnttab_list_lock); - gnttab_map(); + max_nr_glist_frames = (max_nr_grant_frames() * + GREFS_PER_GRANT_FRAME / (PAGESIZE / sizeof (grant_ref_t))); - for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++) - gnttab_list[i] = i + 1; - gnttab_free_count = NR_GRANT_ENTRIES - NR_RESERVED_ENTRIES; - gnttab_free_head = NR_RESERVED_ENTRIES; + set.dom = DOMID_SELF; + set.nr_frames = max_nr_grant_frames(); + frames = gnttab_setup(&set); - mutex_init(&gnttab_list_lock, NULL, MUTEX_DEFAULT, NULL); -} +#ifdef XPV_HVM_DRIVER + shared = (grant_entry_t *)xen_alloc_pages(set.nr_frames); -void -gnttab_resume(void) -{ gnttab_map(); -} - #else /* XPV_HVM_DRIVER */ + shared = vmem_xalloc(heap_arena, set.nr_frames * MMU_PAGESIZE, + MMU_PAGESIZE, 0, 0, 0, 0, VM_SLEEP); + for (i = 0; i < set.nr_frames; i++) { + hat_devload(kas.a_hat, (caddr_t)GT_PGADDR(i), PAGESIZE, + xen_assign_pfn(frames[i]), PROT_READ | PROT_WRITE, + HAT_LOAD_LOCK); + } +#endif -void -gnttab_init(void) -{ - gnttab_setup_table_t set; - gnttab_frame_t frames[NR_GRANT_FRAMES]; - int i; - - set.dom = DOMID_SELF; - set.nr_frames = NR_GRANT_FRAMES; - /*LINTED: constant in conditional context*/ - set_xen_guest_handle(set.frame_list, frames); + gnttab_list = kmem_alloc(max_nr_glist_frames * sizeof (grant_ref_t *), + KM_SLEEP); - /* - * Take 4 pages of grant table space from the hypervisor and map it - */ - if ((HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &set, 1) != 0) || - (set.status != 0)) { - cmn_err(CE_PANIC, "Grant Table setup failed"); + for (i = 0; i < nr_grant_frames; i++) { + gnttab_list[i] = kmem_alloc(PAGESIZE, KM_SLEEP); } - shared = vmem_xalloc(heap_arena, NR_GRANT_FRAMES * MMU_PAGESIZE, - MMU_PAGESIZE, 0, 0, 0, 0, VM_SLEEP); + kmem_free(frames, set.nr_frames * sizeof (gnttab_frame_t)); - for (i = 0; i < NR_GRANT_FRAMES; i++) - kbm_map_ma(FRAME_TO_MA(frames[i]), GT_PGADDR(i), 0); + nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; - for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++) - gnttab_list[i] = i + 1; - gnttab_free_count = NR_GRANT_ENTRIES - NR_RESERVED_ENTRIES; - gnttab_free_head = NR_RESERVED_ENTRIES; + for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) + GNTTAB_ENTRY(i) = i + 1; - mutex_init(&gnttab_list_lock, NULL, MUTEX_DEFAULT, NULL); + GNTTAB_ENTRY(nr_init_grefs - 1) = GNTTAB_LIST_END; + gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; + gnttab_free_head = NR_RESERVED_ENTRIES; } void gnttab_resume(void) { gnttab_setup_table_t set; - gnttab_frame_t frames[NR_GRANT_FRAMES]; int i; + gnttab_frame_t *frames; + uint_t available_frames = max_nr_grant_frames(); - set.dom = DOMID_SELF; - set.nr_frames = NR_GRANT_FRAMES; - /*LINTED: constant in conditional context*/ - set_xen_guest_handle(set.frame_list, frames); - - /* - * Take NR_GRANT_FRAMES pages of grant table space from the - * hypervisor and map it - */ - if ((HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &set, 1) != 0) || - (set.status != 0)) { - cmn_err(CE_PANIC, "Grant Table setup failed"); + if (available_frames < nr_grant_frames) { + cmn_err(CE_PANIC, "Hypervisor does not have enough grant " + "frames: required(%u), available(%u)", nr_grant_frames, + available_frames); } - for (i = 0; i < NR_GRANT_FRAMES; i++) { +#ifdef XPV_HVM_DRIVER + gnttab_map(); +#endif /* XPV_HVM_DRIVER */ + + set.dom = DOMID_SELF; + set.nr_frames = available_frames; + frames = gnttab_setup(&set); + + for (i = 0; i < available_frames; i++) { (void) HYPERVISOR_update_va_mapping(GT_PGADDR(i), FRAME_TO_MA(frames[i]) | PT_VALID | PT_WRITABLE, UVMF_INVLPG | UVMF_ALL); } + kmem_free(frames, set.nr_frames * sizeof (gnttab_frame_t)); } -#endif /* XPV_HVM_DRIVER */ - void gnttab_suspend(void) { @@ -544,7 +654,7 @@ gnttab_suspend(void) /* * clear grant table mappings before suspending */ - for (i = 0; i < NR_GRANT_FRAMES; i++) { + for (i = 0; i < max_nr_grant_frames(); i++) { (void) HYPERVISOR_update_va_mapping(GT_PGADDR(i), 0, UVMF_INVLPG); } diff --git a/usr/src/uts/common/xen/os/hypercall.c b/usr/src/uts/common/xen/os/hypercall.c index fae533dfbf..564c5d2fd6 100644 --- a/usr/src/uts/common/xen/os/hypercall.c +++ b/usr/src/uts/common/xen/os/hypercall.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -300,9 +300,9 @@ HYPERVISOR_mmuext_op(struct mmuext_op *req, int count, uint_t *success_count, } long -HYPERVISOR_acm_op(int cmd, void *arg) +HYPERVISOR_acm_op(struct xen_acmctl *arg) { - return (__hypercall2(__HYPERVISOR_acm_op, (long)cmd, (ulong_t)arg)); + return (__hypercall1(__HYPERVISOR_acm_op, (ulong_t)arg)); } long diff --git a/usr/src/uts/common/xen/public/acm.h b/usr/src/uts/common/xen/public/acm.h index 23078837fb..ef62da0201 100644 --- a/usr/src/uts/common/xen/public/acm.h +++ b/usr/src/uts/common/xen/public/acm.h @@ -56,6 +56,19 @@ #define ACM_ACCESS_DENIED -111 #define ACM_NULL_POINTER_ERROR -200 +/* + Error codes reported in when trying to test for a new policy + These error codes are reported in an array of tuples where + each error code is followed by a parameter describing the error + more closely, such as a domain id. +*/ +#define ACM_EVTCHN_SHARING_VIOLATION 0x100 +#define ACM_GNTTAB_SHARING_VIOLATION 0x101 +#define ACM_DOMAIN_LOOKUP 0x102 +#define ACM_CHWALL_CONFLICT 0x103 +#define ACM_SSIDREF_IN_USE 0x104 + + /* primary policy in lower 4 bits */ #define ACM_NULL_POLICY 0 #define ACM_CHINESE_WALL_POLICY 1 @@ -78,7 +91,7 @@ * whenever the interpretation of the related * policy's data structure changes */ -#define ACM_POLICY_VERSION 2 +#define ACM_POLICY_VERSION 3 #define ACM_CHWALL_VERSION 1 #define ACM_STE_VERSION 1 @@ -119,6 +132,14 @@ typedef uint16_t domaintype_t; /* each offset in bytes from start of the struct they * are part of */ +/* V3 of the policy buffer aded a version structure */ +struct acm_policy_version +{ + uint32_t major; + uint32_t minor; +}; + + /* each buffer consists of all policy information for * the respective policy given in the policy code * @@ -136,8 +157,10 @@ struct acm_policy_buffer { uint32_t primary_buffer_offset; uint32_t secondary_policy_code; uint32_t secondary_buffer_offset; + struct acm_policy_version xml_pol_version; /* add in V3 */ }; + struct acm_policy_reference_buffer { uint32_t len; }; diff --git a/usr/src/uts/common/xen/public/acm_ops.h b/usr/src/uts/common/xen/public/acm_ops.h index 5e103dca7b..27a88720a7 100644 --- a/usr/src/uts/common/xen/public/acm_ops.h +++ b/usr/src/uts/common/xen/public/acm_ops.h @@ -34,7 +34,7 @@ * This makes sure that old versions of acm tools will stop working in a * well-defined way (rather than crashing the machine, for instance). */ -#define ACM_INTERFACE_VERSION 0xAAAA0008 +#define ACM_INTERFACE_VERSION 0xAAAA000A /************************************************************************/ @@ -49,8 +49,7 @@ #define ACMOP_setpolicy 1 struct acm_setpolicy { /* IN */ - uint32_t interface_version; - XEN_GUEST_HANDLE(void) pushcache; + XEN_GUEST_HANDLE_64(void) pushcache; uint32_t pushcache_size; }; @@ -58,8 +57,7 @@ struct acm_setpolicy { #define ACMOP_getpolicy 2 struct acm_getpolicy { /* IN */ - uint32_t interface_version; - XEN_GUEST_HANDLE(void) pullcache; + XEN_GUEST_HANDLE_64(void) pullcache; uint32_t pullcache_size; }; @@ -67,8 +65,7 @@ struct acm_getpolicy { #define ACMOP_dumpstats 3 struct acm_dumpstats { /* IN */ - uint32_t interface_version; - XEN_GUEST_HANDLE(void) pullcache; + XEN_GUEST_HANDLE_64(void) pullcache; uint32_t pullcache_size; }; @@ -78,20 +75,18 @@ struct acm_dumpstats { #define ACM_GETBY_domainid 2 struct acm_getssid { /* IN */ - uint32_t interface_version; uint32_t get_ssid_by; /* ACM_GETBY_* */ union { domaintype_t domainid; ssidref_t ssidref; } id; - XEN_GUEST_HANDLE(void) ssidbuf; + XEN_GUEST_HANDLE_64(void) ssidbuf; uint32_t ssidbuf_size; }; #define ACMOP_getdecision 5 struct acm_getdecision { /* IN */ - uint32_t interface_version; uint32_t get_decision_by1; /* ACM_GETBY_* */ uint32_t get_decision_by2; /* ACM_GETBY_* */ union { @@ -107,6 +102,50 @@ struct acm_getdecision { uint32_t acm_decision; }; + +#define ACMOP_chgpolicy 6 +struct acm_change_policy { + /* IN */ + XEN_GUEST_HANDLE_64(void) policy_pushcache; + uint32_t policy_pushcache_size; + XEN_GUEST_HANDLE_64(void) del_array; + uint32_t delarray_size; + XEN_GUEST_HANDLE_64(void) chg_array; + uint32_t chgarray_size; + /* OUT */ + /* array with error code */ + XEN_GUEST_HANDLE_64(void) err_array; + uint32_t errarray_size; +}; + +#define ACMOP_relabeldoms 7 +struct acm_relabel_doms { + /* IN */ + XEN_GUEST_HANDLE_64(void) relabel_map; + uint32_t relabel_map_size; + /* OUT */ + XEN_GUEST_HANDLE_64(void) err_array; + uint32_t errarray_size; +}; + +/* future interface to Xen */ +struct xen_acmctl { + uint32_t cmd; + uint32_t interface_version; + union { + struct acm_setpolicy setpolicy; + struct acm_getpolicy getpolicy; + struct acm_dumpstats dumpstats; + struct acm_getssid getssid; + struct acm_getdecision getdecision; + struct acm_change_policy change_policy; + struct acm_relabel_doms relabel_doms; + } u; +}; + +typedef struct xen_acmctl xen_acmctl_t; +DEFINE_XEN_GUEST_HANDLE(xen_acmctl_t); + #endif /* __XEN_PUBLIC_ACM_OPS_H__ */ /* diff --git a/usr/src/uts/common/xen/public/arch-x86/xen-x86_32.h b/usr/src/uts/common/xen/public/arch-x86/xen-x86_32.h index 8e508f2f57..01f9f982fe 100644 --- a/usr/src/uts/common/xen/public/arch-x86/xen-x86_32.h +++ b/usr/src/uts/common/xen/public/arch-x86/xen-x86_32.h @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * - * Copyright (c) 2004-2006, K A Fraser + * Copyright (c) 2004-2007, K A Fraser */ #ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ @@ -115,6 +115,32 @@ #define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START) #endif +/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */ +#undef __DEFINE_XEN_GUEST_HANDLE + +#ifdef __GNUC__ +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } \ + __guest_handle_ ## name; \ + typedef struct { union { type *p; uint64_aligned_t q; }; } \ + __guest_handle_64_ ## name +#define uint64_aligned_t uint64_t __attribute__((aligned(8))) +#else +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } \ + __guest_handle_ ## name; \ + typedef struct { union { type *p; uint64_aligned_t q; }u; } \ + __guest_handle_64_ ## name +#define uint64_aligned_t uint64_t +#endif + +#undef set_xen_guest_handle +#define set_xen_guest_handle(hnd, val) \ + do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ + (hnd).p = val; \ + } while ( 0 ) +#define XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name + #ifndef __ASSEMBLY__ struct cpu_user_regs { diff --git a/usr/src/uts/common/xen/public/arch-x86/xen-x86_64.h b/usr/src/uts/common/xen/public/arch-x86/xen-x86_64.h index f011db9073..2a63318904 100644 --- a/usr/src/uts/common/xen/public/arch-x86/xen-x86_64.h +++ b/usr/src/uts/common/xen/public/arch-x86/xen-x86_64.h @@ -55,7 +55,7 @@ * XXPV HACK, we don't support the hypercall page yet. * #endif */ - + /* * 64-bit segment selectors * These flat segments are in the Xen-private section of every GDT. Since these @@ -151,7 +151,10 @@ struct iret_context { #ifdef __GNUC__ /* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ -#define __DECL_REG(name) union { uint64_t r ## name, e ## name; } +#define __DECL_REG(name) union { \ + uint64_t r ## name, e ## name; \ + uint32_t _e ## name; \ +} #else /* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */ #define __DECL_REG(name) uint64_t r ## name diff --git a/usr/src/uts/common/xen/public/arch-x86/xen.h b/usr/src/uts/common/xen/public/arch-x86/xen.h index df53dbe9d2..cbecb15eef 100644 --- a/usr/src/uts/common/xen/public/arch-x86/xen.h +++ b/usr/src/uts/common/xen/public/arch-x86/xen.h @@ -37,16 +37,23 @@ #endif #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) -#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name +#define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name +#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) +#if !defined(__GNUC__) && defined(__i386__) +#define set_xen_guest_handle_u(hnd, val) do { (hnd).u.p = val; } while (0) +#define get_xen_guest_handle_u(val, hnd) do { val = (hnd).u.p; } while (0) +#else +#define set_xen_guest_handle_u(hnd, val) do { (hnd).p = val; } while (0) +#define get_xen_guest_handle_u(val, hnd) do { val = (hnd).p; } while (0) +#endif #define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) -/* - * XXPV - we need get in privcmd - * #ifdef __XEN_TOOLS__ - */ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) -/* - * #endif - */ + +#if defined(__i386__) +#include "xen-x86_32.h" +#elif defined(__x86_64__) +#include "xen-x86_64.h" +#endif #ifndef __ASSEMBLY__ /* Guest handles for primitive C types. */ @@ -60,12 +67,7 @@ DEFINE_XEN_GUEST_HANDLE(void); typedef unsigned long xen_pfn_t; DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); -#endif - -#if defined(__i386__) -#include "xen-x86_32.h" -#elif defined(__x86_64__) -#include "xen-x86_64.h" +#define PRI_xen_pfn "lx" #endif /* @@ -130,12 +132,15 @@ struct vcpu_guest_context { #define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) #define _VGCF_syscall_disables_events 4 #define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) +#define _VGCF_online 5 +#define VGCF_online (1<<_VGCF_online) unsigned long flags; /* VGCF_* flags */ struct cpu_user_regs user_regs; /* User-level CPU registers */ struct trap_info trap_ctxt[256]; /* Virtual IDT */ unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ + /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */ unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ #ifdef __i386__ @@ -146,8 +151,18 @@ struct vcpu_guest_context { #else unsigned long event_callback_eip; unsigned long failsafe_callback_eip; +#ifdef __XEN__ + union { + unsigned long syscall_callback_eip; + struct { + unsigned int event_callback_cs; /* compat CS of event cb */ + unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */ + }; + }; +#else unsigned long syscall_callback_eip; #endif +#endif unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ #ifdef __x86_64__ /* Segment base addresses. */ @@ -176,6 +191,8 @@ struct panic_info { char *pi_panicstr; /* panic message */ void *pi_ram_start; /* Start of all-RAM mapping region */ void *pi_ram_end; /* End of all-RAM mapping region */ + void *pi_xen_start; /* Start of Xen's text/heap */ + void *pi_xen_end; /* End of Xen's text/heap */ void *pi_stktop; /* Top of current Xen stack */ struct domain *pi_domain; /* Panicking domain */ struct vcpu *pi_vcpu; /* Panicking vcpu */ @@ -187,7 +204,7 @@ struct panic_frame { unsigned long pf_pc; }; -#define PANIC_INFO_VERSION 1 +#define PANIC_INFO_VERSION 2 #endif /* !__ASSEMBLY__ */ diff --git a/usr/src/uts/common/xen/public/domctl.h b/usr/src/uts/common/xen/public/domctl.h index 8f176f5412..d061a9785f 100644 --- a/usr/src/uts/common/xen/public/domctl.h +++ b/usr/src/uts/common/xen/public/domctl.h @@ -42,11 +42,12 @@ #include "xen.h" -#define XEN_DOMCTL_INTERFACE_VERSION 0x00000004 +#define XEN_DOMCTL_INTERFACE_VERSION 0x00000005 struct xenctl_cpumap { - XEN_GUEST_HANDLE(uint8_t) bitmap; + XEN_GUEST_HANDLE_64(uint8_t) bitmap; uint32_t nr_cpus; + uint8_t pad[4]; }; /* @@ -69,6 +70,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t); #define XEN_DOMCTL_destroydomain 2 #define XEN_DOMCTL_pausedomain 3 #define XEN_DOMCTL_unpausedomain 4 +#define XEN_DOMCTL_resumedomain 27 #define XEN_DOMCTL_getdomaininfo 5 struct xen_domctl_getdomaininfo { @@ -92,6 +94,9 @@ struct xen_domctl_getdomaininfo { /* Domain is currently running. */ #define _XEN_DOMINF_running 5 #define XEN_DOMINF_running (1U<<_XEN_DOMINF_running) + /* Being debugged. */ +#define _XEN_DOMINF_debugged 6 +#define XEN_DOMINF_debugged (1U<<_XEN_DOMINF_debugged) /* CPU to which this domain is bound. */ #define XEN_DOMINF_cpumask 255 #define XEN_DOMINF_cpushift 8 @@ -99,14 +104,15 @@ struct xen_domctl_getdomaininfo { #define XEN_DOMINF_shutdownmask 255 #define XEN_DOMINF_shutdownshift 16 uint32_t flags; /* XEN_DOMINF_* */ - uint64_t tot_pages; - uint64_t max_pages; - uint64_t shared_info_frame; /* GMFN of shared_info struct */ - uint64_t cpu_time; + uint64_aligned_t tot_pages; + uint64_aligned_t max_pages; + uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */ + uint64_aligned_t cpu_time; uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */ uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */ uint32_t ssidref; xen_domain_handle_t handle; + uint8_t pad[4]; }; typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t); @@ -116,12 +122,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t); struct xen_domctl_getmemlist { /* IN variables. */ /* Max entries to write to output buffer. */ - uint64_t max_pfns; + uint64_aligned_t max_pfns; /* Start index in guest's page list. */ - uint64_t start_pfn; - XEN_GUEST_HANDLE(xen_pfn_t) buffer; + uint64_aligned_t start_pfn; + XEN_GUEST_HANDLE_64(uint64_t) buffer; /* OUT variables. */ - uint64_t num_pfns; + uint64_aligned_t num_pfns; }; typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t); @@ -130,22 +136,23 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t); #define XEN_DOMCTL_getpageframeinfo 7 #define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28 -#define XEN_DOMCTL_PFINFO_NOTAB (0x0<<28) -#define XEN_DOMCTL_PFINFO_L1TAB (0x1<<28) -#define XEN_DOMCTL_PFINFO_L2TAB (0x2<<28) -#define XEN_DOMCTL_PFINFO_L3TAB (0x3<<28) -#define XEN_DOMCTL_PFINFO_L4TAB (0x4<<28) -#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7<<28) -#define XEN_DOMCTL_PFINFO_LPINTAB (0x1<<31) -#define XEN_DOMCTL_PFINFO_XTAB (0xf<<28) /* invalid page */ -#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xf<<28) +#define XEN_DOMCTL_PFINFO_NOTAB (0x0U<<28) +#define XEN_DOMCTL_PFINFO_L1TAB (0x1U<<28) +#define XEN_DOMCTL_PFINFO_L2TAB (0x2U<<28) +#define XEN_DOMCTL_PFINFO_L3TAB (0x3U<<28) +#define XEN_DOMCTL_PFINFO_L4TAB (0x4U<<28) +#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28) +#define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31) +#define XEN_DOMCTL_PFINFO_XTAB (0xfU<<28) /* invalid page */ +#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28) struct xen_domctl_getpageframeinfo { /* IN variables. */ - uint64_t gmfn; /* GMFN to query */ + uint64_aligned_t gmfn; /* GMFN to query */ /* OUT variables. */ /* Is the page PINNED to a type? */ uint32_t type; /* see above type defs */ + uint8_t pad[4]; }; typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t); @@ -154,9 +161,9 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t); #define XEN_DOMCTL_getpageframeinfo2 8 struct xen_domctl_getpageframeinfo2 { /* IN variables. */ - uint64_t num; + uint64_aligned_t num; /* IN/OUT variables. */ - XEN_GUEST_HANDLE(ulong) array; + XEN_GUEST_HANDLE_64(uint32_t) array; }; typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t); @@ -228,10 +235,11 @@ struct xen_domctl_shadow_op { /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */ uint32_t mb; /* Shadow memory allocation in MB */ + uint8_t pad[4]; /* OP_PEEK / OP_CLEAN */ - XEN_GUEST_HANDLE(ulong) dirty_bitmap; - uint64_t pages; /* Size of buffer. Updated with actual size. */ + XEN_GUEST_HANDLE_64(uint8_t) dirty_bitmap; + uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */ struct xen_domctl_shadow_op_stats stats; }; typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t; @@ -241,7 +249,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t); #define XEN_DOMCTL_max_mem 11 struct xen_domctl_max_mem { /* IN variables. */ - uint64_t max_memkb; + uint64_aligned_t max_memkb; }; typedef struct xen_domctl_max_mem xen_domctl_max_mem_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t); @@ -251,7 +259,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t); #define XEN_DOMCTL_getvcpucontext 13 struct xen_domctl_vcpucontext { uint32_t vcpu; /* IN */ - XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt; /* IN/OUT */ + uint8_t pad[4]; + XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */ }; typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t); @@ -265,8 +274,10 @@ struct xen_domctl_getvcpuinfo { uint8_t online; /* currently online (not hotplugged)? */ uint8_t blocked; /* blocked waiting for an event? */ uint8_t running; /* currently scheduled on its CPU? */ - uint64_t cpu_time; /* total cpu time consumed (ns) */ + uint8_t pad1; + uint64_aligned_t cpu_time; /* total cpu time consumed (ns) */ uint32_t cpu; /* current mapping */ + uint8_t pad2[4]; }; typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t); @@ -277,6 +288,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t); #define XEN_DOMCTL_getvcpuaffinity 25 struct xen_domctl_vcpuaffinity { uint32_t vcpu; /* IN */ + uint8_t pad[4]; struct xenctl_cpumap cpumap; /* IN/OUT */ }; typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t; @@ -303,9 +315,9 @@ struct xen_domctl_scheduler_op { uint32_t cmd; /* XEN_DOMCTL_SCHEDOP_* */ union { struct xen_domctl_sched_sedf { - uint64_t period; - uint64_t slice; - uint64_t latency; + uint64_aligned_t period; + uint64_aligned_t slice; + uint64_aligned_t latency; uint32_t extratime; uint32_t weight; } sedf; @@ -346,9 +358,9 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t); #define XEN_DOMCTL_iomem_permission 20 struct xen_domctl_iomem_permission { - uint64_t first_mfn; /* first page (physical page number) in range */ - uint64_t nr_mfns; /* number of pages in range (>0) */ - uint8_t allow_access; /* allow (!0) or deny (0) access to range? */ + uint64_aligned_t first_mfn;/* first page (physical page number) in range */ + uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ + uint8_t allow_access; /* allow (!0) or deny (0) access to range? */ }; typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t); @@ -359,33 +371,37 @@ struct xen_domctl_ioport_permission { uint32_t first_port; /* first port int range */ uint32_t nr_ports; /* size of port range */ uint8_t allow_access; /* allow or deny access to range? */ + uint8_t pad[3]; }; typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t); + #define XEN_DOMCTL_hypercall_init 22 struct xen_domctl_hypercall_init { - uint64_t gmfn; /* GMFN to be initialised */ + uint64_aligned_t gmfn; /* GMFN to be initialised */ }; typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t); + #define XEN_DOMCTL_arch_setup 23 #define _XEN_DOMAINSETUP_hvm_guest 0 #define XEN_DOMAINSETUP_hvm_guest (1UL<<_XEN_DOMAINSETUP_hvm_guest) #define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save) */ #define XEN_DOMAINSETUP_query (1UL<<_XEN_DOMAINSETUP_query) typedef struct xen_domctl_arch_setup { - uint64_t flags; /* XEN_DOMAINSETUP_* */ + uint64_aligned_t flags; /* XEN_DOMAINSETUP_* */ #ifdef __ia64__ - uint64_t bp; /* mpaddr of boot param area */ - uint64_t maxmem; /* Highest memory address for MDT. */ - uint64_t xsi_va; /* Xen shared_info area virtual address. */ - uint32_t hypercall_imm; /* Break imm for Xen hypercalls. */ + uint64_aligned_t bp; /* mpaddr of boot param area */ + uint64_aligned_t maxmem; /* Highest memory address for MDT. */ + uint64_aligned_t xsi_va; /* Xen shared_info area virtual address. */ + uint32_t hypercall_imm; /* Break imm for Xen hypercalls. */ #endif } xen_domctl_arch_setup_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t); + #define XEN_DOMCTL_settimeoffset 24 struct xen_domctl_settimeoffset { int32_t time_offset_seconds; /* applied to domain wallclock time */ @@ -393,17 +409,53 @@ struct xen_domctl_settimeoffset { typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t); -#define XEN_DOMCTL_real_mode_area 26 + +#define XEN_DOMCTL_gethvmcontext 33 +#define XEN_DOMCTL_sethvmcontext 34 +typedef struct xen_domctl_hvmcontext { + uint32_t size; /* IN/OUT: size of buffer / bytes filled */ + uint8_t pad[4]; + XEN_GUEST_HANDLE_64(uint8_t) buffer; /* IN/OUT: data, or call + * gethvmcontext with NULL + * buffer to get size + * req'd */ +} xen_domctl_hvmcontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t); + + +#define XEN_DOMCTL_set_address_size 35 +#define XEN_DOMCTL_get_address_size 36 +typedef struct xen_domctl_address_size { + uint32_t size; +} xen_domctl_address_size_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t); + + +#define XEN_DOMCTL_real_mode_area 26 struct xen_domctl_real_mode_area { uint32_t log; /* log2 of Real Mode Area size */ }; typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); + +#define XEN_DOMCTL_sendtrigger 28 +#define XEN_DOMCTL_SENDTRIGGER_NMI 0 +#define XEN_DOMCTL_SENDTRIGGER_RESET 1 +#define XEN_DOMCTL_SENDTRIGGER_INIT 2 +struct xen_domctl_sendtrigger { + uint32_t trigger; /* IN */ + uint32_t vcpu; /* IN */ +}; +typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t); + + struct xen_domctl { uint32_t cmd; uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ domid_t domain; + uint8_t pad[6]; union { struct xen_domctl_createdomain createdomain; struct xen_domctl_getdomaininfo getdomaininfo; @@ -426,6 +478,9 @@ struct xen_domctl { struct xen_domctl_arch_setup arch_setup; struct xen_domctl_settimeoffset settimeoffset; struct xen_domctl_real_mode_area real_mode_area; + struct xen_domctl_hvmcontext hvmcontext; + struct xen_domctl_address_size address_size; + struct xen_domctl_sendtrigger sendtrigger; uint8_t pad[128]; } u; }; diff --git a/usr/src/uts/common/xen/public/elfnote.h b/usr/src/uts/common/xen/public/elfnote.h index 4924767d30..77be41bb4b 100644 --- a/usr/src/uts/common/xen/public/elfnote.h +++ b/usr/src/uts/common/xen/public/elfnote.h @@ -28,7 +28,7 @@ #define __XEN_PUBLIC_ELFNOTE_H__ /* - * The notes should live in a SHT_NOTE segment and have "Xen" in the + * The notes should live in a PT_NOTE segment and have "Xen" in the * name field. * * Numeric types are either 4 or 8 bytes depending on the content of @@ -40,8 +40,6 @@ /* * NAME=VALUE pair (string). - * - * LEGACY: FEATURES and PAE */ #define XEN_ELFNOTE_INFO 0 @@ -108,7 +106,12 @@ #define XEN_ELFNOTE_LOADER 8 /* - * The kernel supports PAE (x86/32 only, string = "yes" or "no"). + * The kernel supports PAE (x86/32 only, string = "yes", "no" or + * "bimodal"). + * + * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting + * may be given as "yes,bimodal" which will cause older Xen to treat + * this kernel as PAE. * * LEGACY: PAE (n.b. The legacy interface included a provision to * indicate 'extended-cr3' support allowing L3 page tables to be @@ -148,6 +151,22 @@ #define XEN_ELFNOTE_HV_START_LOW 12 /* + * List of maddr_t-sized mask/value pairs describing how to recognize + * (non-present) L1 page table entries carrying valid MFNs (numeric). + */ +#define XEN_ELFNOTE_L1_MFN_VALID 13 + +/* + * Whether or not the guest supports cooperative suspend cancellation. + */ +#define XEN_ELFNOTE_SUSPEND_CANCEL 14 + +/* + * The number of the highest elfnote defined. + */ +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUSPEND_CANCEL + +/* * System information exported through crash notes. * * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO @@ -166,6 +185,41 @@ */ #define XEN_ELFNOTE_CRASH_REGS 0x1000002 + +/* + * xen dump-core none note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE + * in its dump file to indicate that the file is xen dump-core + * file. This note doesn't have any other information. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000 + +/* + * xen dump-core header note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER + * in its dump file. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001 + +/* + * xen dump-core xen version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION + * in its dump file. It contains the xen version obtained via the + * XENVER hypercall. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002 + +/* + * xen dump-core format version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION + * in its dump file. It contains a format version identifier. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003 + #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ /* diff --git a/usr/src/uts/common/xen/public/event_channel.h b/usr/src/uts/common/xen/public/event_channel.h index 62cf764040..d35cce53e4 100644 --- a/usr/src/uts/common/xen/public/event_channel.h +++ b/usr/src/uts/common/xen/public/event_channel.h @@ -217,6 +217,19 @@ struct evtchn_unmask { typedef struct evtchn_unmask evtchn_unmask_t; /* + * EVTCHNOP_reset: Close all event channels associated with specified domain. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. + */ +#define EVTCHNOP_reset 10 +struct evtchn_reset { + /* IN parameters. */ + domid_t dom; +}; +typedef struct evtchn_reset evtchn_reset_t; + +/* * Argument to event_channel_op_compat() hypercall. Superceded by new * event_channel_op() hypercall since 0x00030202. */ diff --git a/usr/src/uts/common/xen/public/grant_table.h b/usr/src/uts/common/xen/public/grant_table.h index 9622b56d02..222ac37b86 100644 --- a/usr/src/uts/common/xen/public/grant_table.h +++ b/usr/src/uts/common/xen/public/grant_table.h @@ -309,6 +309,25 @@ typedef struct gnttab_copy { } gnttab_copy_t; DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t); +/* + * GNTTABOP_query_size: Query the current and maximum sizes of the shared + * grant table. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + */ +#define GNTTABOP_query_size 6 +struct gnttab_query_size { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + uint32_t nr_frames; + uint32_t max_nr_frames; + int16_t status; /* GNTST_* */ +}; +typedef struct gnttab_query_size gnttab_query_size_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t); + /* * Bitfield values for update_pin_status.flags. diff --git a/usr/src/uts/common/xen/public/hvm/hvm_op.h b/usr/src/uts/common/xen/public/hvm/hvm_op.h index 8322f32ee2..b21b0f7abe 100644 --- a/usr/src/uts/common/xen/public/hvm/hvm_op.h +++ b/usr/src/uts/common/xen/public/hvm/hvm_op.h @@ -1,3 +1,23 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + #ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ #define __XEN_PUBLIC_HVM_HVM_OP_H__ @@ -50,4 +70,7 @@ struct xen_hvm_set_pci_link_route { typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t); +/* Flushes all VCPU TLBs: @arg must be NULL. */ +#define HVMOP_flush_tlbs 5 + #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ diff --git a/usr/src/uts/common/xen/public/hvm/params.h b/usr/src/uts/common/xen/public/hvm/params.h index caa1f1f545..9657654870 100644 --- a/usr/src/uts/common/xen/public/hvm/params.h +++ b/usr/src/uts/common/xen/public/hvm/params.h @@ -1,4 +1,3 @@ - /* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -24,13 +23,38 @@ #include "hvm_op.h" -/* Parameter space for HVMOP_{set,get}_param. */ +/* + * Parameter space for HVMOP_{set,get}_param. + */ + +/* + * How should CPU0 event-channel notifications be delivered? + * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). + * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: + * Domain = val[47:32], Bus = val[31:16], + * DevFn = val[15: 8], IntX = val[ 1: 0] + * If val == 0 then CPU0 event-channel notifications are not delivered. + */ #define HVM_PARAM_CALLBACK_IRQ 0 + +/* + * These are not used by Xen. They are here for convenience of HVM-guest + * xenbus implementations. + */ #define HVM_PARAM_STORE_PFN 1 #define HVM_PARAM_STORE_EVTCHN 2 + #define HVM_PARAM_PAE_ENABLED 4 + #define HVM_PARAM_IOREQ_PFN 5 + #define HVM_PARAM_BUFIOREQ_PFN 6 + +#ifdef __ia64__ +#define HVM_PARAM_NVRAM_FD 7 +#define HVM_NR_PARAMS 8 +#else #define HVM_NR_PARAMS 7 +#endif #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff --git a/usr/src/uts/common/xen/public/io/blkif.h b/usr/src/uts/common/xen/public/io/blkif.h index 4d33926f1e..fde78b9dbb 100644 --- a/usr/src/uts/common/xen/public/io/blkif.h +++ b/usr/src/uts/common/xen/public/io/blkif.h @@ -73,18 +73,20 @@ */ #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 +struct blkif_request_segment { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + uint8_t first_sect, last_sect; +}; + struct blkif_request { uint8_t operation; /* BLKIF_OP_??? */ uint8_t nr_segments; /* number of segments */ blkif_vdev_t handle; /* only for read/write requests */ uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - struct blkif_request_segment { - grant_ref_t gref; /* reference to I/O buffer frame */ - /* @first_sect: first sector in frame to transfer (inclusive). */ - /* @last_sect: last sector in frame to transfer (inclusive). */ - uint8_t first_sect, last_sect; - } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; typedef struct blkif_request blkif_request_t; diff --git a/usr/src/uts/common/xen/public/io/protocols.h b/usr/src/uts/common/xen/public/io/protocols.h new file mode 100644 index 0000000000..0e78452c81 --- /dev/null +++ b/usr/src/uts/common/xen/public/io/protocols.h @@ -0,0 +1,42 @@ +/****************************************************************************** + * protocols.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef __XEN_PROTOCOLS_H__ +#define __XEN_PROTOCOLS_H__ + +#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" +#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" +#define XEN_IO_PROTO_ABI_IA64 "ia64-abi" +#define XEN_IO_PROTO_ABI_POWERPC64 "powerpc64-abi" + +#if defined(__i386__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 +#elif defined(__x86_64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 +#elif defined(__ia64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64 +#elif defined(__powerpc64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64 +#else +# error arch fixup needed here +#endif + +#endif diff --git a/usr/src/uts/common/xen/public/io/xs_wire.h b/usr/src/uts/common/xen/public/io/xs_wire.h index 825ea45144..8b841bcfea 100644 --- a/usr/src/uts/common/xen/public/io/xs_wire.h +++ b/usr/src/uts/common/xen/public/io/xs_wire.h @@ -45,7 +45,8 @@ enum xsd_sockmsg_type XS_SET_PERMS, XS_WATCH_EVENT, XS_ERROR, - XS_IS_DOMAIN_INTRODUCED + XS_IS_DOMAIN_INTRODUCED, + XS_RESUME }; #define XS_WRITE_NONE "NONE" @@ -59,26 +60,12 @@ struct xsd_errors const char *errstring; }; #define XSD_ERROR(x) { x, #x } -#if !defined(__GNUC__) /* LINTED: static unused */ -static struct xsd_errors xsd_errors[] = { - XSD_ERROR(EINVAL), - XSD_ERROR(EACCES), - XSD_ERROR(EEXIST), - XSD_ERROR(EISDIR), - XSD_ERROR(ENOENT), - XSD_ERROR(ENOMEM), - XSD_ERROR(ENOSPC), - XSD_ERROR(EIO), - XSD_ERROR(ENOTEMPTY), - XSD_ERROR(ENOSYS), - XSD_ERROR(EROFS), - XSD_ERROR(EBUSY), - XSD_ERROR(EAGAIN), - XSD_ERROR(EISCONN) -}; -#else -static struct xsd_errors xsd_errors[] __attribute__((unused)) = { +static struct xsd_errors xsd_errors[] +#if defined(__GNUC__) +__attribute__((unused)) +#endif + = { XSD_ERROR(EINVAL), XSD_ERROR(EACCES), XSD_ERROR(EEXIST), @@ -94,7 +81,6 @@ static struct xsd_errors xsd_errors[] __attribute__((unused)) = { XSD_ERROR(EAGAIN), XSD_ERROR(EISCONN) }; -#endif struct xsd_sockmsg { diff --git a/usr/src/uts/common/xen/public/memory.h b/usr/src/uts/common/xen/public/memory.h index 97a6bbfea2..7b1ef44d32 100644 --- a/usr/src/uts/common/xen/public/memory.h +++ b/usr/src/uts/common/xen/public/memory.h @@ -129,6 +129,11 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t); #define XENMEM_maximum_reservation 4 /* + * Returns the maximum GPFN in use by the guest, or -ve errcode on failure. + */ +#define XENMEM_maximum_gpfn 14 + +/* * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys * mapping table. Architectures which do not have a m2p table do not implement * this command. diff --git a/usr/src/uts/common/xen/public/platform.h b/usr/src/uts/common/xen/public/platform.h index ec9bd604c5..d8de4bef60 100644 --- a/usr/src/uts/common/xen/public/platform.h +++ b/usr/src/uts/common/xen/public/platform.h @@ -114,6 +114,45 @@ struct xenpf_platform_quirk { typedef struct xenpf_platform_quirk xenpf_platform_quirk_t; DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t); +#define XENPF_firmware_info 50 +#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ +#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ +#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ +struct xenpf_firmware_info { + /* IN variables. */ + uint32_t type; + uint32_t index; + /* OUT variables. */ + union { + struct { + /* Int13, Fn48: Check Extensions Present. */ + uint8_t device; /* %dl: bios device number */ + uint8_t version; /* %ah: major version */ + uint16_t interface_support; /* %cx: support bitmap */ + /* Int13, Fn08: Legacy Get Device Parameters. */ + uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */ + uint8_t legacy_max_head; /* %dh: max head # */ + uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */ + /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */ + /* NB. First uint16_t of buffer must be set to buffer size. */ + XEN_GUEST_HANDLE(void) edd_params; + } disk_info; /* XEN_FW_DISK_INFO */ + struct { + uint8_t device; /* bios device number */ + uint32_t mbr_signature; /* offset 0x1b8 in mbr */ + } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */ + struct { + /* Int10, AX=4F15: Get EDID info. */ + uint8_t capabilities; + uint8_t edid_transfer_time; + /* must refer to 128-byte buffer */ + XEN_GUEST_HANDLE(uint8_t) edid; + } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ + } u; +}; +typedef struct xenpf_firmware_info xenpf_firmware_info_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t); + #define XENPF_panic_init 40 struct xenpf_panic_init { unsigned long panic_addr; @@ -131,6 +170,7 @@ struct xen_platform_op { struct xenpf_read_memtype read_memtype; struct xenpf_microcode_update microcode; struct xenpf_platform_quirk platform_quirk; + struct xenpf_firmware_info firmware_info; struct xenpf_panic_init panic_init; uint8_t pad[128]; } u; diff --git a/usr/src/uts/common/xen/public/sysctl.h b/usr/src/uts/common/xen/public/sysctl.h index db2091cbcf..5064b261e3 100644 --- a/usr/src/uts/common/xen/public/sysctl.h +++ b/usr/src/uts/common/xen/public/sysctl.h @@ -41,7 +41,7 @@ #include "xen.h" #include "domctl.h" -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000002 +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000003 /* * Read console content from Xen buffer ring. @@ -50,9 +50,11 @@ struct xen_sysctl_readconsole { /* IN variables. */ uint32_t clear; /* Non-zero -> clear after reading. */ - XEN_GUEST_HANDLE(char) buffer; /* Buffer start */ + uint8_t pad1[4]; + XEN_GUEST_HANDLE_64(char) buffer; /* Buffer start */ /* IN/OUT variables. */ uint32_t count; /* In: Buffer size; Out: Used buffer size */ + uint8_t pad2[4]; }; typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t); @@ -68,11 +70,12 @@ struct xen_sysctl_tbuf_op { #define XEN_SYSCTL_TBUFOP_enable 4 #define XEN_SYSCTL_TBUFOP_disable 5 uint32_t cmd; + uint8_t pad[4]; /* IN/OUT variables */ struct xenctl_cpumap cpu_mask; uint32_t evt_mask; /* OUT variables */ - uint64_t buffer_mfn; + uint64_aligned_t buffer_mfn; uint32_t size; }; typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t; @@ -88,9 +91,10 @@ struct xen_sysctl_physinfo { uint32_t sockets_per_node; uint32_t nr_nodes; uint32_t cpu_khz; - uint64_t total_pages; - uint64_t free_pages; - uint64_t scrub_pages; + uint8_t pad[4]; + uint64_aligned_t total_pages; + uint64_aligned_t free_pages; + uint64_aligned_t scrub_pages; uint32_t hw_cap[8]; }; typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t; @@ -127,10 +131,11 @@ struct xen_sysctl_perfc_op { /* OUT variables. */ uint32_t nr_counters; /* number of counters description */ uint32_t nr_vals; /* number of values */ + uint8_t pad[4]; /* counter information (or NULL) */ - XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc; + XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc; /* counter values (or NULL) */ - XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t) val; + XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val; }; typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t); @@ -139,14 +144,46 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t); struct xen_sysctl_getdomaininfolist { /* IN variables. */ domid_t first_domain; + uint8_t pad1[2]; uint32_t max_domains; - XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t) buffer; + XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer; /* OUT variables. */ uint32_t num_domains; + uint8_t pad2[4]; }; typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t); +/* Inject debug keys into Xen. */ +#define XEN_SYSCTL_debug_keys 7 +struct xen_sysctl_debug_keys { + /* IN variables. */ + XEN_GUEST_HANDLE_64(char) keys; + uint32_t nr_keys; + uint8_t pad[4]; +}; +typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t); + +/* Get physical CPU information. */ +#define XEN_SYSCTL_getcpuinfo 8 +struct xen_sysctl_cpuinfo { + uint64_t idletime; +}; +typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); +struct xen_sysctl_getcpuinfo { + /* IN variables. */ + uint32_t max_cpus; + uint8_t pad1[4]; + XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info; + /* OUT variables. */ + uint32_t nr_cpus; + uint8_t pad2[4]; +}; +typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); + struct xen_sysctl { uint32_t cmd; uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ @@ -157,6 +194,8 @@ struct xen_sysctl { struct xen_sysctl_sched_id sched_id; struct xen_sysctl_perfc_op perfc_op; struct xen_sysctl_getdomaininfolist getdomaininfolist; + struct xen_sysctl_debug_keys debug_keys; + struct xen_sysctl_getcpuinfo getcpuinfo; uint8_t pad[128]; } u; }; diff --git a/usr/src/uts/common/xen/public/vcpu.h b/usr/src/uts/common/xen/public/vcpu.h index 12df6dbe15..a84eb51310 100644 --- a/usr/src/uts/common/xen/public/vcpu.h +++ b/usr/src/uts/common/xen/public/vcpu.h @@ -42,13 +42,13 @@ * @extra_arg == pointer to vcpu_guest_context structure containing initial * state for the VCPU. */ -#define VCPUOP_initialise 0 +#define VCPUOP_initialise 0 /* * Bring up a VCPU. This makes the VCPU runnable. This operation will fail * if the VCPU has not been initialised (VCPUOP_initialise). */ -#define VCPUOP_up 1 +#define VCPUOP_up 1 /* * Bring down a VCPU (i.e., make it non-runnable). @@ -64,16 +64,16 @@ * practise to move a VCPU onto an 'idle' or default page table, LDT and * GDT before bringing it down. */ -#define VCPUOP_down 2 +#define VCPUOP_down 2 /* Returns 1 if the given VCPU is up. */ -#define VCPUOP_is_up 3 +#define VCPUOP_is_up 3 /* * Return information about the state and running time of a VCPU. * @extra_arg == pointer to vcpu_runstate_info structure. */ -#define VCPUOP_get_runstate_info 4 +#define VCPUOP_get_runstate_info 4 struct vcpu_runstate_info { /* VCPU's current state (RUNSTATE_*). */ int state; @@ -128,6 +128,56 @@ struct vcpu_register_runstate_memory_area { } addr; }; typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t); + +/* + * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer + * which can be set via these commands. Periods smaller than one millisecond + * may not be supported. + */ +#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */ +#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */ +struct vcpu_set_periodic_timer { + uint64_t period_ns; +}; +typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t); + +/* + * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot + * timer which can be set via these commands. + */ +#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */ +#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */ +struct vcpu_set_singleshot_timer { + uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */ + uint32_t flags; /* VCPU_SSHOTTMR_??? */ +}; +typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t); + +/* Flags to VCPUOP_set_singleshot_timer. */ + /* Require the timeout to be in the future (return -ETIME if it's passed). */ +#define _VCPU_SSHOTTMR_future (0) +#define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future) + +/* + * Register a memory location in the guest address space for the + * vcpu_info structure. This allows the guest to place the vcpu_info + * structure in a convenient place, such as in a per-cpu data area. + * The pointer need not be page aligned, but the structure must not + * cross a page boundary. + * + * This may be called only once per vcpu. + */ +#define VCPUOP_register_vcpu_info 10 /* arg == struct vcpu_info */ +struct vcpu_register_vcpu_info { + uint64_t mfn; /* mfn of page to place vcpu_info */ + uint32_t offset; /* offset within page */ + uint32_t rsvd; /* unused */ +}; +typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t); #endif /* __XEN_PUBLIC_VCPU_H__ */ diff --git a/usr/src/uts/common/xen/public/xen.h b/usr/src/uts/common/xen/public/xen.h index 319f65ef08..b1b910c798 100644 --- a/usr/src/uts/common/xen/public/xen.h +++ b/usr/src/uts/common/xen/public/xen.h @@ -30,7 +30,7 @@ #include "xen-compat.h" #if defined(__i386) && !defined(__i386__) -#define __i386__ +#define __i386__ /* foo */ #endif #if defined(__amd64) && !defined(__x86_64__) @@ -143,6 +143,7 @@ #define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */ #define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ #define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ +#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ /* Architecture-specific VIRQ definitions. */ #define VIRQ_ARCH_0 16 @@ -421,7 +422,9 @@ struct vcpu_info { struct arch_vcpu_info arch; struct vcpu_time_info time; }; /* 64 bytes (x86) */ +#ifndef __XEN__ typedef struct vcpu_info vcpu_info_t; +#endif /* * Xen/kernel shared data -- pointer provided in start_info. @@ -479,29 +482,29 @@ struct shared_info { struct arch_shared_info arch; }; +#ifndef __XEN__ typedef struct shared_info shared_info_t; +#endif /* - * Start-of-day memory layout for the initial domain (DOM0): + * Start-of-day memory layout: * 1. The domain is started within contiguous virtual-memory region. - * 2. The contiguous region begins and ends on an aligned 4MB boundary. - * 3. The region start corresponds to the load address of the OS image. - * If the load address is not 4MB aligned then the address is rounded down. - * 4. This the order of bootstrap elements in the initial virtual region: + * 2. The contiguous region ends on an aligned 4MB boundary. + * 3. This the order of bootstrap elements in the initial virtual region: * a. relocated kernel image * b. initial ram disk [mod_start, mod_len] * c. list of allocated page frames [mfn_list, nr_pages] * d. start_info_t structure [register ESI (x86)] * e. bootstrap page tables [pt_base, CR3 (x86)] * f. bootstrap stack [register ESP (x86)] - * 5. Bootstrap elements are packed together, but each is 4kB-aligned. - * 6. The initial ram disk may be omitted. - * 7. The list of page frames forms a contiguous 'pseudo-physical' memory + * 4. Bootstrap elements are packed together, but each is 4kB-aligned. + * 5. The initial ram disk may be omitted. + * 6. The list of page frames forms a contiguous 'pseudo-physical' memory * layout for the domain. In particular, the bootstrap virtual-memory * region is a 1:1 mapping to the first section of the pseudo-physical map. - * 8. All bootstrap elements are mapped read-writable for the guest OS. The + * 7. All bootstrap elements are mapped read-writable for the guest OS. The * only exception is the bootstrap page table, which is mapped read-only. - * 9. There is guaranteed to be at least 512kB padding after the final + * 8. There is guaranteed to be at least 512kB padding after the final * bootstrap element. If necessary, the bootstrap virtual region is * extended by an extra 4MB to ensure this. */ @@ -583,6 +586,8 @@ typedef struct dom0_vga_console_info { } vesa_lfb; } u; } dom0_vga_console_info_t; +#define xen_vga_console_info dom0_vga_console_info +#define xen_vga_console_info_t dom0_vga_console_info_t typedef uint8_t xen_domain_handle_t[16]; @@ -602,6 +607,21 @@ DEFINE_XEN_GUEST_HANDLE(uint64_t); #endif /* !__ASSEMBLY__ */ +/* Default definitions for macros used by domctl/sysctl. */ +/* +#if defined(__XEN__) || defined(__XEN_TOOLS__) +*/ +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif +#ifndef XEN_GUEST_HANDLE_64 +#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) +#endif +/* +#endif +*/ + + #endif /* __XEN_PUBLIC_XEN_H__ */ /* diff --git a/usr/src/uts/common/xen/sys/gnttab.h b/usr/src/uts/common/xen/sys/gnttab.h index 7066ae3243..eee9c27fbe 100644 --- a/usr/src/uts/common/xen/sys/gnttab.h +++ b/usr/src/uts/common/xen/sys/gnttab.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -40,8 +40,11 @@ * Copyright (c) 2004-2005, K A Fraser * Copyright (c) 2005, Christopher Clark * - * This file may be distributed separately from the Linux kernel, or - * incorporated into other software packages, subject to the following license: + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without @@ -64,18 +67,12 @@ #include <sys/hypervisor.h> #include <xen/public/grant_table.h> +#include <xen/public/features.h> #ifdef __cplusplus extern "C" { #endif -/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ -#ifdef __ia64__ -#define NR_GRANT_FRAMES 1 -#else -#define NR_GRANT_FRAMES 4 -#endif - struct gnttab_free_callback { struct gnttab_free_callback *next; void (*fn)(void *); @@ -107,7 +104,7 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); void gnttab_end_foreign_access(grant_ref_t ref, int readonly, gnttab_frame_t page); -int gnttab_grant_foreign_transfer(domid_t domid); +int gnttab_grant_foreign_transfer(domid_t domid, pfn_t pfn); gnttab_frame_t gnttab_end_foreign_transfer_ref(grant_ref_t ref); gnttab_frame_t gnttab_end_foreign_transfer(grant_ref_t ref); @@ -123,6 +120,8 @@ void gnttab_free_grant_reference(grant_ref_t ref); void gnttab_free_grant_references(grant_ref_t head); +int gnttab_empty_grant_references(const grant_ref_t *pprivate_head); + int gnttab_claim_grant_reference(grant_ref_t *pprivate_head); void gnttab_release_grant_reference(grant_ref_t *private_head, @@ -131,10 +130,13 @@ void gnttab_release_grant_reference(grant_ref_t *private_head, void gnttab_request_free_callback(struct gnttab_free_callback *callback, void (*fn)(void *), void *arg, uint16_t count); +void gnttab_cancel_free_callback(struct gnttab_free_callback *callback); + void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, gnttab_frame_t frame, int readonly); -void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid); +void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, + pfn_t pfn); #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) diff --git a/usr/src/uts/i86xpv/io/privcmd.c b/usr/src/uts/i86xpv/io/privcmd.c index 5660a2cdcb..7a3672e5d7 100644 --- a/usr/src/uts/i86xpv/io/privcmd.c +++ b/usr/src/uts/i86xpv/io/privcmd.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -235,8 +235,13 @@ do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr) } if (mfn == MFN_INVALID) { - error = EINVAL; - break; + /* + * This mfn is invalid and should not be added to + * segmf, as we'd only cause an immediate EFAULT when + * we tried to fault it in. + */ + mfn |= XEN_DOMCTL_PFINFO_XTAB; + continue; } if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0) @@ -246,7 +251,7 @@ do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr) * Tell the process that this MFN could not be mapped, so it * won't later try to access it. */ - mfn |= 0xf0000000; + mfn |= XEN_DOMCTL_PFINFO_XTAB; if (sulword(ulp, mfn) != 0) { error = EFAULT; break; diff --git a/usr/src/uts/i86xpv/io/privcmd_hcall.c b/usr/src/uts/i86xpv/io/privcmd_hcall.c index 2259756c30..6de5a69788 100644 --- a/usr/src/uts/i86xpv/io/privcmd_hcall.c +++ b/usr/src/uts/i86xpv/io/privcmd_hcall.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -81,12 +81,16 @@ import_buffer(import_export_t *iep, void *uaddr, void *kaddr, size_t size, iep->ie_flags |= IE_FREE; } else { iep->ie_kaddr = kaddr; + iep->ie_flags &= ~IE_FREE; } if ((flags & IE_IMPORT) && (ddi_copyin(uaddr, iep->ie_kaddr, size, 0) != 0)) { - if (iep->ie_flags & IE_FREE) + if (iep->ie_flags & IE_FREE) { kmem_free(iep->ie_kaddr, iep->ie_size); + iep->ie_kaddr = NULL; + iep->ie_flags = 0; + } return (-X_EFAULT); } @@ -109,8 +113,11 @@ export_buffer(import_export_t *iep, int *error) if ((iep->ie_flags & IE_EXPORT) && (*error >= 0) && (ddi_copyout(iep->ie_kaddr, iep->ie_uaddr, iep->ie_size, 0) != 0)) copy_err = -X_EFAULT; - if (iep->ie_flags & IE_FREE) + if (iep->ie_flags & IE_FREE) { kmem_free(iep->ie_kaddr, iep->ie_size); + iep->ie_kaddr = NULL; + iep->ie_flags = 0; + } if (copy_err != 0 && *error >= 0) *error = copy_err; @@ -135,8 +142,10 @@ import_handle(import_export_t *iep, void *field, size_t size, int flags) /*LINTED: constant in conditional context*/ get_xen_guest_handle(ptr, (*hdl)); err = import_buffer(iep, ptr, NULL, size, (flags)); - /*LINTED: constant in conditional context*/ - set_xen_guest_handle((*hdl), (void *)((iep)->ie_kaddr)); + if (err == 0) { + /*LINTED: constant in conditional context*/ + set_xen_guest_handle((*hdl), (void *)((iep)->ie_kaddr)); + } return (err); } @@ -189,6 +198,10 @@ privcmd_HYPERVISOR_domctl(xen_domctl_t *opp) * Check this first because our wrapper will forcibly overwrite it. */ if (op.interface_version != XEN_DOMCTL_INTERFACE_VERSION) { +#ifdef DEBUG + printf("domctl vers mismatch (cmd %d, found 0x%x, need 0x%x\n", + op.cmd, op.interface_version, XEN_DOMCTL_INTERFACE_VERSION); +#endif error = -X_EACCES; export_buffer(&op_ie, &error); return (error); @@ -240,8 +253,8 @@ privcmd_HYPERVISOR_domctl(xen_domctl_t *opp) size = roundup(howmany(op.u.shadow_op.pages, NBBY), sizeof (ulong_t)); - error = import_handle(&sub_ie, &op.u.shadow_op.dirty_bitmap, - size, IE_IMPEXP); + error = import_handle(&sub_ie, + &op.u.shadow_op.dirty_bitmap, size, IE_IMPEXP); break; } @@ -254,7 +267,7 @@ privcmd_HYPERVISOR_domctl(xen_domctl_t *opp) sizeof (vcpu_guest_context_t), IE_IMPORT); if (error == -X_EFAULT) /*LINTED: constant in conditional context*/ - get_xen_guest_handle(taddr, op.u.vcpucontext.ctxt); + get_xen_guest_handle_u(taddr, op.u.vcpucontext.ctxt); else taddr = sub_ie.ie_kaddr; DTRACE_XPV2(setvcpucontext__start, domid_t, op.domain, @@ -268,6 +281,25 @@ privcmd_HYPERVISOR_domctl(xen_domctl_t *opp) break; } + + case XEN_DOMCTL_sethvmcontext: { + error = import_handle(&sub_ie, &op.u.hvmcontext.buffer, + op.u.hvmcontext.size, IE_IMPORT); + break; + } + + case XEN_DOMCTL_gethvmcontext: { +#if !defined(__GNUC__) && defined(__i386__) + if (op.u.hvmcontext.buffer.u.p != NULL) +#else + if (op.u.hvmcontext.buffer.p != NULL) +#endif + error = import_handle(&sub_ie, &op.u.hvmcontext.buffer, + op.u.hvmcontext.size, IE_EXPORT); + break; + } + + case XEN_DOMCTL_resumedomain: case XEN_DOMCTL_getvcpuinfo: case XEN_DOMCTL_setvcpuaffinity: case XEN_DOMCTL_getvcpuaffinity: @@ -282,6 +314,8 @@ privcmd_HYPERVISOR_domctl(xen_domctl_t *opp) case XEN_DOMCTL_arch_setup: case XEN_DOMCTL_settimeoffset: case XEN_DOMCTL_real_mode_area: + case XEN_DOMCTL_set_address_size: + case XEN_DOMCTL_sendtrigger: break; default: @@ -348,6 +382,12 @@ privcmd_HYPERVISOR_sysctl(xen_sysctl_t *opp) break; } + case XEN_SYSCTL_debug_keys: { + error = import_handle(&sub_ie, &op.u.debug_keys.keys, + op.u.debug_keys.nr_keys, IE_IMPORT); + break; + } + case XEN_SYSCTL_tbuf_op: case XEN_SYSCTL_physinfo: case XEN_SYSCTL_sched_id: @@ -362,7 +402,7 @@ privcmd_HYPERVISOR_sysctl(xen_sysctl_t *opp) * before wiring down the output buffer appropriately. */ /*LINTED: constant in conditional context*/ - get_xen_guest_handle(scdp, op.u.perfc_op.desc); + get_xen_guest_handle_u(scdp, op.u.perfc_op.desc); if (scdp != NULL) { static int numcounters = -1; @@ -394,6 +434,11 @@ privcmd_HYPERVISOR_sysctl(xen_sysctl_t *opp) break; } + case XEN_SYSCTL_getcpuinfo: + error = import_handle(&sub_ie, &op.u.getcpuinfo.info, + op.u.getcpuinfo.max_cpus * + sizeof (xen_sysctl_cpuinfo_t), IE_EXPORT); + break; default: #ifdef DEBUG printf("unrecognized HYPERVISOR_sysctl %d\n", op.cmd); @@ -532,6 +577,7 @@ privcmd_HYPERVISOR_memory_op(int cmd, void *arg) case XENMEM_current_reservation: case XENMEM_maximum_reservation: + case XENMEM_maximum_gpfn: if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.domid), IE_IMPEXP) != 0) return (-X_EFAULT); @@ -693,6 +739,10 @@ privcmd_HYPERVISOR_event_channel_op(int cmd, void *arg) size = sizeof (evtchn_unmask_t); flags = IE_IMPORT; break; + case EVTCHNOP_reset: + size = sizeof (evtchn_reset_t); + flags = IE_IMPORT; + break; default: #ifdef DEBUG @@ -771,44 +821,48 @@ privcmd_HYPERVISOR_xen_version(int cmd, void *arg) } static int -privcmd_HYPERVISOR_acm_op(int cmd, void *arg) +privcmd_HYPERVISOR_acm_op(void *uacmctl) { int error; - int size = 0; + struct xen_acmctl *acmctl; import_export_t op_ie; - uint32_t flags; - switch (cmd) { + error = import_buffer(&op_ie, uacmctl, NULL, sizeof (*acmctl), + IE_IMPEXP); + if (error != 0) + return (error); + + acmctl = op_ie.ie_kaddr; + + if (acmctl->interface_version != ACM_INTERFACE_VERSION) { +#ifdef DEBUG + printf("acm vers mismatch (cmd %d, found 0x%x, need 0x%x\n", + acmctl->cmd, acmctl->interface_version, + ACM_INTERFACE_VERSION); +#endif + error = -X_EACCES; + export_buffer(&op_ie, &error); + return (error); + } + + switch (acmctl->cmd) { case ACMOP_setpolicy: - size = sizeof (struct acm_setpolicy); - flags = IE_IMPORT; - break; case ACMOP_getpolicy: - size = sizeof (struct acm_getpolicy); - flags = IE_IMPORT; - break; case ACMOP_dumpstats: - size = sizeof (struct acm_dumpstats); - flags = IE_IMPORT; - break; case ACMOP_getssid: - size = sizeof (struct acm_getssid); - flags = IE_IMPORT; - break; case ACMOP_getdecision: - size = sizeof (struct acm_getdecision); - flags = IE_IMPEXP; + case ACMOP_chgpolicy: + case ACMOP_relabeldoms: break; default: #ifdef DEBUG - printf("unrecognized HYPERVISOR_acm_op op %d\n", cmd); + printf("unrecognized HYPERVISOR_acm_op op %d\n", acmctl->cmd); #endif return (-X_EINVAL); } - error = import_buffer(&op_ie, arg, NULL, size, flags); if (error == 0) - error = HYPERVISOR_acm_op(cmd, op_ie.ie_kaddr); + error = HYPERVISOR_acm_op(acmctl); export_buffer(&op_ie, &error); return (error); @@ -966,8 +1020,7 @@ do_privcmd_hypercall(void *uarg, int mode, cred_t *cr, int *rval) (uint_t *)hc->arg[2], (domid_t)hc->arg[3]); break; case __HYPERVISOR_acm_op: - error = privcmd_HYPERVISOR_acm_op( - (int)hc->arg[0], (void *)hc->arg[1]); + error = privcmd_HYPERVISOR_acm_op((void *)hc->arg[0]); break; case __HYPERVISOR_hvm_op: error = privcmd_HYPERVISOR_hvm_op( diff --git a/usr/src/uts/i86xpv/ml/xenguest.s b/usr/src/uts/i86xpv/ml/xenguest.s index 187abcb52f..2d68e640a9 100644 --- a/usr/src/uts/i86xpv/ml/xenguest.s +++ b/usr/src/uts/i86xpv/ml/xenguest.s @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -61,15 +61,7 @@ int __lint_xen_guest; NOTE("Xen", XEN_ELFNOTE_VIRT_BASE, .4byte, 0x40000000) NOTE("Xen", XEN_ELFNOTE_PADDR_OFFSET, .4byte, 0x40000000) #if defined(__i386) - /* - * NB: If you want to build a kernel that works on a non-PAE - * hypervisor, just comment out the next line and rebuild Solaris. - * It'll just work, the kernel figures everything out dynamically. - */ NOTE("Xen", XEN_ELFNOTE_PAE_MODE, .string, "yes,bimodal") - /* - * XXPV: implement XEN_ELFNOTE_HV_START_LOW - */ #endif #endif /* __lint */ diff --git a/usr/src/uts/i86xpv/os/xpv_panic.c b/usr/src/uts/i86xpv/os/xpv_panic.c index 191485ffcd..713279246a 100644 --- a/usr/src/uts/i86xpv/os/xpv_panic.c +++ b/usr/src/uts/i86xpv/os/xpv_panic.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -86,6 +86,20 @@ static pfn_t ptable_pfn[MAX_NUM_LEVEL]; static int xpv_dump_pages; /* + * There are up to two large swathes of RAM that we don't want to include + * in the dump: those that comprise the Xen version of segkpm. On 32-bit + * systems there is no such region of memory. On 64-bit systems, there + * should be just a single contiguous region that corresponds to all of + * physical memory. The tricky bit is that Xen's heap sometimes lives in + * the middle of their segkpm, and is mapped using only kpm-like addresses. + * In that case, we need to skip the swathes before and after Xen's heap. + */ +uintptr_t kpm1_low = 0; +uintptr_t kpm1_high = 0; +uintptr_t kpm2_low = 0; +uintptr_t kpm2_high = 0; + +/* * Some commonly used values that we don't want to recompute over and over. */ static int xpv_panic_nptes[MAX_NUM_LEVEL]; @@ -214,15 +228,6 @@ xpv_va_walk(uintptr_t *vaddr) idx++; scan_va += mmu.level_size[l]; } - va = scan_va; - - /* - * See if we've hit the end of the range. - */ - if (scan_va >= xpv_end || scan_va < *vaddr) { - va = scan_va; - break; - } /* * If there are no valid mappings in this table, we @@ -233,6 +238,13 @@ xpv_va_walk(uintptr_t *vaddr) break; } + va = scan_va; + /* + * See if we've hit the end of the range. + */ + if (va >= xpv_end || va < *vaddr) + break; + /* * If this mapping is for a pagetable, we drop down * to the next level in the hierarchy and look for @@ -252,10 +264,16 @@ xpv_va_walk(uintptr_t *vaddr) break; } - /* We also want to skip the Xen version of KPM */ - if (va >= (uintptr_t)xpv_panic_info->pi_ram_start && - va < (uintptr_t)xpv_panic_info->pi_ram_end) { - va = (uintptr_t)xpv_panic_info->pi_ram_end; + /* + * See if the address is within one of the two + * kpm-like regions we want to skip. + */ + if (va >= kpm1_low && va < kpm1_high) { + va = kpm1_high; + break; + } + if (va >= kpm2_low && va < kpm2_high) { + va = kpm2_high; break; } @@ -691,6 +709,17 @@ xpv_do_panic(void *arg) xpv_panic_info = pip; +#if defined(__amd64) + kpm1_low = (uintptr_t)xpv_panic_info->pi_ram_start; + if (xpv_panic_info->pi_xen_start == NULL) { + kpm1_high = (uintptr_t)xpv_panic_info->pi_ram_end; + } else { + kpm1_high = (uintptr_t)xpv_panic_info->pi_xen_start; + kpm2_low = (uintptr_t)xpv_panic_info->pi_xen_end; + kpm2_high = (uintptr_t)xpv_panic_info->pi_ram_end; + } +#endif + /* * Make sure we are running on the Solaris %gs. The Xen panic code * should already have set up the GDT properly. diff --git a/usr/src/uts/intel/os/driver_aliases b/usr/src/uts/intel/os/driver_aliases index 0fe52eb075..2b2c09cced 100644 --- a/usr/src/uts/intel/os/driver_aliases +++ b/usr/src/uts/intel/os/driver_aliases @@ -33,6 +33,7 @@ cpudrv "cpu" xnbe "xnb,ioemu" xnbo "xnb,SUNW_mac" xnbu "xnb,netfront" +xnbo xnb pit_beep "SUNW,pit_beep" intel_nb5000 "pci8086,25d8" intel_nb5000 "pci8086,25d4" diff --git a/usr/src/uts/intel/sys/hypervisor.h b/usr/src/uts/intel/sys/hypervisor.h index 9f5aadd499..5e013abd15 100644 --- a/usr/src/uts/intel/sys/hypervisor.h +++ b/usr/src/uts/intel/sys/hypervisor.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -206,7 +206,7 @@ extern long HYPERVISOR_vcpu_op(int, int, void *); extern long HYPERVISOR_set_segment_base(int, ulong_t); #endif /* __amd64 */ extern int HYPERVISOR_mmuext_op(struct mmuext_op *, int, uint_t *, domid_t); -extern long HYPERVISOR_acm_op(int cmd, void *); +extern long HYPERVISOR_acm_op(struct xen_acmctl *); extern long HYPERVISOR_nmi_op(int cmd, void *); extern long HYPERVISOR_sched_op(int, void *); extern long HYPERVISOR_callback_op(int, void *); |