diff options
Diffstat (limited to 'usr/src/uts/i86pc')
37 files changed, 1404 insertions, 1954 deletions
diff --git a/usr/src/uts/i86pc/boot/boot_fb.c b/usr/src/uts/i86pc/boot/boot_fb.c index 1ac4789af7..e0e79bd14e 100644 --- a/usr/src/uts/i86pc/boot/boot_fb.c +++ b/usr/src/uts/i86pc/boot/boot_fb.c @@ -354,28 +354,44 @@ boot_get_color(uint32_t *fg, uint32_t *bg) /* ansi to solaris colors, see also boot_console.c */ if (fb_info.inverse == B_TRUE || fb_info.inverse_screen == B_TRUE) { - if (fb_info.fg_color < 16) - *bg = dim_xlate[fb_info.fg_color]; - else + if (fb_info.fg_color < XLATE_NCOLORS) { + /* + * white fg -> bright white bg + */ + if (fb_info.fg_color == pc_white) + *bg = brt_xlate[fb_info.fg_color]; + else + *bg = dim_xlate[fb_info.fg_color]; + } else { *bg = fb_info.fg_color; + } - if (fb_info.bg_color < 16) - *fg = brt_xlate[fb_info.bg_color]; - else + if (fb_info.bg_color < XLATE_NCOLORS) { + if (fb_info.bg_color == pc_white) + *fg = brt_xlate[fb_info.bg_color]; + else + *fg = dim_xlate[fb_info.bg_color]; + } else { *fg = fb_info.bg_color; + } } else { - if (fb_info.bg_color < 16) { - if (fb_info.bg_color == 7) + if (fb_info.fg_color < XLATE_NCOLORS) { + if (fb_info.fg_color == pc_white) + *fg = brt_xlate[fb_info.fg_color]; + else + *fg = dim_xlate[fb_info.fg_color]; + } else { + *fg = fb_info.fg_color; + } + + if (fb_info.bg_color < XLATE_NCOLORS) { + if (fb_info.bg_color == pc_white) *bg = brt_xlate[fb_info.bg_color]; else *bg = dim_xlate[fb_info.bg_color]; } else { *bg = fb_info.bg_color; } - if (fb_info.fg_color < 16) - *fg = dim_xlate[fb_info.fg_color]; - else - *fg = fb_info.fg_color; } } diff --git a/usr/src/uts/i86pc/io/apix/apix.c b/usr/src/uts/i86pc/io/apix/apix.c index 18dee7499a..cedc49147e 100644 --- a/usr/src/uts/i86pc/io/apix/apix.c +++ b/usr/src/uts/i86pc/io/apix/apix.c @@ -186,18 +186,6 @@ static void *apix_hdlp; static int apix_is_enabled = 0; /* - * Flag to indicate if APIX is to be enabled only for platforms - * with specific hw feature(s). 
- */ -int apix_hw_chk_enable = 1; - -/* - * Hw features that are checked for enabling APIX support. - */ -#define APIX_SUPPORT_X2APIC 0x00000001 -uint_t apix_supported_hw = APIX_SUPPORT_X2APIC; - -/* * apix_lock is used for cpu selection and vector re-binding */ lock_t apix_lock; @@ -272,22 +260,10 @@ apix_probe() if (get_hwenv() & HW_XEN_HVM) return (PSM_FAILURE); - /* check for hw features if specified */ - if (apix_hw_chk_enable) { - /* check if x2APIC mode is supported */ - if ((apix_supported_hw & APIX_SUPPORT_X2APIC) == - APIX_SUPPORT_X2APIC) { - if (apic_local_mode() == LOCAL_X2APIC) { - /* x2APIC mode activated by BIOS, switch ops */ - apic_mode = LOCAL_X2APIC; - apic_change_ops(); - } else if (!apic_detect_x2apic()) { - /* x2APIC mode is not supported in the hw */ - apix_enable = 0; - } - } - if (apix_enable == 0) - return (PSM_FAILURE); + if (apic_local_mode() == LOCAL_X2APIC) { + /* x2APIC mode activated by BIOS, switch ops */ + apic_mode = LOCAL_X2APIC; + apic_change_ops(); } rval = apic_probe_common(apix_psm_info.p_mach_idstring); diff --git a/usr/src/uts/i86pc/io/gfx_private/gfxp_fb.c b/usr/src/uts/i86pc/io/gfx_private/gfxp_fb.c index 6d1a99ea05..0d2d1fe1de 100644 --- a/usr/src/uts/i86pc/io/gfx_private/gfxp_fb.c +++ b/usr/src/uts/i86pc/io/gfx_private/gfxp_fb.c @@ -11,6 +11,7 @@ /* * Copyright 2016 Toomas Soome <tsoome@me.com> + * Copyright 2020 RackTop Systems, Inc. */ /* @@ -81,6 +82,17 @@ gfxp_check_for_console(dev_info_t *devi, struct gfxp_fb_softc *softc, uint16_t data16; /* + * fb_info is filled in by data gathered by the bootloader. + * In particular we are interested in "paddr" which is the physical + * address of the framebuffer. If that is not zero, then we have + * a valid framebuffer and we can use this device as a console. 
+ */ + if (fb_info.paddr != 0) { + softc->flags |= GFXP_FLAG_CONSOLE; + return; + } + + /* * Based on Section 11.3, "PCI Display Subsystem Initialization", * of the 1.1 PCI-to-PCI Bridge Architecture Specification * determine if this is the boot console device. First, see diff --git a/usr/src/uts/i86pc/io/hpet_acpi.c b/usr/src/uts/i86pc/io/hpet_acpi.c index ac5a885a38..aace99b18b 100644 --- a/usr/src/uts/i86pc/io/hpet_acpi.c +++ b/usr/src/uts/i86pc/io/hpet_acpi.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2020 Oxide Computer Company */ #include <sys/hpet_acpi.h> @@ -34,6 +35,8 @@ #include <sys/clock.h> #include <sys/archsystm.h> #include <sys/cpupart.h> +#include <sys/x86_archext.h> +#include <sys/prom_debug.h> static int hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags); static boolean_t hpet_install_proxy(void); @@ -140,17 +143,36 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) (void) memset(&hpet_info, 0, sizeof (hpet_info)); hpet.supported = HPET_NO_SUPPORT; - if (idle_cpu_no_deep_c) + if ((get_hwenv() & HW_XEN_HVM) != 0) { + /* + * In some AWS EC2 guests, though the HPET is advertised via + * ACPI, programming the interrupt on the non-legacy timer can + * result in an immediate reset of the instance. It is not + * currently possible to tell whether this is an instance with + * broken HPET emulation or not, so we simply disable it across + * the board. + */ + PRM_POINT("will not program HPET in Xen HVM"); return (DDI_FAILURE); + } - if (!cpuid_deep_cstates_supported()) + if (idle_cpu_no_deep_c || + !cpuid_deep_cstates_supported()) { + /* + * If Deep C-States are disabled or not supported, then we do + * not need to program the HPET at all as it will not + * subsequently be used. + */ + PRM_POINT("no need to program the HPET"); return (DDI_FAILURE); + } hpet_establish_hooks(); /* * Get HPET ACPI table 1. 
*/ + PRM_POINT("AcpiGetTable() HPET #1"); if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_HPET, HPET_TABLE_1, (ACPI_TABLE_HEADER **)&hpet_table))) { cmn_err(CE_NOTE, "!hpet_acpi: unable to get ACPI HPET table"); @@ -162,14 +184,18 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) return (DDI_FAILURE); } + PRM_POINT("hpet_memory_map()"); la = hpet_memory_map(hpet_table); + PRM_DEBUG(la); if (la == NULL) { cmn_err(CE_NOTE, "!hpet_acpi: memory map HPET failed"); return (DDI_FAILURE); } hpet_info.logical_address = la; + PRM_POINT("hpet_read_gen_cap()"); ret = hpet_read_gen_cap(&hpet_info); + PRM_DEBUG(ret); hpet_info.gen_cap.counter_clk_period = HPET_GCAP_CNTR_CLK_PERIOD(ret); hpet_info.gen_cap.vendor_id = HPET_GCAP_VENDOR_ID(ret); hpet_info.gen_cap.leg_route_cap = HPET_GCAP_LEG_ROUTE_CAP(ret); @@ -189,6 +215,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) } num_timers = (uint_t)hpet_info.gen_cap.num_tim_cap; + PRM_DEBUG(num_timers); if ((num_timers < 3) || (num_timers > 32)) { cmn_err(CE_NOTE, "!hpet_acpi: invalid number of HPET timers " "%lx", (long)num_timers); @@ -197,20 +224,23 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) hpet_info.timer_n_config = (hpet_TN_conf_cap_t *)kmem_zalloc( num_timers * sizeof (uint64_t), KM_SLEEP); + PRM_POINT("hpet_read_gen_config()"); ret = hpet_read_gen_config(&hpet_info); hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret); hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret); /* - * Solaris does not use the HPET Legacy Replacement Route capabilities. + * illumos does not use the HPET Legacy Replacement Route capabilities. * This feature has been off by default on test systems. * The HPET spec does not specify if Legacy Replacement Route is - * on or off by default, so we explicitely set it off here. + * on or off by default, so we explicitly set it off here. * It should not matter which mode the HPET is in since we use * the first available non-legacy replacement timer: timer 2. 
*/ + PRM_POINT("hpet_read_gen_config()"); (void) hpet_set_leg_rt_cnf(&hpet_info, 0); + PRM_POINT("hpet_read_gen_config() again"); ret = hpet_read_gen_config(&hpet_info); hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret); hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret); @@ -218,6 +248,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) hpet_info.gen_intrpt_stat = hpet_read_gen_intrpt_stat(&hpet_info); hpet_info.main_counter_value = hpet_read_main_counter_value(&hpet_info); + PRM_POINT("disable timer loop..."); for (ti = 0; ti < num_timers; ++ti) { ret = hpet_read_timer_N_config(&hpet_info, ti); /* @@ -231,6 +262,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) hpet_info.timer_n_config[ti] = hpet_convert_timer_N_config(ret); } + PRM_POINT("disable timer loop complete"); /* * Be aware the Main Counter may need to be initialized in the future @@ -238,6 +270,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) * The HPET's Main Counter does not need to be initialize to a specific * value before starting it for use to wake up CPUs from Deep C-States. */ + PRM_POINT("hpet_start_main_counter()"); if (hpet_start_main_counter(&hpet_info) != AE_OK) { cmn_err(CE_NOTE, "!hpet_acpi: hpet_start_main_counter failed"); return (DDI_FAILURE); @@ -247,6 +280,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) /* * Read main counter twice to record HPET latency for debugging. 
*/ + PRM_POINT("TSC and HPET reads:"); hpet_info.tsc[0] = tsc_read(); hpet_info.hpet_main_counter_reads[0] = hpet_read_main_counter_value(&hpet_info); @@ -255,6 +289,12 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) hpet_read_main_counter_value(&hpet_info); hpet_info.tsc[2] = tsc_read(); + PRM_DEBUG(hpet_info.hpet_main_counter_reads[0]); + PRM_DEBUG(hpet_info.hpet_main_counter_reads[1]); + PRM_DEBUG(hpet_info.tsc[0]); + PRM_DEBUG(hpet_info.tsc[1]); + PRM_DEBUG(hpet_info.tsc[2]); + ret = hpet_read_gen_config(&hpet_info); hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret); hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret); @@ -293,6 +333,7 @@ hpet_acpi_fini(void) static int hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags) { + PRM_POINT("hpet_get_IOAPIC_intr_capable_timer()"); if (hpet_get_IOAPIC_intr_capable_timer(&hpet_info) == -1) { cmn_err(CE_WARN, "!hpet_acpi: get ioapic intr failed."); return (DDI_FAILURE); @@ -300,6 +341,7 @@ hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags) hpet_init_proxy_data(); + PRM_POINT("hpet_install_interrupt_handler()"); if (hpet_install_interrupt_handler(&hpet_isr, hpet_info.cstate_timer.intr) != AE_OK) { cmn_err(CE_WARN, "!hpet_acpi: install interrupt failed."); @@ -314,13 +356,16 @@ hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags) * Avoid a possibly stuck interrupt by programing the HPET's timer here * before the I/O APIC is programmed to handle this interrupt. */ + PRM_POINT("hpet_timer_set_up()"); hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer, hpet_info.cstate_timer.intr); + PRM_POINT("back from hpet_timer_set_up()"); /* * All HPET functionality is supported. 
*/ hpet.supported = HPET_FULL_SUPPORT; + PRM_POINT("HPET full support"); return (DDI_SUCCESS); } @@ -564,14 +609,25 @@ hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l) } static void -hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l) +hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t conf) { - if (hip->timer_n_config[n].size_cap == 1) - *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS( - hip->logical_address, n) = l; - else - *(uint32_t *)HPET_TIMER_N_CONF_ADDRESS( - hip->logical_address, n) = (uint32_t)(0xFFFFFFFF & l); + /* + * The configuration register size is not affected by the size + * capability; it is always a 64-bit value. The top 32-bit half of + * this register is always read-only so we constrain our write to the + * bottom half. + */ + uint32_t *confaddr = (uint32_t *)HPET_TIMER_N_CONF_ADDRESS( + hip->logical_address, n); + uint32_t conf32 = 0xFFFFFFFF & conf; + + PRM_DEBUG(n); + PRM_DEBUG(conf); + PRM_DEBUG(conf32); + + *confaddr = conf32; + + PRM_POINT("write done"); } static void @@ -630,16 +686,19 @@ hpet_install_interrupt_handler(avfunc func, int vector) static int hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip) { - int timer; - int intr; + int timer; + int intr; for (timer = HPET_FIRST_NON_LEGACY_TIMER; timer < hip->gen_cap.num_tim_cap; ++timer) { - if (!hpet_timer_available(hip->allocated_timers, timer)) continue; intr = lowbit(hip->timer_n_config[timer].int_route_cap) - 1; + + PRM_DEBUG(timer); + PRM_DEBUG(intr); + if (intr >= 0) { hpet_timer_alloc(&hip->allocated_timers, timer); hip->cstate_timer.timer = timer; @@ -678,7 +737,12 @@ hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt) { uint64_t conf; + PRM_DEBUG(timer_n); + PRM_DEBUG(interrupt); + + PRM_POINT("hpet_read_timer_N_config()"); conf = hpet_read_timer_N_config(hip, timer_n); + PRM_DEBUG(conf); /* * Caller is required to verify this interrupt route is supported. 
@@ -691,7 +755,10 @@ hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt) conf &= ~HPET_TIMER_N_INT_ENB_CNF_BIT; /* disabled */ conf |= HPET_TIMER_N_INT_TYPE_CNF_BIT; /* Level Triggered */ + PRM_POINT("hpet_write_timer_N_config()"); + PRM_DEBUG(conf); hpet_write_timer_N_config(hip, timer_n, conf); + PRM_POINT("back from hpet_write_timer_N_config()"); } /* diff --git a/usr/src/uts/i86pc/io/mp_platform_common.c b/usr/src/uts/i86pc/io/mp_platform_common.c index aea7f2e856..9b9944fbd0 100644 --- a/usr/src/uts/i86pc/io/mp_platform_common.c +++ b/usr/src/uts/i86pc/io/mp_platform_common.c @@ -25,6 +25,7 @@ * Copyright (c) 2017 by Delphix. All rights reserved. * Copyright (c) 2019, Joyent, Inc. * Copyright 2020 RackTop Systems, Inc. + * Copyright 2020 Oxide Computer Company */ /* * Copyright (c) 2010, Intel Corporation. @@ -72,6 +73,7 @@ #include <sys/note.h> #include <sys/pci_intr_lib.h> #include <sys/sunndi.h> +#include <sys/prom_debug.h> #if !defined(__xpv) #include <sys/hpet.h> #include <sys/clock.h> @@ -334,7 +336,7 @@ apic_probe_common(char *modname) uint32_t mpct_addr, ebda_start = 0, base_mem_end; caddr_t biosdatap; caddr_t mpct = NULL; - caddr_t fptr; + caddr_t fptr = NULL; int i, mpct_size = 0, mapsize, retval = PSM_FAILURE; ushort_t ebda_seg, base_mem_size; struct apic_mpfps_hdr *fpsp; @@ -342,6 +344,8 @@ apic_probe_common(char *modname) int bypass_cpu_and_ioapics_in_mptables; int acpi_user_options; + PRM_POINT("apic_probe_common()"); + if (apic_forceload < 0) return (retval); @@ -359,11 +363,15 @@ apic_probe_common(char *modname) if (!apic_use_acpi) apic_use_acpi_madt_only = 0; + PRM_POINT("acpi_probe()"); retval = acpi_probe(modname); + PRM_DEBUG(retval); /* in UEFI system, there is no BIOS data */ - if (ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), 0, "efi-systab")) + if (ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), 0, "efi-systab")) { + PRM_POINT("UEFI system!"); goto apic_ret; + } /* * mapin the bios data area 40:0 @@ -371,17 
+379,21 @@ apic_probe_common(char *modname) * 40:0Eh - two-byte location for the exact starting address of * the EBDA segment for EISA */ + PRM_POINT("psm_map_phys()"); biosdatap = psm_map_phys(0x400, 0x20, PROT_READ); + PRM_DEBUG(biosdatap); if (!biosdatap) goto apic_ret; fpsp = (struct apic_mpfps_hdr *)NULL; mapsize = MPFPS_RAM_WIN_LEN; /*LINTED: pointer cast may result in improper alignment */ ebda_seg = *((ushort_t *)(biosdatap+0xe)); + PRM_DEBUG(ebda_seg); /* check the 1k of EBDA */ if (ebda_seg) { ebda_start = ((uint32_t)ebda_seg) << 4; fptr = psm_map_phys(ebda_start, MPFPS_RAM_WIN_LEN, PROT_READ); + PRM_DEBUG(fptr); if (fptr) { if (!(fpsp = apic_find_fps_sig(fptr, MPFPS_RAM_WIN_LEN))) @@ -389,6 +401,7 @@ apic_probe_common(char *modname) } } /* If not in EBDA, check the last k of system base memory */ + PRM_DEBUG(fpsp); if (!fpsp) { /*LINTED: pointer cast may result in improper alignment */ base_mem_size = *((ushort_t *)(biosdatap + 0x13)); @@ -402,6 +415,7 @@ apic_probe_common(char *modname) fptr = psm_map_phys(base_mem_end, MPFPS_RAM_WIN_LEN, PROT_READ); + PRM_DEBUG(fptr); if (fptr) { if (!(fpsp = apic_find_fps_sig(fptr, @@ -410,13 +424,16 @@ apic_probe_common(char *modname) } } } + PRM_POINT("psm_unmap_phys()"); psm_unmap_phys(biosdatap, 0x20); /* If still cannot find it, check the BIOS ROM space */ + PRM_DEBUG(fpsp); if (!fpsp) { mapsize = MPFPS_ROM_WIN_LEN; fptr = psm_map_phys(MPFPS_ROM_WIN_START, MPFPS_ROM_WIN_LEN, PROT_READ); + PRM_DEBUG(fptr); if (fptr) { if (!(fpsp = apic_find_fps_sig(fptr, MPFPS_ROM_WIN_LEN))) { @@ -426,13 +443,18 @@ apic_probe_common(char *modname) } } + PRM_DEBUG(fptr); + PRM_DEBUG(fpsp); + PRM_POINT("apic_checksum()"); if (apic_checksum((caddr_t)fpsp, fpsp->mpfps_length * 16) != 0) { + PRM_POINT("psm_unmap_phys()"); psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); goto apic_ret; } apic_spec_rev = fpsp->mpfps_spec_rev; if ((apic_spec_rev != 04) && (apic_spec_rev != 01)) { + PRM_POINT("psm_unmap_phys()"); psm_unmap_phys(fptr, 
MPFPS_ROM_WIN_LEN); goto apic_ret; } @@ -442,7 +464,9 @@ apic_probe_common(char *modname) /* check default configuration (dual CPUs) */ if ((apic_defconf = fpsp->mpfps_featinfo1) != 0) { + PRM_POINT("psm_unmap_phys()"); psm_unmap_phys(fptr, mapsize); + PRM_POINT("apic_handle_defconf()"); if ((retval = apic_handle_defconf()) != PSM_SUCCESS) return (retval); @@ -451,6 +475,7 @@ apic_probe_common(char *modname) /* MP Configuration Table */ mpct_addr = (uint32_t)(fpsp->mpfps_mpct_paddr); + PRM_DEBUG(mpct_addr); psm_unmap_phys(fptr, mapsize); /* unmap floating ptr struct */ @@ -472,6 +497,7 @@ apic_probe_common(char *modname) } mpct_size = (int)hdrp->mpcnf_tbl_length; + PRM_POINT("apic_set_pwroff_method_from_mpcnfhdr()"); apic_set_pwroff_method_from_mpcnfhdr(hdrp); psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr)); @@ -497,6 +523,8 @@ apic_probe_common(char *modname) hdrp = (struct apic_mp_cnf_hdr *)mpct; apicadr = (uint32_t *)mapin_apic((uint32_t)hdrp->mpcnf_local_apic, APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); + PRM_DEBUG(hdrp); + PRM_DEBUG(apicadr); if (!apicadr) goto apic_fail1; @@ -509,15 +537,20 @@ apic_probe_common(char *modname) } apic_fail1: + PRM_POINT("apic_fail1:"); psm_unmap_phys(mpct, mpct_size); mpct = NULL; apic_ret: + PRM_POINT("apic_ret:"); if (retval == PSM_SUCCESS) { extern int apic_ioapic_method_probe(); - if ((retval = apic_ioapic_method_probe()) == PSM_SUCCESS) + PRM_POINT("apic_ioapic_method_probe()"); + if ((retval = apic_ioapic_method_probe()) == PSM_SUCCESS) { + PRM_POINT("SUCCESS"); return (PSM_SUCCESS); + } } for (i = 0; i < apic_io_max; i++) @@ -533,6 +566,7 @@ apic_ret: if (mpct) psm_unmap_phys(mpct, mpct_size); + PRM_DEBUG(retval); return (retval); } @@ -632,20 +666,24 @@ acpi_probe(char *modname) if (!apic_use_acpi) return (PSM_FAILURE); + PRM_POINT("AcpiGetTable(MADT)"); if (AcpiGetTable(ACPI_SIG_MADT, 1, (ACPI_TABLE_HEADER **) &acpi_mapic_dtp) != AE_OK) { cmn_err(CE_WARN, "!acpi_probe: No MADT found!"); return 
(PSM_FAILURE); } + PRM_DEBUG((uint32_t)acpi_mapic_dtp->Address); + PRM_POINT("mapin_apic()"); apicadr = mapin_apic((uint32_t)acpi_mapic_dtp->Address, APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); if (!apicadr) return (PSM_FAILURE); if ((local_ids = (uint32_t *)kmem_zalloc(NCPU * sizeof (uint32_t), - KM_NOSLEEP)) == NULL) + KM_NOSLEEP)) == NULL) { return (PSM_FAILURE); + } if ((proc_ids = (uint32_t *)kmem_zalloc(NCPU * sizeof (uint32_t), KM_NOSLEEP)) == NULL) { @@ -653,7 +691,9 @@ acpi_probe(char *modname) return (PSM_FAILURE); } + PRM_POINT("acpi_get_apic_lid()"); local_ids[0] = acpi_get_apic_lid(); + PRM_DEBUG(local_ids[0]); apic_nproc = 1; apic_io_max = 0; @@ -662,6 +702,7 @@ acpi_probe(char *modname) madt_size = acpi_mapic_dtp->Header.Length; madt_seen = sizeof (*acpi_mapic_dtp); + PRM_DEBUG(madt_size); while (madt_seen < madt_size) { switch (ap->Type) { case ACPI_MADT_TYPE_LOCAL_APIC: @@ -812,6 +853,9 @@ acpi_probe(char *modname) ap = (ACPI_SUBTABLE_HEADER *)(((char *)ap) + ap->Length); } + PRM_DEBUG(apic_nproc); + PRM_DEBUG(apic_io_max); + /* We found multiple enabled cpus via MADT */ if ((apic_nproc > 1) && (apic_io_max > 0)) { acpi_found_smp_config = B_TRUE; @@ -826,6 +870,7 @@ acpi_probe(char *modname) if (plat_dr_support_cpu()) { apic_max_nproc = max_ncpus; } + PRM_DEBUG(apic_max_nproc); apic_cpus_size = max(apic_nproc, max_ncpus) * sizeof (*apic_cpus); if ((apic_cpus = kmem_zalloc(apic_cpus_size, KM_NOSLEEP)) == NULL) goto cleanup; @@ -834,15 +879,21 @@ acpi_probe(char *modname) * ACPI doesn't provide the local apic ver, get it directly from the * local apic */ + PRM_POINT("apic_read(APIC_VERS_REG)"); ver = apic_reg_ops->apic_read(APIC_VERS_REG); + PRM_DEBUG(ver); + PRM_DEBUG(apic_nproc); + PRM_DEBUG(boot_ncpus); for (i = 0; i < apic_nproc; i++) { apic_cpus[i].aci_local_id = local_ids[i]; apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF); apic_cpus[i].aci_processor_id = proc_ids[i]; /* Only build mapping info for CPUs present at boot. 
*/ - if (i < boot_ncpus) + if (i < boot_ncpus) { (void) acpica_map_cpu(i, proc_ids[i]); + } } + PRM_POINT("acpica_map_cpu loop complete"); /* * To support CPU dynamic reconfiguration, the apic CPU info structure @@ -881,8 +932,10 @@ acpi_probe(char *modname) apic_cpus[i].aci_status = APIC_CPU_FREE; } + PRM_POINT("ioapic reads"); for (i = 0; i < apic_io_max; i++) { ioapic_ix = i; + PRM_DEBUG(ioapic_ix); /* * need to check Sitka on the following acpi problem @@ -892,16 +945,20 @@ acpi_probe(char *modname) * actual id directly from the ioapic. */ id = ioapic_read(ioapic_ix, APIC_ID_CMD); + PRM_DEBUG(id); hid = (uchar_t)(id >> 24); + PRM_DEBUG(hid); if (hid != apic_io_id[i]) { if (apic_io_id[i] == 0) apic_io_id[i] = hid; else { /* set ioapic id to whatever reported by ACPI */ id = ((uint32_t)apic_io_id[i]) << 24; + PRM_POINT("ioapic_write(ID)"); ioapic_write(ioapic_ix, APIC_ID_CMD, id); } } + PRM_POINT("ioapic_read(VERS)"); ver = ioapic_read(ioapic_ix, APIC_VERS_CMD); apic_io_ver[i] = (uchar_t)(ver & 0xff); intmax = (ver >> 16) & 0xff; @@ -917,6 +974,7 @@ acpi_probe(char *modname) * acpi-user-options specifies legacy mode * (no SCI, no ACPI mode) */ + PRM_POINT("acpica_get_sci()"); if (acpica_get_sci(&sci, &sci_flags) != AE_OK) sci = -1; @@ -925,6 +983,7 @@ acpi_probe(char *modname) * If this fails, we don't attempt to use ACPI * even if we were able to get a MADT above */ + PRM_POINT("acpica_init()"); if (acpica_init() != AE_OK) { cmn_err(CE_WARN, "!apic: Failed to initialize acpica!"); goto cleanup; @@ -934,6 +993,7 @@ acpi_probe(char *modname) * Call acpica_build_processor_map() now that we have * ACPI namesspace access */ + PRM_POINT("acpica_build_processor_map()"); (void) acpica_build_processor_map(); /* @@ -952,15 +1012,19 @@ acpi_probe(char *modname) if (apic_verbose & APIC_VERBOSE_POWEROFF_PAUSE_FLAG) acpi_verboseflags |= PSM_VERBOSE_POWEROFF_PAUSE_FLAG; + PRM_POINT("acpi_psm_init()"); if (acpi_psm_init(modname, acpi_verboseflags) == ACPI_PSM_FAILURE) goto 
cleanup; /* Enable ACPI APIC interrupt routing */ + PRM_POINT("apic_acpi_enter_apicmode()"); if (apic_acpi_enter_apicmode() != PSM_FAILURE) { cmn_err(CE_NOTE, "!apic: Using APIC interrupt routing mode"); + PRM_POINT("build_reserved_irqlist()"); build_reserved_irqlist((uchar_t *)apic_reserved_irqlist); apic_enable_acpi = 1; if (apic_sci_vect > 0) { + PRM_POINT("acpica_set_core_feature()"); acpica_set_core_feature(ACPI_FEATURE_SCI_EVENT); } if (apic_use_acpi_madt_only) { @@ -970,16 +1034,18 @@ acpi_probe(char *modname) #if !defined(__xpv) /* - * probe ACPI for hpet information here which is used later - * in apic_picinit(). + * Probe ACPI for HPET information here which is used later in + * apic_picinit(). Note that we do not need to use the HPET at + * all on most modern systems, but if there is an actionable + * failure message it will be logged by the routine itself. */ - if (hpet_acpi_init(&apic_hpet_vect, &apic_hpet_flags) < 0) { - cmn_err(CE_NOTE, "!ACPI HPET table query failed\n"); - } + PRM_POINT("hpet_acpi_init()"); + (void) hpet_acpi_init(&apic_hpet_vect, &apic_hpet_flags); #endif kmem_free(local_ids, NCPU * sizeof (uint32_t)); kmem_free(proc_ids, NCPU * sizeof (uint32_t)); + PRM_POINT("SUCCESS"); return (PSM_SUCCESS); } /* if setting APIC mode failed above, we fall through to cleanup */ diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic.c b/usr/src/uts/i86pc/io/pcplusmp/apic.c index efca63c814..c987391435 100644 --- a/usr/src/uts/i86pc/io/pcplusmp/apic.c +++ b/usr/src/uts/i86pc/io/pcplusmp/apic.c @@ -26,6 +26,7 @@ * Copyright (c) 2010, Intel Corporation. * All rights reserved. * Copyright 2019 Joyent, Inc. 
+ * Copyright 2020 Oxide Computer Company */ /* @@ -58,6 +59,7 @@ #include <sys/ddi_impldefs.h> #include <sys/pci.h> #include <sys/promif.h> +#include <sys/prom_debug.h> #include <sys/x86_archext.h> #include <sys/cpc_impl.h> #include <sys/uadmin.h> @@ -249,16 +251,23 @@ _info(struct modinfo *modinfop) static int apic_probe(void) { + PRM_POINT("apic_probe()"); + /* check if apix is initialized */ - if (apix_enable && apix_loaded()) + if (apix_enable && apix_loaded()) { + PRM_POINT("apic_probe FAILURE: apix is loaded"); return (PSM_FAILURE); + } /* * Check whether x2APIC mode was activated by BIOS. We don't support * that in pcplusmp as apix normally handles that. */ - if (apic_local_mode() == LOCAL_X2APIC) + PRM_POINT("apic_local_mode()"); + if (apic_local_mode() == LOCAL_X2APIC) { + PRM_POINT("apic_probe FAILURE: in x2apic mode"); return (PSM_FAILURE); + } /* continue using pcplusmp PSM */ apix_enable = 0; diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c index 1046a54126..e9a34f8630 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c @@ -203,7 +203,7 @@ check_svm_features(void) */ if (nasid == 0 || nasid > regs[1]) nasid = regs[1]; - KASSERT(nasid > 1, ("Insufficient ASIDs for guests: %#x", nasid)); + KASSERT(nasid > 1, ("Insufficient ASIDs for guests: %x", nasid)); /* bhyve requires the Nested Paging feature */ if (!(svm_feature & AMD_CPUID_SVM_NP)) { @@ -386,11 +386,11 @@ svm_msr_perm(uint8_t *perm_bitmap, uint64_t msr, bool read, bool write) int index, bit, error; error = svm_msr_index(msr, &index, &bit); - KASSERT(error == 0, ("%s: invalid msr %#lx", __func__, msr)); + KASSERT(error == 0, ("%s: invalid msr %lx", __func__, msr)); KASSERT(index >= 0 && index < SVM_MSR_BITMAP_SIZE, - ("%s: invalid index %d for msr %#lx", __func__, index, msr)); + ("%s: invalid index %d for msr %lx", __func__, index, msr)); KASSERT(bit >= 0 && bit <= 6, ("%s: invalid bit position %d " - "msr %#lx", __func__, 
bit, msr)); + "msr %lx", __func__, bit, msr)); if (read) perm_bitmap[index] &= ~(1UL << bit); @@ -444,7 +444,7 @@ svm_set_intercept(struct svm_softc *sc, int vcpu, int idx, uint32_t bitmask, if (ctrl->intercept[idx] != oldval) { svm_set_dirty(sc, vcpu, VMCB_CACHE_I); VCPU_CTR3(sc->vm, vcpu, "intercept[%d] modified " - "from %#x to %#x", idx, oldval, ctrl->intercept[idx]); + "from %x to %x", idx, oldval, ctrl->intercept[idx]); } } @@ -527,11 +527,23 @@ vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa, svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MONITOR); svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MWAIT); + /* Intercept privileged invalidation instructions. */ + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INVD); + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INVLPGA); + /* + * Intercept all virtualization-related instructions. + * * From section "Canonicalization and Consistency Checks" in APMv2 * the VMRUN intercept bit must be set to pass the consistency check. */ svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMRUN); + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMMCALL); + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMLOAD); + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMSAVE); + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_STGI); + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_CLGI); + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_SKINIT); /* * The ASID will be set to a non-zero value just before VMRUN. 
@@ -672,22 +684,19 @@ svm_cpl(struct vmcb_state *state) static enum vm_cpu_mode svm_vcpu_mode(struct vmcb *vmcb) { - struct vmcb_segment seg; struct vmcb_state *state; - int error; state = &vmcb->state; if (state->efer & EFER_LMA) { - error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); - KASSERT(error == 0, ("%s: vmcb_seg(cs) error %d", __func__, - error)); + struct vmcb_segment *seg; /* * Section 4.8.1 for APM2, check if Code Segment has * Long attribute set in descriptor. */ - if (seg.attrib & VMCB_CS_ATTRIB_L) + seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS); + if (seg->attrib & VMCB_CS_ATTRIB_L) return (CPU_MODE_64BIT); else return (CPU_MODE_COMPATIBILITY); @@ -848,10 +857,9 @@ svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit, struct vmcb *vmcb; struct vie *vie; struct vm_guest_paging paging; - struct vmcb_segment seg; + struct vmcb_segment *seg; char *inst_bytes = NULL; uint8_t inst_len = 0; - int error; vmcb = svm_get_vmcb(svm_sc, vcpu); ctrl = &vmcb->ctrl; @@ -861,22 +869,21 @@ svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit, vmexit->u.mmio_emul.gla = VIE_INVALID_GLA; svm_paging_info(vmcb, &paging); - error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); - KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error)); - switch (paging.cpu_mode) { case CPU_MODE_REAL: - vmexit->u.mmio_emul.cs_base = seg.base; + seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS); + vmexit->u.mmio_emul.cs_base = seg->base; vmexit->u.mmio_emul.cs_d = 0; break; case CPU_MODE_PROTECTED: case CPU_MODE_COMPATIBILITY: - vmexit->u.mmio_emul.cs_base = seg.base; + seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS); + vmexit->u.mmio_emul.cs_base = seg->base; /* * Section 4.8.1 of APM2, Default Operand Size or D bit. */ - vmexit->u.mmio_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ? + vmexit->u.mmio_emul.cs_d = (seg->attrib & VMCB_CS_ATTRIB_D) ? 
1 : 0; break; default: @@ -927,7 +934,7 @@ svm_eventinject(struct svm_softc *sc, int vcpu, int intr_type, int vector, ctrl = svm_get_vmcb_ctrl(sc, vcpu); KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0, - ("%s: event already pending %#lx", __func__, ctrl->eventinj)); + ("%s: event already pending %lx", __func__, ctrl->eventinj)); KASSERT(vector >=0 && vector <= 255, ("%s: invalid vector %d", __func__, vector)); @@ -949,7 +956,7 @@ svm_eventinject(struct svm_softc *sc, int vcpu, int intr_type, int vector, if (ec_valid) { ctrl->eventinj |= VMCB_EVENTINJ_EC_VALID; ctrl->eventinj |= (uint64_t)error << 32; - VCPU_CTR3(sc->vm, vcpu, "Injecting %s at vector %d errcode %#x", + VCPU_CTR3(sc->vm, vcpu, "Injecting %s at vector %d errcode %x", intrtype_to_str(intr_type), vector, error); } else { VCPU_CTR2(sc->vm, vcpu, "Injecting %s at vector %d", @@ -1050,32 +1057,6 @@ disable_intr_window_exiting(struct svm_softc *sc, int vcpu) svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR); } -static int -svm_modify_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t val) -{ - struct vmcb_ctrl *ctrl; - int oldval, newval; - - ctrl = svm_get_vmcb_ctrl(sc, vcpu); - oldval = ctrl->intr_shadow; - newval = val ? 1 : 0; - if (newval != oldval) { - ctrl->intr_shadow = newval; - VCPU_CTR1(sc->vm, vcpu, "Setting intr_shadow to %d", newval); - } - return (0); -} - -static int -svm_get_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t *val) -{ - struct vmcb_ctrl *ctrl; - - ctrl = svm_get_vmcb_ctrl(sc, vcpu); - *val = ctrl->intr_shadow; - return (0); -} - /* * Once an NMI is injected it blocks delivery of further NMIs until the handler * executes an IRET. 
The IRET intercept is enabled when an NMI is injected to @@ -1103,7 +1084,7 @@ enable_nmi_blocking(struct svm_softc *sc, int vcpu) static void clear_nmi_blocking(struct svm_softc *sc, int vcpu) { - int error; + struct vmcb_ctrl *ctrl; KASSERT(nmi_blocked(sc, vcpu), ("vNMI already unblocked")); VCPU_CTR0(sc->vm, vcpu, "vNMI blocking cleared"); @@ -1124,8 +1105,8 @@ clear_nmi_blocking(struct svm_softc *sc, int vcpu) * Set 'intr_shadow' to prevent an NMI from being injected on the * immediate VMRUN. */ - error = svm_modify_intr_shadow(sc, vcpu, 1); - KASSERT(!error, ("%s: error %d setting intr_shadow", __func__, error)); + ctrl = svm_get_vmcb_ctrl(sc, vcpu); + ctrl->intr_shadow = 1; } #define EFER_MBZ_BITS 0xFFFFFFFFFFFF0200UL @@ -1141,7 +1122,7 @@ svm_write_efer(struct svm_softc *sc, int vcpu, uint64_t newval, bool *retu) state = svm_get_vmcb_state(sc, vcpu); oldval = state->efer; - VCPU_CTR2(sc->vm, vcpu, "wrmsr(efer) %#lx/%#lx", oldval, newval); + VCPU_CTR2(sc->vm, vcpu, "wrmsr(efer) %lx/%lx", oldval, newval); newval &= ~0xFE; /* clear the Read-As-Zero (RAZ) bits */ changed = oldval ^ newval; @@ -1275,7 +1256,7 @@ exit_reason_to_str(uint64_t reason) case VMCB_EXIT_MWAIT: return ("mwait"); default: - snprintf(reasonbuf, sizeof(reasonbuf), "%#lx", reason); + snprintf(reasonbuf, sizeof(reasonbuf), "%lx", reason); return (reasonbuf); } } @@ -1350,10 +1331,10 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) } KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0, ("%s: event " - "injection valid bit is set %#lx", __func__, ctrl->eventinj)); + "injection valid bit is set %lx", __func__, ctrl->eventinj)); KASSERT(vmexit->inst_length >= 0 && vmexit->inst_length <= 15, - ("invalid inst_length %d: code (%#lx), info1 (%#lx), info2 (%#lx)", + ("invalid inst_length %d: code (%lx), info1 (%lx), info2 (%lx)", vmexit->inst_length, code, info1, info2)); svm_update_virqinfo(svm_sc, vcpu); @@ -1445,7 +1426,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, 
struct vm_exit *vmexit) if (reflect) { /* Reflect the exception back into the guest */ VCPU_CTR2(svm_sc->vm, vcpu, "Reflecting exception " - "%d/%#x into the guest", idtvec, (int)info1); + "%d/%x into the guest", idtvec, (int)info1); error = vm_inject_exception(svm_sc->vm, vcpu, idtvec, errcode_valid, info1, 0); KASSERT(error == 0, ("%s: vm_inject_exception error %d", @@ -1462,7 +1443,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) if (info1) { vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1); val = (uint64_t)edx << 32 | eax; - VCPU_CTR2(svm_sc->vm, vcpu, "wrmsr %#x val %#lx", + VCPU_CTR2(svm_sc->vm, vcpu, "wrmsr %x val %lx", ecx, val); if (emulate_wrmsr(svm_sc, vcpu, ecx, val, &retu)) { vmexit->exitcode = VM_EXITCODE_WRMSR; @@ -1475,7 +1456,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) ("emulate_wrmsr retu with bogus exitcode")); } } else { - VCPU_CTR1(svm_sc->vm, vcpu, "rdmsr %#x", ecx); + VCPU_CTR1(svm_sc->vm, vcpu, "rdmsr %x", ecx); vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1); if (emulate_rdmsr(svm_sc, vcpu, ecx, &retu)) { vmexit->exitcode = VM_EXITCODE_RDMSR; @@ -1492,6 +1473,31 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) handled = svm_handle_inout(svm_sc, vcpu, vmexit); vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1); break; + case VMCB_EXIT_SHUTDOWN: + vm_suspend(svm_sc->vm, VM_SUSPEND_TRIPLEFAULT); + handled = 1; + break; + case VMCB_EXIT_INVD: + case VMCB_EXIT_INVLPGA: + /* privileged invalidation instructions */ + vm_inject_ud(svm_sc->vm, vcpu); + handled = 1; + break; + case VMCB_EXIT_VMRUN: + case VMCB_EXIT_VMLOAD: + case VMCB_EXIT_VMSAVE: + case VMCB_EXIT_STGI: + case VMCB_EXIT_CLGI: + case VMCB_EXIT_SKINIT: + /* privileged vmm instructions */ + vm_inject_ud(svm_sc->vm, vcpu); + handled = 1; + break; + case VMCB_EXIT_VMMCALL: + /* No handlers make use of VMMCALL for now */ + vm_inject_ud(svm_sc->vm, vcpu); + handled = 1; + break; case VMCB_EXIT_CPUID: 
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_CPUID, 1); handled = x86_emulate_cpuid(svm_sc->vm, vcpu, &state->rax, @@ -1510,7 +1516,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) /* EXITINFO2 contains the faulting guest physical address */ if (info1 & VMCB_NPF_INFO1_RSV) { VCPU_CTR2(svm_sc->vm, vcpu, "nested page fault with " - "reserved bits set: info1(%#lx) info2(%#lx)", + "reserved bits set: info1(%lx) info2(%lx)", info1, info2); } else if (vm_mem_allocated(svm_sc->vm, vcpu, info2)) { vmexit->exitcode = VM_EXITCODE_PAGING; @@ -1518,13 +1524,13 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) vmexit->u.paging.fault_type = npf_fault_type(info1); vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_NESTED_FAULT, 1); VCPU_CTR3(svm_sc->vm, vcpu, "nested page fault " - "on gpa %#lx/%#lx at rip %#lx", + "on gpa %lx/%lx at rip %lx", info2, info1, state->rip); } else if (svm_npf_emul_fault(info1)) { svm_handle_mmio_emul(svm_sc, vcpu, vmexit, info2); vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MMIO_EMUL, 1); VCPU_CTR3(svm_sc->vm, vcpu, "mmio_emul fault " - "for gpa %#lx/%#lx at rip %#lx", + "for gpa %lx/%lx at rip %lx", info2, info1, state->rip); } break; @@ -1539,7 +1545,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) break; } - VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %#lx/%d", + VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %lx/%d", handled ? 
"handled" : "unhandled", exit_reason_to_str(code), vmexit->rip, vmexit->inst_length); @@ -1576,14 +1582,14 @@ svm_inj_intinfo(struct svm_softc *svm_sc, int vcpu) return; KASSERT(VMCB_EXITINTINFO_VALID(intinfo), ("%s: entry intinfo is not " - "valid: %#lx", __func__, intinfo)); + "valid: %lx", __func__, intinfo)); svm_eventinject(svm_sc, vcpu, VMCB_EXITINTINFO_TYPE(intinfo), VMCB_EXITINTINFO_VECTOR(intinfo), VMCB_EXITINTINFO_EC(intinfo), VMCB_EXITINTINFO_EC_VALID(intinfo)); vmm_stat_incr(svm_sc->vm, vcpu, VCPU_INTINFO_INJECTED, 1); - VCPU_CTR1(svm_sc->vm, vcpu, "Injected entry intinfo: %#lx", intinfo); + VCPU_CTR1(svm_sc->vm, vcpu, "Injected entry intinfo: %lx", intinfo); } /* @@ -1610,7 +1616,7 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) if (vcpustate->nextrip != state->rip) { ctrl->intr_shadow = 0; VCPU_CTR2(sc->vm, vcpu, "Guest interrupt blocking " - "cleared due to rip change: %#lx/%#lx", + "cleared due to rip change: %lx/%lx", vcpustate->nextrip, state->rip); } @@ -1648,7 +1654,7 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) * then defer the NMI until after that. */ VCPU_CTR1(sc->vm, vcpu, "Cannot inject NMI due to " - "eventinj %#lx", ctrl->eventinj); + "eventinj %lx", ctrl->eventinj); /* * Use self-IPI to trigger a VM-exit as soon as @@ -1694,7 +1700,7 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) */ if ((state->rflags & PSL_I) == 0) { VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to " - "rflags %#lx", vector, state->rflags); + "rflags %lx", vector, state->rflags); need_intr_window = 1; goto done; } @@ -1708,7 +1714,7 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) if (ctrl->eventinj & VMCB_EVENTINJ_VALID) { VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to " - "eventinj %#lx", vector, ctrl->eventinj); + "eventinj %lx", vector, ctrl->eventinj); need_intr_window = 1; goto done; } @@ -1742,9 +1748,9 @@ done: * VMRUN. 
*/ v_tpr = vlapic_get_cr8(vlapic); - KASSERT(v_tpr <= 15, ("invalid v_tpr %#x", v_tpr)); + KASSERT(v_tpr <= 15, ("invalid v_tpr %x", v_tpr)); if (ctrl->v_tpr != v_tpr) { - VCPU_CTR2(sc->vm, vcpu, "VMCB V_TPR changed from %#x to %#x", + VCPU_CTR2(sc->vm, vcpu, "VMCB V_TPR changed from %x to %x", ctrl->v_tpr, v_tpr); ctrl->v_tpr = v_tpr; svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR); @@ -1762,8 +1768,8 @@ done: */ KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) != 0 || (state->rflags & PSL_I) == 0 || ctrl->intr_shadow, - ("Bogus intr_window_exiting: eventinj (%#lx), " - "intr_shadow (%u), rflags (%#lx)", + ("Bogus intr_window_exiting: eventinj (%lx), " + "intr_shadow (%lu), rflags (%lx)", ctrl->eventinj, ctrl->intr_shadow, state->rflags)); enable_intr_window_exiting(sc, vcpu); } else { @@ -1838,7 +1844,7 @@ check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, u_int thiscpu) */ KASSERT(!alloc_asid, ("ASID allocation not necessary")); KASSERT(ctrl->tlb_ctrl == VMCB_TLB_FLUSH_NOTHING, - ("Invalid VMCB tlb_ctrl: %#x", ctrl->tlb_ctrl)); + ("Invalid VMCB tlb_ctrl: %x", ctrl->tlb_ctrl)); } if (alloc_asid) { @@ -1968,7 +1974,7 @@ svm_dr_leave_guest(struct svm_regctx *gctx) * Start vcpu with specified RIP. */ static int -svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, +svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap, struct vm_eventinfo *evinfo) { struct svm_regctx *gctx; @@ -2109,10 +2115,10 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, ctrl->vmcb_clean = vmcb_clean & ~vcpustate->dirty; vcpustate->dirty = 0; - VCPU_CTR1(vm, vcpu, "vmcb clean %#x", ctrl->vmcb_clean); + VCPU_CTR1(vm, vcpu, "vmcb clean %x", ctrl->vmcb_clean); /* Launch Virtual Machine. 
*/ - VCPU_CTR1(vm, vcpu, "Resume execution at %#lx", state->rip); + VCPU_CTR1(vm, vcpu, "Resume execution at %lx", state->rip); svm_dr_enter_guest(gctx); svm_launch(vmcb_pa, gctx, get_pcpu()); svm_dr_leave_guest(gctx); @@ -2152,10 +2158,9 @@ svm_vmcleanup(void *arg) free(sc, M_SVM); } -static register_t * +static uint64_t * swctx_regptr(struct svm_regctx *regctx, int reg) { - switch (reg) { case VM_REG_GUEST_RBX: return (®ctx->sctx_rbx); @@ -2201,56 +2206,135 @@ swctx_regptr(struct svm_regctx *regctx, int reg) static int svm_getreg(void *arg, int vcpu, int ident, uint64_t *val) { - struct svm_softc *svm_sc; - register_t *reg; - - svm_sc = arg; + struct svm_softc *sc; + struct vmcb *vmcb; + uint64_t *regp; + uint64_t *fieldp; + struct vmcb_segment *seg; - if (ident == VM_REG_GUEST_INTR_SHADOW) { - return (svm_get_intr_shadow(svm_sc, vcpu, val)); - } + sc = arg; + vmcb = svm_get_vmcb(sc, vcpu); - if (vmcb_read(svm_sc, vcpu, ident, val) == 0) { + regp = swctx_regptr(svm_get_guest_regctx(sc, vcpu), ident); + if (regp != NULL) { + *val = *regp; return (0); } - reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident); + switch (ident) { + case VM_REG_GUEST_INTR_SHADOW: + *val = (vmcb->ctrl.intr_shadow != 0) ? 
1 : 0; + break; - if (reg != NULL) { - *val = *reg; - return (0); + case VM_REG_GUEST_CR0: + case VM_REG_GUEST_CR2: + case VM_REG_GUEST_CR3: + case VM_REG_GUEST_CR4: + case VM_REG_GUEST_DR6: + case VM_REG_GUEST_DR7: + case VM_REG_GUEST_EFER: + case VM_REG_GUEST_RAX: + case VM_REG_GUEST_RFLAGS: + case VM_REG_GUEST_RIP: + case VM_REG_GUEST_RSP: + fieldp = vmcb_regptr(vmcb, ident, NULL); + *val = *fieldp; + break; + + case VM_REG_GUEST_CS: + case VM_REG_GUEST_DS: + case VM_REG_GUEST_ES: + case VM_REG_GUEST_FS: + case VM_REG_GUEST_GS: + case VM_REG_GUEST_SS: + case VM_REG_GUEST_LDTR: + case VM_REG_GUEST_TR: + seg = vmcb_segptr(vmcb, ident); + *val = seg->selector; + break; + + case VM_REG_GUEST_GDTR: + case VM_REG_GUEST_IDTR: + /* GDTR and IDTR don't have segment selectors */ + return (EINVAL); + + default: + return (EINVAL); } - VCPU_CTR1(svm_sc->vm, vcpu, "svm_getreg: unknown register %#x", ident); - return (EINVAL); + return (0); } static int svm_setreg(void *arg, int vcpu, int ident, uint64_t val) { - struct svm_softc *svm_sc; - register_t *reg; - - svm_sc = arg; + struct svm_softc *sc; + struct vmcb *vmcb; + uint64_t *regp; + uint64_t *fieldp; + uint32_t dirty; + struct vmcb_segment *seg; - if (ident == VM_REG_GUEST_INTR_SHADOW) { - return (svm_modify_intr_shadow(svm_sc, vcpu, val)); - } + sc = arg; + vmcb = svm_get_vmcb(sc, vcpu); - if (vmcb_write(svm_sc, vcpu, ident, val) == 0) { + regp = swctx_regptr(svm_get_guest_regctx(sc, vcpu), ident); + if (regp != NULL) { + *regp = val; return (0); } - reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident); + dirty = VMCB_CACHE_NONE; + switch (ident) { + case VM_REG_GUEST_INTR_SHADOW: + vmcb->ctrl.intr_shadow = (val != 0) ? 
1 : 0; + break; - if (reg != NULL) { - *reg = val; - return (0); + case VM_REG_GUEST_EFER: + fieldp = vmcb_regptr(vmcb, ident, &dirty); + /* EFER_SVM must always be set when the guest is executing */ + *fieldp = val | EFER_SVM; + dirty |= VMCB_CACHE_CR; + break; + + case VM_REG_GUEST_CR0: + case VM_REG_GUEST_CR2: + case VM_REG_GUEST_CR3: + case VM_REG_GUEST_CR4: + case VM_REG_GUEST_DR6: + case VM_REG_GUEST_DR7: + case VM_REG_GUEST_RAX: + case VM_REG_GUEST_RFLAGS: + case VM_REG_GUEST_RIP: + case VM_REG_GUEST_RSP: + fieldp = vmcb_regptr(vmcb, ident, &dirty); + *fieldp = val; + break; + + case VM_REG_GUEST_CS: + case VM_REG_GUEST_DS: + case VM_REG_GUEST_ES: + case VM_REG_GUEST_SS: + case VM_REG_GUEST_FS: + case VM_REG_GUEST_GS: + case VM_REG_GUEST_LDTR: + case VM_REG_GUEST_TR: + dirty |= VMCB_CACHE_SEG; + seg = vmcb_segptr(vmcb, ident); + seg->selector = (uint16_t)val; + break; + + case VM_REG_GUEST_GDTR: + case VM_REG_GUEST_IDTR: + /* GDTR and IDTR don't have segment selectors */ + return (EINVAL); + + default: + return (EINVAL); } - if (ident == VM_REG_GUEST_ENTRY_INST_LENGTH) { - /* Ignore. */ - return (0); + if (dirty != VMCB_CACHE_NONE) { + svm_set_dirty(sc, vcpu, dirty); } /* @@ -2259,8 +2343,119 @@ svm_setreg(void *arg, int vcpu, int ident, uint64_t val) * whether 'running' is true/false. */ - VCPU_CTR1(svm_sc->vm, vcpu, "svm_setreg: unknown register %#x", ident); - return (EINVAL); + return (0); +} + +static int +svm_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +{ + struct vmcb *vmcb; + struct svm_softc *sc; + struct vmcb_segment *seg; + + sc = arg; + vmcb = svm_get_vmcb(sc, vcpu); + + switch (reg) { + case VM_REG_GUEST_CS: + case VM_REG_GUEST_DS: + case VM_REG_GUEST_ES: + case VM_REG_GUEST_SS: + case VM_REG_GUEST_FS: + case VM_REG_GUEST_GS: + case VM_REG_GUEST_LDTR: + case VM_REG_GUEST_TR: + svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG); + seg = vmcb_segptr(vmcb, reg); + /* + * Map seg_desc access to VMCB attribute format. 
+ * + * SVM uses the 'P' bit in the segment attributes to indicate a + * NULL segment so clear it if the segment is marked unusable. + */ + seg->attrib = VMCB_ACCESS2ATTR(desc->access); + if (SEG_DESC_UNUSABLE(desc->access)) { + seg->attrib &= ~0x80; + } + break; + + case VM_REG_GUEST_GDTR: + case VM_REG_GUEST_IDTR: + svm_set_dirty(sc, vcpu, VMCB_CACHE_DT); + seg = vmcb_segptr(vmcb, reg); + break; + + default: + return (EINVAL); + } + + ASSERT(seg != NULL); + seg->base = desc->base; + seg->limit = desc->limit; + + return (0); +} + +static int +svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +{ + struct vmcb *vmcb; + struct svm_softc *sc; + struct vmcb_segment *seg; + + sc = arg; + vmcb = svm_get_vmcb(sc, vcpu); + + switch (reg) { + case VM_REG_GUEST_DS: + case VM_REG_GUEST_ES: + case VM_REG_GUEST_FS: + case VM_REG_GUEST_GS: + case VM_REG_GUEST_SS: + case VM_REG_GUEST_LDTR: + seg = vmcb_segptr(vmcb, reg); + desc->access = VMCB_ATTR2ACCESS(seg->attrib); + /* + * VT-x uses bit 16 to indicate a segment that has been loaded + * with a NULL selector (aka unusable). The 'desc->access' + * field is interpreted in the VT-x format by the + * processor-independent code. + * + * SVM uses the 'P' bit to convey the same information so + * convert it into the VT-x format. For more details refer to + * section "Segment State in the VMCB" in APMv2. + */ + if ((desc->access & 0x80) == 0) { + /* Unusable segment */ + desc->access |= 0x10000; + } + break; + + case VM_REG_GUEST_CS: + case VM_REG_GUEST_TR: + seg = vmcb_segptr(vmcb, reg); + desc->access = VMCB_ATTR2ACCESS(seg->attrib); + break; + + case VM_REG_GUEST_GDTR: + case VM_REG_GUEST_IDTR: + seg = vmcb_segptr(vmcb, reg); + /* + * Since there are no access bits associated with the GDTR or + * the IDTR, zero out the field to ensure it does not contain + * garbage which might confuse the consumer. 
+ */ + desc->access = 0; + break; + + default: + return (EINVAL); + } + + ASSERT(seg != NULL); + desc->base = seg->base; + desc->limit = seg->limit; + return (0); } static int @@ -2368,8 +2563,8 @@ struct vmm_ops vmm_ops_amd = { .vmcleanup = svm_vmcleanup, .vmgetreg = svm_getreg, .vmsetreg = svm_setreg, - .vmgetdesc = vmcb_getdesc, - .vmsetdesc = vmcb_setdesc, + .vmgetdesc = svm_getdesc, + .vmsetdesc = svm_setdesc, .vmgetcap = svm_getcap, .vmsetcap = svm_setcap, .vmspace_alloc = svm_npt_alloc, diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.h b/usr/src/uts/i86pc/io/vmm/amd/svm.h index c78f7eb067..19739884c2 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.h +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.h @@ -35,31 +35,31 @@ * Guest register state that is saved outside the VMCB. */ struct svm_regctx { - register_t sctx_rbp; - register_t sctx_rbx; - register_t sctx_rcx; - register_t sctx_rdx; - register_t sctx_rdi; - register_t sctx_rsi; - register_t sctx_r8; - register_t sctx_r9; - register_t sctx_r10; - register_t sctx_r11; - register_t sctx_r12; - register_t sctx_r13; - register_t sctx_r14; - register_t sctx_r15; - register_t sctx_dr0; - register_t sctx_dr1; - register_t sctx_dr2; - register_t sctx_dr3; + uint64_t sctx_rbp; + uint64_t sctx_rbx; + uint64_t sctx_rcx; + uint64_t sctx_rdx; + uint64_t sctx_rdi; + uint64_t sctx_rsi; + uint64_t sctx_r8; + uint64_t sctx_r9; + uint64_t sctx_r10; + uint64_t sctx_r11; + uint64_t sctx_r12; + uint64_t sctx_r13; + uint64_t sctx_r14; + uint64_t sctx_r15; + uint64_t sctx_dr0; + uint64_t sctx_dr1; + uint64_t sctx_dr2; + uint64_t sctx_dr3; - register_t host_dr0; - register_t host_dr1; - register_t host_dr2; - register_t host_dr3; - register_t host_dr6; - register_t host_dr7; + uint64_t host_dr0; + uint64_t host_dr1; + uint64_t host_dr2; + uint64_t host_dr3; + uint64_t host_dr6; + uint64_t host_dr7; uint64_t host_debugctl; }; diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.c b/usr/src/uts/i86pc/io/vmm/amd/vmcb.c index 5075b69867..b00f974c23 
100644 --- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.c +++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.c @@ -26,429 +26,130 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * Copyright 2020 Oxide Computer Company + */ + #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> -#include <machine/segments.h> -#include <machine/specialreg.h> #include <machine/vmm.h> -#include "vmm_ktr.h" - #include "vmcb.h" #include "svm.h" -#include "svm_softc.h" -/* - * The VMCB aka Virtual Machine Control Block is a 4KB aligned page - * in memory that describes the virtual machine. - * - * The VMCB contains: - * - instructions or events in the guest to intercept - * - control bits that modify execution environment of the guest - * - guest processor state (e.g. general purpose registers) - */ - -/* - * Return VMCB segment area. 
- */ -static struct vmcb_segment * +struct vmcb_segment * vmcb_segptr(struct vmcb *vmcb, int type) { - struct vmcb_state *state; - struct vmcb_segment *seg; - - state = &vmcb->state; + struct vmcb_state *state = &vmcb->state; switch (type) { case VM_REG_GUEST_CS: - seg = &state->cs; - break; - + return (&state->cs); case VM_REG_GUEST_DS: - seg = &state->ds; - break; - + return (&state->ds); case VM_REG_GUEST_ES: - seg = &state->es; - break; - + return (&state->es); case VM_REG_GUEST_FS: - seg = &state->fs; - break; - + return (&state->fs); case VM_REG_GUEST_GS: - seg = &state->gs; - break; - + return (&state->gs); case VM_REG_GUEST_SS: - seg = &state->ss; - break; - + return (&state->ss); case VM_REG_GUEST_GDTR: - seg = &state->gdt; - break; - + return (&state->gdt); case VM_REG_GUEST_IDTR: - seg = &state->idt; - break; - + return (&state->idt); case VM_REG_GUEST_LDTR: - seg = &state->ldt; - break; - + return (&state->ldt); case VM_REG_GUEST_TR: - seg = &state->tr; - break; - + return (&state->tr); default: - seg = NULL; - break; + panic("unexpected seg %d", type); } - - return (seg); } -static int -vmcb_access(struct svm_softc *softc, int vcpu, int write, int ident, - uint64_t *val) +uint64_t * +vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp) { - struct vmcb *vmcb; - int off, bytes; - char *ptr; - - vmcb = svm_get_vmcb(softc, vcpu); - off = VMCB_ACCESS_OFFSET(ident); - bytes = VMCB_ACCESS_BYTES(ident); - - if ((off + bytes) >= sizeof (struct vmcb)) - return (EINVAL); - - ptr = (char *)vmcb; - - if (!write) - *val = 0; - - switch (bytes) { - case 8: - case 4: - case 2: - if (write) - memcpy(ptr + off, val, bytes); - else - memcpy(val, ptr + off, bytes); - break; - default: - VCPU_CTR1(softc->vm, vcpu, - "Invalid size %d for VMCB access: %d", bytes); - return (EINVAL); - } - - /* Invalidate all VMCB state cached by h/w. 
*/ - if (write) - svm_set_dirty(softc, vcpu, 0xffffffff); - - return (0); -} - -/* - * Read from segment selector, control and general purpose register of VMCB. - */ -int -vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval) -{ - struct vmcb *vmcb; struct vmcb_state *state; - struct vmcb_segment *seg; - int err; + uint64_t *res = NULL; + uint32_t dirty = VMCB_CACHE_NONE; - vmcb = svm_get_vmcb(sc, vcpu); state = &vmcb->state; - err = 0; - - if (VMCB_ACCESS_OK(ident)) - return (vmcb_access(sc, vcpu, 0, ident, retval)); switch (ident) { case VM_REG_GUEST_CR0: - *retval = state->cr0; + res = &state->cr0; + dirty = VMCB_CACHE_CR; break; case VM_REG_GUEST_CR2: - *retval = state->cr2; + res = &state->cr2; + dirty = VMCB_CACHE_CR2; break; case VM_REG_GUEST_CR3: - *retval = state->cr3; + res = &state->cr3; + dirty = VMCB_CACHE_CR; break; case VM_REG_GUEST_CR4: - *retval = state->cr4; + res = &state->cr4; + dirty = VMCB_CACHE_CR; break; case VM_REG_GUEST_DR6: - *retval = state->dr6; + res = &state->dr6; + dirty = VMCB_CACHE_DR; break; case VM_REG_GUEST_DR7: - *retval = state->dr7; + res = &state->dr7; + dirty = VMCB_CACHE_DR; break; case VM_REG_GUEST_EFER: - *retval = state->efer; + res = &state->efer; + dirty = VMCB_CACHE_CR; break; case VM_REG_GUEST_RAX: - *retval = state->rax; + res = &state->rax; break; case VM_REG_GUEST_RFLAGS: - *retval = state->rflags; + res = &state->rflags; break; case VM_REG_GUEST_RIP: - *retval = state->rip; + res = &state->rip; break; case VM_REG_GUEST_RSP: - *retval = state->rsp; - break; - - case VM_REG_GUEST_CS: - case VM_REG_GUEST_DS: - case VM_REG_GUEST_ES: - case VM_REG_GUEST_FS: - case VM_REG_GUEST_GS: - case VM_REG_GUEST_SS: - case VM_REG_GUEST_LDTR: - case VM_REG_GUEST_TR: - seg = vmcb_segptr(vmcb, ident); - KASSERT(seg != NULL, ("%s: unable to get segment %d from VMCB", - __func__, ident)); - *retval = seg->selector; + res = &state->rsp; break; - case VM_REG_GUEST_GDTR: - case VM_REG_GUEST_IDTR: - /* GDTR and IDTR 
don't have segment selectors */ - err = EINVAL; - break; default: - err = EINVAL; + panic("unexpected register %d", ident); break; } - return (err); -} - -/* - * Write to segment selector, control and general purpose register of VMCB. - */ -int -vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val) -{ - struct vmcb *vmcb; - struct vmcb_state *state; - struct vmcb_segment *seg; - int err, dirtyseg; - - vmcb = svm_get_vmcb(sc, vcpu); - state = &vmcb->state; - dirtyseg = 0; - err = 0; - - if (VMCB_ACCESS_OK(ident)) - return (vmcb_access(sc, vcpu, 1, ident, &val)); - - switch (ident) { - case VM_REG_GUEST_CR0: - state->cr0 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); - break; - - case VM_REG_GUEST_CR2: - state->cr2 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR2); - break; - - case VM_REG_GUEST_CR3: - state->cr3 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); - break; - - case VM_REG_GUEST_CR4: - state->cr4 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); - break; - - case VM_REG_GUEST_DR6: - state->dr6 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_DR); - break; - - case VM_REG_GUEST_DR7: - state->dr7 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_DR); - break; - - case VM_REG_GUEST_EFER: - /* EFER_SVM must always be set when the guest is executing */ - state->efer = val | EFER_SVM; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); - break; - - case VM_REG_GUEST_RAX: - state->rax = val; - break; - - case VM_REG_GUEST_RFLAGS: - state->rflags = val; - break; - - case VM_REG_GUEST_RIP: - state->rip = val; - break; - - case VM_REG_GUEST_RSP: - state->rsp = val; - break; - - case VM_REG_GUEST_CS: - case VM_REG_GUEST_DS: - case VM_REG_GUEST_ES: - case VM_REG_GUEST_SS: - dirtyseg = 1; /* FALLTHROUGH */ - case VM_REG_GUEST_FS: - case VM_REG_GUEST_GS: - case VM_REG_GUEST_LDTR: - case VM_REG_GUEST_TR: - seg = vmcb_segptr(vmcb, ident); - KASSERT(seg != NULL, ("%s: unable to get segment %d from VMCB", - __func__, ident)); - seg->selector = val; - if (dirtyseg) - 
svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG); - break; - - case VM_REG_GUEST_GDTR: - case VM_REG_GUEST_IDTR: - /* GDTR and IDTR don't have segment selectors */ - err = EINVAL; - break; - default: - err = EINVAL; - break; - } - - return (err); -} - -int -vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg2) -{ - struct vmcb_segment *seg; - - seg = vmcb_segptr(vmcb, ident); - if (seg != NULL) { - bcopy(seg, seg2, sizeof(struct vmcb_segment)); - return (0); - } else { - return (EINVAL); - } -} - -int -vmcb_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) -{ - struct vmcb *vmcb; - struct svm_softc *sc; - struct vmcb_segment *seg; - uint16_t attrib; - - sc = arg; - vmcb = svm_get_vmcb(sc, vcpu); - - seg = vmcb_segptr(vmcb, reg); - KASSERT(seg != NULL, ("%s: invalid segment descriptor %d", - __func__, reg)); - - seg->base = desc->base; - seg->limit = desc->limit; - if (reg != VM_REG_GUEST_GDTR && reg != VM_REG_GUEST_IDTR) { - /* - * Map seg_desc access to VMCB attribute format. - * - * SVM uses the 'P' bit in the segment attributes to indicate a - * NULL segment so clear it if the segment is marked unusable. 
- */ - attrib = ((desc->access & 0xF000) >> 4) | (desc->access & 0xFF); - if (SEG_DESC_UNUSABLE(desc->access)) { - attrib &= ~0x80; - } - seg->attrib = attrib; - } - - VCPU_CTR4(sc->vm, vcpu, "Setting desc %d: base (%#lx), limit (%#x), " - "attrib (%#x)", reg, seg->base, seg->limit, seg->attrib); - - switch (reg) { - case VM_REG_GUEST_CS: - case VM_REG_GUEST_DS: - case VM_REG_GUEST_ES: - case VM_REG_GUEST_SS: - svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG); - break; - case VM_REG_GUEST_GDTR: - case VM_REG_GUEST_IDTR: - svm_set_dirty(sc, vcpu, VMCB_CACHE_DT); - break; - default: - break; + ASSERT(res != NULL); + if (dirtyp != NULL) { + *dirtyp |= dirty; } - - return (0); -} - -int -vmcb_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) -{ - struct vmcb *vmcb; - struct svm_softc *sc; - struct vmcb_segment *seg; - - sc = arg; - vmcb = svm_get_vmcb(sc, vcpu); - seg = vmcb_segptr(vmcb, reg); - KASSERT(seg != NULL, ("%s: invalid segment descriptor %d", - __func__, reg)); - - desc->base = seg->base; - desc->limit = seg->limit; - desc->access = 0; - - if (reg != VM_REG_GUEST_GDTR && reg != VM_REG_GUEST_IDTR) { - /* Map seg_desc access to VMCB attribute format */ - desc->access = ((seg->attrib & 0xF00) << 4) | - (seg->attrib & 0xFF); - - /* - * VT-x uses bit 16 to indicate a segment that has been loaded - * with a NULL selector (aka unusable). The 'desc->access' - * field is interpreted in the VT-x format by the - * processor-independent code. - * - * SVM uses the 'P' bit to convey the same information so - * convert it into the VT-x format. For more details refer to - * section "Segment State in the VMCB" in APMv2. 
- */ - if (reg != VM_REG_GUEST_CS && reg != VM_REG_GUEST_TR) { - if ((desc->access & 0x80) == 0) - desc->access |= 0x10000; /* Unusable segment */ - } - } - - return (0); + return (res); } diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h index 88f65df66a..63b088253d 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h +++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h @@ -86,8 +86,8 @@ struct svm_softc; #define VMCB_INTCPT_INVD BIT(22) #define VMCB_INTCPT_PAUSE BIT(23) #define VMCB_INTCPT_HLT BIT(24) -#define VMCB_INTCPT_INVPG BIT(25) -#define VMCB_INTCPT_INVPGA BIT(26) +#define VMCB_INTCPT_INVLPG BIT(25) +#define VMCB_INTCPT_INVLPGA BIT(26) #define VMCB_INTCPT_IO BIT(27) #define VMCB_INTCPT_MSR BIT(28) #define VMCB_INTCPT_TASK_SWITCH BIT(29) @@ -149,12 +149,21 @@ struct svm_softc; #define VMCB_EXIT_POPF 0x71 #define VMCB_EXIT_CPUID 0x72 #define VMCB_EXIT_IRET 0x74 +#define VMCB_EXIT_INVD 0x76 #define VMCB_EXIT_PAUSE 0x77 #define VMCB_EXIT_HLT 0x78 +#define VMCB_EXIT_INVLPG 0x79 +#define VMCB_EXIT_INVLPGA 0x7A #define VMCB_EXIT_IO 0x7B #define VMCB_EXIT_MSR 0x7C #define VMCB_EXIT_SHUTDOWN 0x7F +#define VMCB_EXIT_VMRUN 0x80 +#define VMCB_EXIT_VMMCALL 0x81 +#define VMCB_EXIT_VMLOAD 0x82 #define VMCB_EXIT_VMSAVE 0x83 +#define VMCB_EXIT_STGI 0x84 +#define VMCB_EXIT_CLGI 0x85 +#define VMCB_EXIT_SKINIT 0x86 #define VMCB_EXIT_MONITOR 0x8A #define VMCB_EXIT_MWAIT 0x8B #define VMCB_EXIT_NPF 0x400 @@ -212,15 +221,6 @@ struct svm_softc; #define VMCB_OFF_SYSENTER_EIP VMCB_OFF_STATE(0x238) #define VMCB_OFF_GUEST_PAT VMCB_OFF_STATE(0x268) -/* - * Encode the VMCB offset and bytes that we want to read from VMCB. 
- */ -#define VMCB_ACCESS(o, w) (0x80000000 | (((w) & 0xF) << 16) | \ - ((o) & 0xFFF)) -#define VMCB_ACCESS_OK(v) ((v) & 0x80000000 ) -#define VMCB_ACCESS_BYTES(v) (((v) >> 16) & 0xF) -#define VMCB_ACCESS_OFFSET(v) ((v) & 0xFFF) - #ifdef _KERNEL /* VMCB save state area segment format */ struct vmcb_segment { @@ -231,6 +231,10 @@ struct vmcb_segment { }; CTASSERT(sizeof(struct vmcb_segment) == 16); +/* Convert to/from vmcb segment access to generic (VMX) access */ +#define VMCB_ATTR2ACCESS(attr) ((((attr) & 0xf00) << 4) | ((attr) & 0xff)) +#define VMCB_ACCESS2ATTR(acc) ((((acc) & 0xf000) >> 4) | ((acc) & 0xff)) + /* Code segment descriptor attribute in 12 bit format as saved by VMCB. */ #define VMCB_CS_ATTRIB_L BIT(9) /* Long mode. */ #define VMCB_CS_ATTRIB_D BIT(10) /* OPerand size bit. */ @@ -360,6 +364,15 @@ struct vmcb_state { CTASSERT(sizeof(struct vmcb_state) == 0xC00); CTASSERT(offsetof(struct vmcb_state, int_to) == 0x290); +/* + * The VMCB aka Virtual Machine Control Block is a 4KB aligned page + * in memory that describes the virtual machine. + * + * The VMCB contains: + * - instructions or events in the guest to intercept + * - control bits that modify execution environment of the guest + * - guest processor state (e.g. 
general purpose registers) + */ struct vmcb { struct vmcb_ctrl ctrl; struct vmcb_state state; @@ -367,11 +380,8 @@ struct vmcb { CTASSERT(sizeof(struct vmcb) == PAGE_SIZE); CTASSERT(offsetof(struct vmcb, state) == 0x400); -int vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval); -int vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val); -int vmcb_setdesc(void *arg, int vcpu, int ident, struct seg_desc *desc); -int vmcb_getdesc(void *arg, int vcpu, int ident, struct seg_desc *desc); -int vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg); +struct vmcb_segment *vmcb_segptr(struct vmcb *vmcb, int type); +uint64_t *vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp); #endif /* _KERNEL */ #endif /* _VMCB_H_ */ diff --git a/usr/src/uts/i86pc/io/vmm/intel/ept.c b/usr/src/uts/i86pc/io/vmm/intel/ept.c index 5e5253780e..5e3bd6d309 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/ept.c +++ b/usr/src/uts/i86pc/io/vmm/intel/ept.c @@ -59,7 +59,6 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm.h> -#include "vmx_cpufunc.h" #include "ept.h" #define EPT_SUPPORTS_EXEC_ONLY(cap) ((cap) & (1UL << 0)) @@ -171,31 +170,12 @@ ept_dump(uint64_t *ptp, int nlevels) } #endif -#ifdef __FreeBSD__ -static void -invept_single_context(void *arg) -{ - struct invept_desc desc = *(struct invept_desc *)arg; - - invept(INVEPT_TYPE_SINGLE_CONTEXT, desc); -} - -void -ept_invalidate_mappings(u_long eptp) -{ - struct invept_desc invept_desc = { 0 }; - invept_desc.eptp = eptp; - - smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc); -} -#else /* __FreeBSD__ */ void ept_invalidate_mappings(u_long eptp) { hma_vmx_invept_allcpus((uintptr_t)eptp); } -#endif /* __FreeBSD__ */ static int ept_pinit(pmap_t pmap) diff --git a/usr/src/uts/i86pc/io/vmm/intel/offsets.in b/usr/src/uts/i86pc/io/vmm/intel/offsets.in index cc041eaefc..ca7f967f3b 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/offsets.in +++ b/usr/src/uts/i86pc/io/vmm/intel/offsets.in @@ -22,7 +22,6 
@@ #include <machine/pmap.h> #include <machine/vmm.h> -#include "intel/vmx_cpufunc.h" #include "intel/vmx.h" #include "vm/vm_glue.h" diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c index f1a08cc57d..36318b1b49 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c @@ -39,59 +39,24 @@ * * Copyright 2014 Pluribus Networks Inc. * Copyright 2017 Joyent, Inc. + * Copyright 2020 Oxide Computer Company */ -#ifdef __FreeBSD__ -#include "opt_ddb.h" -#endif - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include <sys/param.h> -#include <sys/sysctl.h> #include <sys/systm.h> -#include <sys/pcpu.h> #include <vm/vm.h> -#include <vm/pmap.h> -#include <machine/segments.h> #include <machine/vmm.h> -#include "vmm_host.h" -#include "vmx_cpufunc.h" -#include "vmcs.h" -#include "ept.h" #include "vmx.h" -#ifdef DDB -#include <ddb/ddb.h> -#endif - -SYSCTL_DECL(_hw_vmm_vmx); - -static int no_flush_rsb; -SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW, - &no_flush_rsb, 0, "Do not flush RSB upon vmexit"); - -static uint64_t -vmcs_fix_regval(uint32_t encoding, uint64_t val) -{ - - switch (encoding) { - case VMCS_GUEST_CR0: - val = vmx_fix_cr0(val); - break; - case VMCS_GUEST_CR4: - val = vmx_fix_cr4(val); - break; - default: - break; - } - return (val); -} +/* Bits 0-30 of VMX_BASIC MSR contain VMCS revision identifier */ +#define VMX_BASIC_REVISION(v) ((v) & 0x7fffffff) -static uint32_t +uint32_t vmcs_field_encoding(int ident) { switch (ident) { @@ -138,15 +103,13 @@ vmcs_field_encoding(int ident) case VM_REG_GUEST_ENTRY_INST_LENGTH: return (VMCS_ENTRY_INST_LENGTH); default: - return (-1); + return (VMCS_INVALID_ENCODING); } - } -static int +void vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc) { - switch (seg) { case VM_REG_GUEST_ES: *base = VMCS_GUEST_ES_BASE; @@ -199,364 +162,111 @@ vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc) *acc = 
VMCS_INVALID_ENCODING; break; default: - return (EINVAL); + panic("invalid segment register %d", seg); } - - return (0); } -int -vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval) +void +vmcs_clear(uintptr_t vmcs_pa) { - int error; - uint32_t encoding; - - /* - * If we need to get at vmx-specific state in the VMCS we can bypass - * the translation of 'ident' to 'encoding' by simply setting the - * sign bit. As it so happens the upper 16 bits are reserved (i.e - * set to 0) in the encodings for the VMCS so we are free to use the - * sign bit. - */ - if (ident < 0) - encoding = ident & 0x7fffffff; - else - encoding = vmcs_field_encoding(ident); - - if (encoding == (uint32_t)-1) - return (EINVAL); + int err; - if (!running) - VMPTRLD(vmcs); + __asm __volatile("vmclear %[addr];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (err) + : [addr] "m" (vmcs_pa) + : "memory"); - error = vmread(encoding, retval); - - if (!running) - VMCLEAR(vmcs); - - return (error); -} - -int -vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val) -{ - int error; - uint32_t encoding; - - if (ident < 0) - encoding = ident & 0x7fffffff; - else - encoding = vmcs_field_encoding(ident); - - if (encoding == (uint32_t)-1) - return (EINVAL); - - val = vmcs_fix_regval(encoding, val); - - if (!running) - VMPTRLD(vmcs); - - error = vmwrite(encoding, val); - - if (!running) - VMCLEAR(vmcs); - - return (error); -} - -int -vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) -{ - int error; - uint32_t base, limit, access; - - error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); - if (error != 0) - panic("vmcs_setdesc: invalid segment register %d", seg); - - if (!running) - VMPTRLD(vmcs); - if ((error = vmwrite(base, desc->base)) != 0) - goto done; - - if ((error = vmwrite(limit, desc->limit)) != 0) - goto done; - - if (access != VMCS_INVALID_ENCODING) { - if ((error = vmwrite(access, desc->access)) != 0) - goto done; + if (err != 0) { + 
panic("vmclear(%p) error %d", (void *)vmcs_pa, err); } -done: - if (!running) - VMCLEAR(vmcs); - return (error); -} - -int -vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) -{ - int error; - uint32_t base, limit, access; - uint64_t u64; - - error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); - if (error != 0) - panic("vmcs_getdesc: invalid segment register %d", seg); - if (!running) - VMPTRLD(vmcs); - if ((error = vmread(base, &u64)) != 0) - goto done; - desc->base = u64; - - if ((error = vmread(limit, &u64)) != 0) - goto done; - desc->limit = u64; - - if (access != VMCS_INVALID_ENCODING) { - if ((error = vmread(access, &u64)) != 0) - goto done; - desc->access = u64; - } -done: - if (!running) - VMCLEAR(vmcs); - return (error); + /* + * A call to critical_enter() was made in vmcs_load() to prevent + * preemption. Now that the VMCS is unloaded, it is safe to relax that + * restriction. + */ + critical_exit(); } -int -vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count) +void +vmcs_initialize(struct vmcs *vmcs, uintptr_t vmcs_pa) { - int error; + int err; - VMPTRLD(vmcs); + /* set to VMCS revision */ + vmcs->identifier = VMX_BASIC_REVISION(rdmsr(MSR_VMX_BASIC)); /* - * Guest MSRs are saved in the VM-exit MSR-store area. - * Guest MSRs are loaded from the VM-entry MSR-load area. - * Both areas point to the same location in memory. + * Perform a vmclear on the VMCS, but without the critical section + * manipulation as done by vmcs_clear() above. 
*/ - if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0) - goto done; - if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0) - goto done; - - if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0) - goto done; - if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0) - goto done; - - error = 0; -done: - VMCLEAR(vmcs); - return (error); + __asm __volatile("vmclear %[addr];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (err) + : [addr] "m" (vmcs_pa) + : "memory"); + + if (err != 0) { + panic("vmclear(%p) error %d", (void *)vmcs_pa, err); + } } -int -vmcs_init(struct vmcs *vmcs) +void +vmcs_load(uintptr_t vmcs_pa) { - int error, codesel, datasel, tsssel; - u_long cr0, cr4, efer; - uint64_t pat; -#ifdef __FreeBSD__ - uint64_t fsbase, idtrbase; -#endif - - codesel = vmm_get_host_codesel(); - datasel = vmm_get_host_datasel(); - tsssel = vmm_get_host_tsssel(); + int err; /* - * Make sure we have a "current" VMCS to work with. + * While the VMCS is loaded on the CPU for subsequent operations, it is + * important that the thread not be preempted. That is ensured with + * critical_enter() here, with a matching critical_exit() call in + * vmcs_clear() once the VMCS is unloaded. 
*/ - VMPTRLD(vmcs); - - /* Host state */ - - /* Initialize host IA32_PAT MSR */ - pat = vmm_get_host_pat(); - if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0) - goto done; + critical_enter(); - /* Load the IA32_EFER MSR */ - efer = vmm_get_host_efer(); - if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0) - goto done; + __asm __volatile("vmptrld %[addr];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (err) + : [addr] "m" (vmcs_pa) + : "memory"); - /* Load the control registers */ - - cr0 = vmm_get_host_cr0(); - if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0) - goto done; - - cr4 = vmm_get_host_cr4() | CR4_VMXE; - if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0) - goto done; - - /* Load the segment selectors */ - if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0) - goto done; - -#ifdef __FreeBSD__ - if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0) - goto done; -#else - if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, vmm_get_host_fssel())) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, vmm_get_host_gssel())) != 0) - goto done; -#endif - - if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0) - goto done; - -#ifdef __FreeBSD__ - /* - * Load the Base-Address for %fs and idtr. - * - * Note that we exclude %gs, tss and gdtr here because their base - * address is pcpu specific. 
- */ - fsbase = vmm_get_host_fsbase(); - if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0) - goto done; - - idtrbase = vmm_get_host_idtrbase(); - if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0) - goto done; + if (err != 0) { + panic("vmptrld(%p) error %d", (void *)vmcs_pa, err); + } +} -#else /* __FreeBSD__ */ - /* - * Configure host sysenter MSRs to be restored on VM exit. - * The thread-specific MSR_INTC_SEP_ESP value is loaded in vmx_run. - */ - if ((error = vmwrite(VMCS_HOST_IA32_SYSENTER_CS, KCS_SEL)) != 0) - goto done; - /* Natively defined as MSR_INTC_SEP_EIP */ - if ((error = vmwrite(VMCS_HOST_IA32_SYSENTER_EIP, - rdmsr(MSR_SYSENTER_EIP_MSR))) != 0) - goto done; +uint64_t +vmcs_read(uint32_t encoding) +{ + int error; + uint64_t val; -#endif /* __FreeBSD__ */ + __asm __volatile("vmread %[enc], %[val];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (error), [val] "=r" (val) + : [enc] "r" ((uint64_t)encoding) + : "memory"); - /* instruction pointer */ - if (no_flush_rsb) { - if ((error = vmwrite(VMCS_HOST_RIP, - (u_long)vmx_exit_guest)) != 0) - goto done; - } else { - if ((error = vmwrite(VMCS_HOST_RIP, - (u_long)vmx_exit_guest_flush_rsb)) != 0) - goto done; + if (error != 0) { + panic("vmread(%x) error %d", encoding, error); } - /* link pointer */ - if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0) - goto done; -done: - VMCLEAR(vmcs); - return (error); + return (val); } -#ifdef DDB -extern int vmxon_enabled[]; - -DB_SHOW_COMMAND(vmcs, db_show_vmcs) +void +vmcs_write(uint32_t encoding, uint64_t val) { - uint64_t cur_vmcs, val; - uint32_t exit; - - if (!vmxon_enabled[curcpu]) { - db_printf("VMX not enabled\n"); - return; - } + int error; - if (have_addr) { - db_printf("Only current VMCS supported\n"); - return; - } + __asm __volatile("vmwrite %[val], %[enc];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (error) + : [val] "r" (val), [enc] "r" ((uint64_t)encoding) + : "memory"); - vmptrst(&cur_vmcs); - if (cur_vmcs == VMCS_INITIAL) { - db_printf("No 
current VM context\n"); - return; - } - db_printf("VMCS: %jx\n", cur_vmcs); - db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID)); - db_printf("Activity: "); - val = vmcs_read(VMCS_GUEST_ACTIVITY); - switch (val) { - case 0: - db_printf("Active"); - break; - case 1: - db_printf("HLT"); - break; - case 2: - db_printf("Shutdown"); - break; - case 3: - db_printf("Wait for SIPI"); - break; - default: - db_printf("Unknown: %#lx", val); - } - db_printf("\n"); - exit = vmcs_read(VMCS_EXIT_REASON); - if (exit & 0x80000000) - db_printf("Entry Failure Reason: %u\n", exit & 0xffff); - else - db_printf("Exit Reason: %u\n", exit & 0xffff); - db_printf("Qualification: %#lx\n", vmcs_exit_qualification()); - db_printf("Guest Linear Address: %#lx\n", - vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)); - switch (exit & 0x8000ffff) { - case EXIT_REASON_EXCEPTION: - case EXIT_REASON_EXT_INTR: - val = vmcs_read(VMCS_EXIT_INTR_INFO); - db_printf("Interrupt Type: "); - switch (val >> 8 & 0x7) { - case 0: - db_printf("external"); - break; - case 2: - db_printf("NMI"); - break; - case 3: - db_printf("HW exception"); - break; - case 4: - db_printf("SW exception"); - break; - default: - db_printf("?? %lu", val >> 8 & 0x7); - break; - } - db_printf(" Vector: %lu", val & 0xff); - if (val & 0x800) - db_printf(" Error Code: %lx", - vmcs_read(VMCS_EXIT_INTR_ERRCODE)); - db_printf("\n"); - break; - case EXIT_REASON_EPT_FAULT: - case EXIT_REASON_EPT_MISCONFIG: - db_printf("Guest Physical Address: %#lx\n", - vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)); - break; + if (error != 0) { + panic("vmwrite(%x, %lx) error %d", encoding, val, error); } - db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error()); } -#endif diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h index edde5c6dd5..1713872556 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h @@ -30,6 +30,7 @@ /* * Copyright 2017 Joyent, Inc. 
+ * Copyright 2020 Oxide Computer Company */ #ifndef _VMCS_H_ @@ -41,125 +42,20 @@ struct vmcs { uint32_t identifier; uint32_t abort_code; char _impl_specific[PAGE_SIZE - sizeof(uint32_t) * 2]; -#ifndef __FreeBSD__ - /* - * Keep the physical address of the VMCS cached adjacent for the - * structure so it can be referenced in contexts which are too delicate - * for a call into the HAT. For the moment it means wasting a whole - * page on padding for the PA value to maintain alignment, but it - * allows the consumers of 'struct vmcs *' to easily access the value - * without a significant change to the interface. - */ - uint64_t vmcs_pa; - char _pa_pad[PAGE_SIZE - sizeof (vm_paddr_t)]; -#endif }; -#ifdef __FreeBSD__ -CTASSERT(sizeof(struct vmcs) == PAGE_SIZE); -#else -CTASSERT(sizeof(struct vmcs) == (2*PAGE_SIZE)); -#endif +CTASSERT(sizeof (struct vmcs) == PAGE_SIZE); -/* MSR save region is composed of an array of 'struct msr_entry' */ -struct msr_entry { - uint32_t index; - uint32_t reserved; - uint64_t val; +uint32_t vmcs_field_encoding(int ident); +void vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, + uint32_t *acc); -}; +void vmcs_initialize(struct vmcs *vmcs, uintptr_t vmcs_pa); -int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count); -int vmcs_init(struct vmcs *vmcs); -int vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *rv); -int vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val); -int vmcs_getdesc(struct vmcs *vmcs, int running, int ident, - struct seg_desc *desc); -int vmcs_setdesc(struct vmcs *vmcs, int running, int ident, - struct seg_desc *desc); +void vmcs_load(uintptr_t vmcs_pa); +void vmcs_clear(uintptr_t vmcs_pa); -/* - * Avoid header pollution caused by inline use of 'vtophys()' in vmx_cpufunc.h - */ -#ifdef _VMX_CPUFUNC_H_ -static __inline uint64_t -vmcs_read(uint32_t encoding) -{ - int error; - uint64_t val; - - error = vmread(encoding, &val); - KASSERT(error == 0, ("vmcs_read(%u) 
error %d", encoding, error)); - return (val); -} - -static __inline void -vmcs_write(uint32_t encoding, uint64_t val) -{ - int error; - - error = vmwrite(encoding, val); - KASSERT(error == 0, ("vmcs_write(%u) error %d", encoding, error)); -} - -#ifndef __FreeBSD__ -/* - * Due to header complexity combined with the need to cache the physical - * address for the VMCS, these must be defined here rather than vmx_cpufunc.h. - */ -static __inline int -vmclear(struct vmcs *vmcs) -{ - int error; - uint64_t addr = vmcs->vmcs_pa; - - __asm __volatile("vmclear %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - return (error); -} - -static __inline int -vmptrld(struct vmcs *vmcs) -{ - int error; - uint64_t addr = vmcs->vmcs_pa; - - __asm __volatile("vmptrld %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - return (error); -} - -static __inline void -VMCLEAR(struct vmcs *vmcs) -{ - int err; - - err = vmclear(vmcs); - if (err != 0) - panic("%s: vmclear(%p) error %d", __func__, vmcs, err); - - critical_exit(); -} - -static __inline void -VMPTRLD(struct vmcs *vmcs) -{ - int err; - - critical_enter(); - - err = vmptrld(vmcs); - if (err != 0) - panic("%s: vmptrld(%p) error %d", __func__, vmcs, err); -} -#endif /* __FreeBSD__ */ - -#endif /* _VMX_CPUFUNC_H_ */ +uint64_t vmcs_read(uint32_t encoding); +void vmcs_write(uint32_t encoding, uint64_t val); #define vmexit_instruction_length() vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH) #define vmcs_guest_rip() vmcs_read(VMCS_GUEST_RIP) @@ -177,7 +73,6 @@ VMPTRLD(struct vmcs *vmcs) #define VMCS_INITIAL 0xffffffffffffffff -#define VMCS_IDENT(encoding) ((encoding) | 0x80000000) /* * VMCS field encodings from Appendix H, Intel Architecture Manual Vol3B. 
*/ diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c index c46560948e..3e511b9f66 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c @@ -88,7 +88,6 @@ __FBSDID("$FreeBSD$"); #include "vlapic_priv.h" #include "ept.h" -#include "vmx_cpufunc.h" #include "vmcs.h" #include "vmx.h" #include "vmx_msr.h" @@ -172,11 +171,6 @@ SYSCTL_DECL(_hw_vmm); SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL); -#ifdef __FreeBSD__ -int vmxon_enabled[MAXCPU]; -static char vmxon_region[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE); -#endif /*__FreeBSD__ */ - static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2; static uint32_t exit_ctls, entry_ctls; @@ -196,10 +190,15 @@ static int vmx_initialized; SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, initialized, CTLFLAG_RD, &vmx_initialized, 0, "Intel VMX initialized"); +static int no_flush_rsb; +SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW, + &no_flush_rsb, 0, "Do not flush RSB upon vmexit"); + /* * Optional capabilities */ #ifdef __FreeBSD__ +SYSCTL_DECL(_hw_vmm_vmx); static SYSCTL_NODE(_hw_vmm_vmx, OID_AUTO, cap, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL); @@ -228,7 +227,9 @@ static int pirvec = -1; SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, posted_interrupt_vector, CTLFLAG_RD, &pirvec, 0, "APICv posted interrupt vector"); +#ifdef __FreeBSD__ static struct unrhdr *vpid_unr; +#endif /* __FreeBSD__*/ static u_int vpid_alloc_failed; SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD, &vpid_alloc_failed, 0, NULL); @@ -240,6 +241,13 @@ int guest_l1d_flush_sw; SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RD, &guest_l1d_flush_sw, 0, NULL); +/* MSR save region is composed of an array of 'struct msr_entry' */ +struct msr_entry { + uint32_t index; + uint32_t reserved; + uint64_t val; +}; + static struct msr_entry msr_load_list[1] __aligned(16); /* @@ -330,11 +338,8 @@ SDT_PROBE_DEFINE4(vmm, vmx, exit, return, static int 
vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval); -static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val); static void vmx_inject_pir(struct vlapic *vlapic); -#ifndef __FreeBSD__ -static int vmx_apply_tsc_adjust(struct vmx *, int); -#endif /* __FreeBSD__ */ +static void vmx_apply_tsc_adjust(struct vmx *, int); #ifdef KTR static const char * @@ -504,17 +509,15 @@ vmx_allow_x2apic_msrs(struct vmx *vmx) return (error); } -u_long +static u_long vmx_fix_cr0(u_long cr0) { - return ((cr0 | cr0_ones_mask) & ~cr0_zeros_mask); } -u_long +static u_long vmx_fix_cr4(u_long cr4) { - return ((cr4 | cr4_ones_mask) & ~cr4_zeros_mask); } @@ -845,45 +848,12 @@ vmx_trigger_hostintr(int vector) #endif /* __FreeBSD__ */ } -static int -vmx_setup_cr_shadow(int which, struct vmcs *vmcs, uint32_t initial) -{ - int error, mask_ident, shadow_ident; - uint64_t mask_value; - - if (which != 0 && which != 4) - panic("vmx_setup_cr_shadow: unknown cr%d", which); - - if (which == 0) { - mask_ident = VMCS_CR0_MASK; - mask_value = cr0_ones_mask | cr0_zeros_mask; - shadow_ident = VMCS_CR0_SHADOW; - } else { - mask_ident = VMCS_CR4_MASK; - mask_value = cr4_ones_mask | cr4_zeros_mask; - shadow_ident = VMCS_CR4_SHADOW; - } - - error = vmcs_setreg(vmcs, 0, VMCS_IDENT(mask_ident), mask_value); - if (error) - return (error); - - error = vmcs_setreg(vmcs, 0, VMCS_IDENT(shadow_ident), initial); - if (error) - return (error); - - return (0); -} -#define vmx_setup_cr0_shadow(vmcs,init) vmx_setup_cr_shadow(0, (vmcs), (init)) -#define vmx_setup_cr4_shadow(vmcs,init) vmx_setup_cr_shadow(4, (vmcs), (init)) - static void * vmx_vminit(struct vm *vm, pmap_t pmap) { uint16_t vpid[VM_MAXCPU]; - int i, error; + int i, error, datasel; struct vmx *vmx; - struct vmcs *vmcs; uint32_t exc_bitmap; uint16_t maxcpus; uint32_t proc_ctls, proc2_ctls, pin_ctls; @@ -972,6 +942,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap) } maxcpus = 
vm_get_maxcpus(vm); + datasel = vmm_get_host_datasel(); for (i = 0; i < maxcpus; i++) { /* * Cache physical address lookups for various components which @@ -982,31 +953,58 @@ vmx_vminit(struct vm *vm, pmap_t pmap) vm_paddr_t apic_page_pa = vtophys(&vmx->apic_page[i]); vm_paddr_t pir_desc_pa = vtophys(&vmx->pir_desc[i]); - vmcs = &vmx->vmcs[i]; - vmcs->identifier = vmx_revision(); - vmcs->vmcs_pa = (uint64_t)vtophys(vmcs); - error = vmclear(vmcs); - if (error != 0) { - panic("vmx_vminit: vmclear error %d on vcpu %d\n", - error, i); - } + vmx->vmcs_pa[i] = (uintptr_t)vtophys(&vmx->vmcs[i]); + vmcs_initialize(&vmx->vmcs[i], vmx->vmcs_pa[i]); vmx_msr_guest_init(vmx, i); - error = vmcs_init(vmcs); - KASSERT(error == 0, ("vmcs_init error %d", error)); + vmcs_load(vmx->vmcs_pa[i]); - VMPTRLD(vmcs); - error = 0; + vmcs_write(VMCS_HOST_IA32_PAT, vmm_get_host_pat()); + vmcs_write(VMCS_HOST_IA32_EFER, vmm_get_host_efer()); + + /* Load the control registers */ + vmcs_write(VMCS_HOST_CR0, vmm_get_host_cr0()); + vmcs_write(VMCS_HOST_CR4, vmm_get_host_cr4() | CR4_VMXE); + + /* Load the segment selectors */ + vmcs_write(VMCS_HOST_CS_SELECTOR, vmm_get_host_codesel()); + + vmcs_write(VMCS_HOST_ES_SELECTOR, datasel); + vmcs_write(VMCS_HOST_SS_SELECTOR, datasel); + vmcs_write(VMCS_HOST_DS_SELECTOR, datasel); - error += vmwrite(VMCS_EPTP, vmx->eptp); - error += vmwrite(VMCS_PIN_BASED_CTLS, pin_ctls); - error += vmwrite(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); - error += vmwrite(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls); - error += vmwrite(VMCS_EXIT_CTLS, exit_ctls); - error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls); - error += vmwrite(VMCS_MSR_BITMAP, msr_bitmap_pa); - error += vmwrite(VMCS_VPID, vpid[i]); + vmcs_write(VMCS_HOST_FS_SELECTOR, vmm_get_host_fssel()); + vmcs_write(VMCS_HOST_GS_SELECTOR, vmm_get_host_gssel()); + vmcs_write(VMCS_HOST_TR_SELECTOR, vmm_get_host_tsssel()); + + /* + * Configure host sysenter MSRs to be restored on VM exit. 
+ * The thread-specific MSR_INTC_SEP_ESP value is loaded in vmx_run. + */ + vmcs_write(VMCS_HOST_IA32_SYSENTER_CS, KCS_SEL); + vmcs_write(VMCS_HOST_IA32_SYSENTER_EIP, + rdmsr(MSR_SYSENTER_EIP_MSR)); + + /* instruction pointer */ + if (no_flush_rsb) { + vmcs_write(VMCS_HOST_RIP, (uint64_t)vmx_exit_guest); + } else { + vmcs_write(VMCS_HOST_RIP, + (uint64_t)vmx_exit_guest_flush_rsb); + } + + /* link pointer */ + vmcs_write(VMCS_LINK_POINTER, ~0); + + vmcs_write(VMCS_EPTP, vmx->eptp); + vmcs_write(VMCS_PIN_BASED_CTLS, pin_ctls); + vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); + vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls); + vmcs_write(VMCS_EXIT_CTLS, exit_ctls); + vmcs_write(VMCS_ENTRY_CTLS, entry_ctls); + vmcs_write(VMCS_MSR_BITMAP, msr_bitmap_pa); + vmcs_write(VMCS_VPID, vpid[i]); if (guest_l1d_flush && !guest_l1d_flush_sw) { vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract( @@ -1022,28 +1020,39 @@ vmx_vminit(struct vm *vm, pmap_t pmap) exc_bitmap = 0xffffffff; else exc_bitmap = 1 << IDT_MC; - error += vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap); + vmcs_write(VMCS_EXCEPTION_BITMAP, exc_bitmap); vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1; - error += vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1); + vmcs_write(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1); if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) { - error += vmwrite(VMCS_VIRTUAL_APIC, apic_page_pa); + vmcs_write(VMCS_VIRTUAL_APIC, apic_page_pa); } if (vmx_cap_en(vmx, VMX_CAP_APICV)) { - error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); - error += vmwrite(VMCS_EOI_EXIT0, 0); - error += vmwrite(VMCS_EOI_EXIT1, 0); - error += vmwrite(VMCS_EOI_EXIT2, 0); - error += vmwrite(VMCS_EOI_EXIT3, 0); + vmcs_write(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); + vmcs_write(VMCS_EOI_EXIT0, 0); + vmcs_write(VMCS_EOI_EXIT1, 0); + vmcs_write(VMCS_EOI_EXIT2, 0); + vmcs_write(VMCS_EOI_EXIT3, 0); } if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) { - error += vmwrite(VMCS_PIR_VECTOR, pirvec); - error += vmwrite(VMCS_PIR_DESC, pir_desc_pa); + 
vmcs_write(VMCS_PIR_VECTOR, pirvec); + vmcs_write(VMCS_PIR_DESC, pir_desc_pa); } - VMCLEAR(vmcs); - KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs")); + + /* + * Set up the CR0/4 masks and configure the read shadow state + * to the power-on register value from the Intel Sys Arch. + * CR0 - 0x60000010 + * CR4 - 0 + */ + vmcs_write(VMCS_CR0_MASK, cr0_ones_mask | cr0_zeros_mask); + vmcs_write(VMCS_CR0_SHADOW, 0x60000010); + vmcs_write(VMCS_CR4_MASK, cr4_ones_mask | cr4_zeros_mask); + vmcs_write(VMCS_CR4_SHADOW, 0); + + vmcs_clear(vmx->vmcs_pa[i]); vmx->cap[i].set = 0; vmx->cap[i].proc_ctls = proc_ctls; @@ -1054,19 +1063,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap) vmx->state[i].lastcpu = NOCPU; vmx->state[i].vpid = vpid[i]; - /* - * Set up the CR0/4 shadows, and init the read shadow - * to the power-on register value from the Intel Sys Arch. - * CR0 - 0x60000010 - * CR4 - 0 - */ - error = vmx_setup_cr0_shadow(vmcs, 0x60000010); - if (error != 0) - panic("vmx_setup_cr0_shadow %d", error); - - error = vmx_setup_cr4_shadow(vmcs, 0); - if (error != 0) - panic("vmx_setup_cr4_shadow %d", error); vmx->ctx[i].pmap = pmap; } @@ -1095,7 +1091,7 @@ static __inline void vmx_run_trace(struct vmx *vmx, int vcpu) { #ifdef KTR - VCPU_CTR1(vmx->vm, vcpu, "Resume execution at %#lx", vmcs_guest_rip()); + VCPU_CTR1(vmx->vm, vcpu, "Resume execution at %lx", vmcs_guest_rip()); #endif } @@ -1123,6 +1119,33 @@ vmx_astpending_trace(struct vmx *vmx, int vcpu, uint64_t rip) static VMM_STAT_INTEL(VCPU_INVVPID_SAVED, "Number of vpid invalidations saved"); static VMM_STAT_INTEL(VCPU_INVVPID_DONE, "Number of vpid invalidations done"); +#define INVVPID_TYPE_ADDRESS 0UL +#define INVVPID_TYPE_SINGLE_CONTEXT 1UL +#define INVVPID_TYPE_ALL_CONTEXTS 2UL + +struct invvpid_desc { + uint16_t vpid; + uint16_t _res1; + uint32_t _res2; + uint64_t linear_addr; +}; +CTASSERT(sizeof(struct invvpid_desc) == 16); + +static __inline void +invvpid(uint64_t type, struct invvpid_desc desc) +{ + int error; + 
+ __asm __volatile("invvpid %[desc], %[type];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (error) + : [desc] "m" (desc), [type] "r" (type) + : "memory"); + + if (error) + panic("invvpid error %d", error); +} + /* * Invalidate guest mappings identified by its vpid from the TLB. */ @@ -1190,7 +1213,6 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) { struct vmxstate *vmxstate; -#ifndef __FreeBSD__ /* * Regardless of whether the VM appears to have migrated between CPUs, * save the host sysenter stack pointer. As it points to the kernel @@ -1203,8 +1225,7 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) * Perform any needed TSC_OFFSET adjustment based on TSC_MSR writes or * migration between host CPUs with differing TSC values. */ - VERIFY0(vmx_apply_tsc_adjust(vmx, vcpu)); -#endif + vmx_apply_tsc_adjust(vmx, vcpu); vmxstate = &vmx->state[vcpu]; if (vmxstate->lastcpu == curcpu) @@ -1214,10 +1235,8 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1); -#ifndef __FreeBSD__ /* Load the per-CPU IDT address */ vmcs_write(VMCS_HOST_IDTR_BASE, vmm_get_host_idtrbase()); -#endif vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase()); vmcs_write(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase()); vmcs_write(VMCS_HOST_GS_BASE, vmm_get_host_gsbase()); @@ -1245,7 +1264,7 @@ vmx_clear_int_window_exiting(struct vmx *vmx, int vcpu) { KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0, - ("intr_window_exiting not set: %#x", vmx->cap[vcpu].proc_ctls)); + ("intr_window_exiting not set: %x", vmx->cap[vcpu].proc_ctls)); vmx->cap[vcpu].proc_ctls &= ~PROCBASED_INT_WINDOW_EXITING; vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); VCPU_CTR0(vmx->vm, vcpu, "Disabling interrupt window exiting"); @@ -1267,29 +1286,12 @@ vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu) { KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0, - ("nmi_window_exiting not set 
%#x", vmx->cap[vcpu].proc_ctls)); + ("nmi_window_exiting not set %x", vmx->cap[vcpu].proc_ctls)); vmx->cap[vcpu].proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING; vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting"); } -#ifdef __FreeBSD__ -int -vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset) -{ - int error; - - if ((vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET) == 0) { - vmx->cap[vcpu].proc_ctls |= PROCBASED_TSC_OFFSET; - vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); - VCPU_CTR0(vmx->vm, vcpu, "Enabling TSC offsetting"); - } - - error = vmwrite(VMCS_TSC_OFFSET, offset); - - return (error); -} -#else /* __FreeBSD__ */ /* * Set the TSC adjustment, taking into account the offsets measured between * host physical CPUs. This is required even if the guest has not set a TSC @@ -1297,24 +1299,20 @@ vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset) * migrated onto. Without this mitigation, un-synched host TSCs will convey * the appearance of TSC time-travel to the guest as its vCPUs migrate. 
*/ -static int +static void vmx_apply_tsc_adjust(struct vmx *vmx, int vcpu) { extern hrtime_t tsc_gethrtime_tick_delta(void); const uint64_t target_offset = (vcpu_tsc_offset(vmx->vm, vcpu) + (uint64_t)tsc_gethrtime_tick_delta()); - int error = 0; ASSERT(vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET); if (vmx->tsc_offset_active[vcpu] != target_offset) { - error = vmwrite(VMCS_TSC_OFFSET, target_offset); + vmcs_write(VMCS_TSC_OFFSET, target_offset); vmx->tsc_offset_active[vcpu] = target_offset; } - - return (error); } -#endif /* __FreeBSD__ */ #define NMI_BLOCKING (VMCS_INTERRUPTIBILITY_NMI_BLOCKING | \ VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING) @@ -1333,11 +1331,11 @@ vmx_inject_nmi(struct vmx *vmx, int vcpu) gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); KASSERT((gi & NMI_BLOCKING) == 0, ("vmx_inject_nmi: invalid guest " - "interruptibility-state %#x", gi)); + "interruptibility-state %x", gi)); info = vmcs_read(VMCS_ENTRY_INTR_INFO); KASSERT((info & VMCS_INTR_VALID) == 0, ("vmx_inject_nmi: invalid " - "VM-entry interruption information %#x", info)); + "VM-entry interruption information %x", info)); /* * Inject the virtual NMI. 
The vector must be the NMI IDT entry @@ -1373,7 +1371,7 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, if (vmx->state[vcpu].nextrip != guestrip && (gi & HWINTR_BLOCKING) != 0) { VCPU_CTR2(vmx->vm, vcpu, "Guest interrupt blocking " - "cleared due to rip change: %#lx/%#lx", + "cleared due to rip change: %lx/%lx", vmx->state[vcpu].nextrip, guestrip); gi &= ~HWINTR_BLOCKING; vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); @@ -1390,10 +1388,10 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) { KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry " - "intinfo is not valid: %#lx", __func__, entryinfo)); + "intinfo is not valid: %lx", __func__, entryinfo)); KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject " - "pending exception: %#lx/%#x", __func__, entryinfo, info)); + "pending exception: %lx/%x", __func__, entryinfo, info)); info = entryinfo; vector = info & 0xff; @@ -1432,11 +1430,11 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, need_nmi_exiting = 0; } else { VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI " - "due to VM-entry intr info %#x", info); + "due to VM-entry intr info %x", info); } } else { VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI due to " - "Guest Interruptibility-state %#x", gi); + "Guest Interruptibility-state %x", gi); } if (need_nmi_exiting) { @@ -1483,18 +1481,18 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, */ if ((gi & HWINTR_BLOCKING) != 0) { VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " - "Guest Interruptibility-state %#x", vector, gi); + "Guest Interruptibility-state %x", vector, gi); goto cantinject; } if ((info & VMCS_INTR_VALID) != 0) { VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " - "VM-entry intr info %#x", vector, info); + "VM-entry intr info %x", vector, info); goto cantinject; } rflags = vmcs_read(VMCS_GUEST_RFLAGS); if ((rflags & PSL_I) == 0) { 
VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " - "rflags %#lx", vector, rflags); + "rflags %lx", vector, rflags); goto cantinject; } @@ -1573,7 +1571,7 @@ vmx_assert_nmi_blocking(struct vmx *vmx, int vcpuid) gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); KASSERT(gi & VMCS_INTERRUPTIBILITY_NMI_BLOCKING, - ("NMI blocking is not in effect %#x", gi)); + ("NMI blocking is not in effect %x", gi)); } static int @@ -1949,10 +1947,11 @@ vmexit_inout(struct vm_exit *vmexit, struct vie *vie, uint64_t qual, inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO); /* - * Bits 7-9 encode the address size of ins/outs operations where - * the 0/1/2 values correspond to 16/32/64 bit sizes. + * According to the SDM, bits 9:7 encode the address size of the + * ins/outs operation, but only values 0/1/2 are expected, + * corresponding to 16/32/64 bit sizes. */ - inout->addrsize = 2 << (1 + ((inst_info >> 7) & 0x3)); + inout->addrsize = 2 << BITX(inst_info, 9, 7); VERIFY(inout->addrsize == 2 || inout->addrsize == 4 || inout->addrsize == 8); @@ -2224,9 +2223,7 @@ emulate_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) static int emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu) { - struct vmxctx *vmxctx; uint64_t result; - uint32_t eax, edx; int error; if (lapic_msr(num)) @@ -2235,14 +2232,8 @@ emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu) error = vmx_rdmsr(vmx, vcpuid, num, &result, retu); if (error == 0) { - eax = result; - vmxctx = &vmx->ctx[vcpuid]; - error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RAX, eax); - KASSERT(error == 0, ("vmxctx_setreg(rax) error %d", error)); - - edx = result >> 32; - error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RDX, edx); - KASSERT(error == 0, ("vmxctx_setreg(rdx) error %d", error)); + vmx->ctx[vcpuid].guest_rax = (uint32_t)result; + vmx->ctx[vcpuid].guest_rdx = result >> 32; } return (error); @@ -2260,7 +2251,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) struct vie *vie; 
struct vlapic *vlapic; struct vm_task_switch *ts; - uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info; + uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info; uint32_t intr_type, intr_vec, reason; uint64_t exitintinfo, qual, gpa; bool retu; @@ -2367,7 +2358,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) */ if (ts->reason == TSR_IDT_GATE) { KASSERT(idtvec_info & VMCS_IDT_VEC_VALID, - ("invalid idtvec_info %#x for IDT task switch", + ("invalid idtvec_info %x for IDT task switch", idtvec_info)); intr_type = idtvec_info & VMCS_INTR_T_MASK; if (intr_type != VMCS_INTR_T_SWINTR && @@ -2496,7 +2487,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) return (1); KASSERT((intr_info & VMCS_INTR_VALID) != 0 && (intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_HWINTR, - ("VM exit interruption info invalid: %#x", intr_info)); + ("VM exit interruption info invalid: %x", intr_info)); vmx_trigger_hostintr(intr_info & 0xff); /* @@ -2528,7 +2519,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXCEPTION, 1); intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); KASSERT((intr_info & VMCS_INTR_VALID) != 0, - ("VM exit interruption info invalid: %#x", intr_info)); + ("VM exit interruption info invalid: %x", intr_info)); intr_vec = intr_info & 0xff; intr_type = intr_info & VMCS_INTR_T_MASK; @@ -2580,9 +2571,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) } if (intr_vec == IDT_PF) { - error = vmxctx_setreg(vmxctx, VM_REG_GUEST_CR2, qual); - KASSERT(error == 0, ("%s: vmxctx_setreg(cr2) error %d", - __func__, error)); + vmxctx->guest_cr2 = qual; } /* @@ -2600,7 +2589,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) errcode_valid = 1; errcode = vmcs_read(VMCS_EXIT_INTR_ERRCODE); } - VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%#x into " + VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%x into " "the guest", intr_vec, 
errcode); SDT_PROBE5(vmm, vmx, exit, exception, vmx, vcpu, vmexit, intr_vec, errcode); @@ -2790,11 +2779,11 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); KASSERT((intr_info & VMCS_INTR_VALID) != 0, - ("VM exit interruption info invalid: %#x", intr_info)); + ("VM exit interruption info invalid: %x", intr_info)); if ((intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_NMI) { KASSERT((intr_info & 0xff) == IDT_NMI, ("VM exit due " - "to NMI has invalid vector: %#x", intr_info)); + "to NMI has invalid vector: %x", intr_info)); VCPU_CTR0(vmx->vm, vcpuid, "Vectoring to NMI handler"); #ifdef __FreeBSD__ __asm __volatile("int $2"); @@ -2807,7 +2796,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) static __inline void vmx_dr_enter_guest(struct vmxctx *vmxctx) { - register_t rflags; + uint64_t rflags; /* Save host control debug registers. */ vmxctx->host_dr7 = rdr7(); @@ -2872,14 +2861,14 @@ vmx_dr_leave_guest(struct vmxctx *vmxctx) } static int -vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, +vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap, struct vm_eventinfo *evinfo) { int rc, handled, launched; struct vmx *vmx; struct vm *vm; struct vmxctx *vmxctx; - struct vmcs *vmcs; + uintptr_t vmcs_pa; struct vm_exit *vmexit; struct vlapic *vlapic; uint32_t exit_reason; @@ -2890,7 +2879,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, vmx = arg; vm = vmx->vm; - vmcs = &vmx->vmcs[vcpu]; + vmcs_pa = vmx->vmcs_pa[vcpu]; vmxctx = &vmx->ctx[vcpu]; vlapic = vm_lapic(vm, vcpu); vmexit = vm_exitinfo(vm, vcpu); @@ -2901,7 +2890,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, vmx_msr_guest_enter(vmx, vcpu); - VMPTRLD(vmcs); + vmcs_load(vmcs_pa); #ifndef __FreeBSD__ VERIFY(vmx->vmcs_state[vcpu] == VS_NONE && curthread->t_preempt != 0); @@ -2922,7 +2911,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, vmx_set_pcpu_defaults(vmx, vcpu, pmap); 
do { KASSERT(vmcs_guest_rip() == rip, ("%s: vmcs guest rip mismatch " - "%#lx/%#lx", __func__, vmcs_guest_rip(), rip)); + "%lx/%lx", __func__, vmcs_guest_rip(), rip)); handled = UNHANDLED; /* @@ -3115,7 +3104,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, VCPU_CTR1(vm, vcpu, "returning from vmx_run: exitcode %d", vmexit->exitcode); - VMCLEAR(vmcs); + vmcs_clear(vmcs_pa); vmx_msr_guest_exit(vmx, vcpu); #ifndef __FreeBSD__ @@ -3145,10 +3134,9 @@ vmx_vmcleanup(void *arg) return; } -static register_t * +static uint64_t * vmxctx_regptr(struct vmxctx *vmxctx, int reg) { - switch (reg) { case VM_REG_GUEST_RAX: return (&vmxctx->guest_rax); @@ -3199,157 +3187,129 @@ vmxctx_regptr(struct vmxctx *vmxctx, int reg) } static int -vmxctx_getreg(struct vmxctx *vmxctx, int reg, uint64_t *retval) +vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) { - register_t *regp; - - if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) { - *retval = *regp; - return (0); - } else - return (EINVAL); -} + int running, hostcpu, err; + struct vmx *vmx = arg; + uint64_t *regp; -static int -vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val) -{ - register_t *regp; + running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu); - if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) { - *regp = val; + /* VMCS access not required for ctx reads */ + if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) { + *retval = *regp; return (0); - } else - return (EINVAL); -} - -static int -vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval) -{ - uint64_t gi; - int error; - - error = vmcs_getreg(&vmx->vmcs[vcpu], running, - VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi); - *retval = (gi & HWINTR_BLOCKING) ? 
1 : 0; - return (error); -} - -static int -vmx_modify_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t val) -{ - struct vmcs *vmcs; - uint64_t gi; - int error, ident; - - /* - * Forcing the vcpu into an interrupt shadow is not supported. - */ - if (val) { - error = EINVAL; - goto done; } - vmcs = &vmx->vmcs[vcpu]; - ident = VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY); - error = vmcs_getreg(vmcs, running, ident, &gi); - if (error == 0) { - gi &= ~HWINTR_BLOCKING; - error = vmcs_setreg(vmcs, running, ident, gi); + if (!running) { + vmcs_load(vmx->vmcs_pa[vcpu]); } -done: - VCPU_CTR2(vmx->vm, vcpu, "Setting intr_shadow to %#lx %s", val, - error ? "failed" : "succeeded"); - return (error); -} - -static int -vmx_shadow_reg(int reg) -{ - int shreg; - shreg = -1; + err = EINVAL; + if (reg == VM_REG_GUEST_INTR_SHADOW) { + uint64_t gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); + *retval = (gi & HWINTR_BLOCKING) ? 1 : 0; + err = 0; + } else { + uint32_t encoding; - switch (reg) { - case VM_REG_GUEST_CR0: - shreg = VMCS_CR0_SHADOW; - break; - case VM_REG_GUEST_CR4: - shreg = VMCS_CR4_SHADOW; - break; - default: - break; + encoding = vmcs_field_encoding(reg); + if (encoding != VMCS_INVALID_ENCODING) { + *retval = vmcs_read(encoding); + err = 0; + } } - return (shreg); -} - -static int -vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) -{ - int running, hostcpu; - struct vmx *vmx = arg; - - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); - if (running && hostcpu != curcpu) - panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu); - - if (reg == VM_REG_GUEST_INTR_SHADOW) - return (vmx_get_intr_shadow(vmx, vcpu, running, retval)); - - if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) == 0) - return (0); + if (!running) { + vmcs_clear(vmx->vmcs_pa[vcpu]); + } - return (vmcs_getreg(&vmx->vmcs[vcpu], running, reg, retval)); + return (err); } static int vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) { - int error, hostcpu, running, shadow; - uint64_t 
ctls; - pmap_t pmap; + int running, hostcpu, error; struct vmx *vmx = arg; + uint64_t *regp; running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu); - if (reg == VM_REG_GUEST_INTR_SHADOW) - return (vmx_modify_intr_shadow(vmx, vcpu, running, val)); - - if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0) + /* VMCS access not required for ctx writes */ + if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) { + *regp = val; return (0); + } - error = vmcs_setreg(&vmx->vmcs[vcpu], running, reg, val); - - if (error == 0) { - /* - * If the "load EFER" VM-entry control is 1 then the - * value of EFER.LMA must be identical to "IA-32e mode guest" - * bit in the VM-entry control. - */ - if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0 && - (reg == VM_REG_GUEST_EFER)) { - vmcs_getreg(&vmx->vmcs[vcpu], running, - VMCS_IDENT(VMCS_ENTRY_CTLS), &ctls); - if (val & EFER_LMA) - ctls |= VM_ENTRY_GUEST_LMA; - else - ctls &= ~VM_ENTRY_GUEST_LMA; - vmcs_setreg(&vmx->vmcs[vcpu], running, - VMCS_IDENT(VMCS_ENTRY_CTLS), ctls); - } + if (!running) { + vmcs_load(vmx->vmcs_pa[vcpu]); + } - shadow = vmx_shadow_reg(reg); - if (shadow > 0) { + if (reg == VM_REG_GUEST_INTR_SHADOW) { + if (val != 0) { /* - * Store the unmodified value in the shadow + * Forcing the vcpu into an interrupt shadow is not + * presently supported. */ - error = vmcs_setreg(&vmx->vmcs[vcpu], running, - VMCS_IDENT(shadow), val); + error = EINVAL; + } else { + uint64_t gi; + + gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); + gi &= ~HWINTR_BLOCKING; + vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); + error = 0; } + } else { + uint32_t encoding; - if (reg == VM_REG_GUEST_CR3) { + error = 0; + encoding = vmcs_field_encoding(reg); + switch (encoding) { + case VMCS_GUEST_IA32_EFER: + /* + * If the "load EFER" VM-entry control is 1 then the + * value of EFER.LMA must be identical to "IA-32e mode + * guest" bit in the VM-entry control. 
+ */ + if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0) { + uint64_t ctls; + + ctls = vmcs_read(VMCS_ENTRY_CTLS); + if (val & EFER_LMA) { + ctls |= VM_ENTRY_GUEST_LMA; + } else { + ctls &= ~VM_ENTRY_GUEST_LMA; + } + vmcs_write(VMCS_ENTRY_CTLS, ctls); + } + vmcs_write(encoding, val); + break; + case VMCS_GUEST_CR0: + /* + * The guest is not allowed to modify certain bits in + * %cr0 and %cr4. To maintain the illusion of full + * control, they have shadow versions which contain the + * guest-perceived (via reads from the register) values + * as opposed to the guest-effective values. + * + * This is detailed in the SDM: Vol. 3 Ch. 24.6.6. + */ + vmcs_write(VMCS_CR0_SHADOW, val); + vmcs_write(encoding, vmx_fix_cr0(val)); + break; + case VMCS_GUEST_CR4: + /* See above for detail on %cr4 shadowing */ + vmcs_write(VMCS_CR4_SHADOW, val); + vmcs_write(encoding, vmx_fix_cr4(val)); + break; + case VMCS_GUEST_CR3: + vmcs_write(encoding, val); /* * Invalidate the guest vcpu's TLB mappings to emulate * the behavior of updating %cr3. @@ -3357,38 +3317,80 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) * XXX the processor retains global mappings when %cr3 * is updated but vmx_invvpid() does not. 
*/ - pmap = vmx->ctx[vcpu].pmap; - vmx_invvpid(vmx, vcpu, pmap, running); + vmx_invvpid(vmx, vcpu, vmx->ctx[vcpu].pmap, running); + break; + case VMCS_INVALID_ENCODING: + error = EINVAL; + break; + default: + vmcs_write(encoding, val); + break; } } + if (!running) { + vmcs_clear(vmx->vmcs_pa[vcpu]); + } + return (error); } static int -vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc) { int hostcpu, running; struct vmx *vmx = arg; + uint32_t base, limit, access; running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("vmx_getdesc: %s%d is running", vm_name(vmx->vm), vcpu); - return (vmcs_getdesc(&vmx->vmcs[vcpu], running, reg, desc)); + if (!running) { + vmcs_load(vmx->vmcs_pa[vcpu]); + } + + vmcs_seg_desc_encoding(seg, &base, &limit, &access); + desc->base = vmcs_read(base); + desc->limit = vmcs_read(limit); + if (access != VMCS_INVALID_ENCODING) { + desc->access = vmcs_read(access); + } else { + desc->access = 0; + } + + if (!running) { + vmcs_clear(vmx->vmcs_pa[vcpu]); + } + return (0); } static int -vmx_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +vmx_setdesc(void *arg, int vcpu, int seg, struct seg_desc *desc) { int hostcpu, running; struct vmx *vmx = arg; + uint32_t base, limit, access; running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), vcpu); - return (vmcs_setdesc(&vmx->vmcs[vcpu], running, reg, desc)); + if (!running) { + vmcs_load(vmx->vmcs_pa[vcpu]); + } + + vmcs_seg_desc_encoding(seg, &base, &limit, &access); + vmcs_write(base, desc->base); + vmcs_write(limit, desc->limit); + if (access != VMCS_INVALID_ENCODING) { + vmcs_write(access, desc->access); + } + + if (!running) { + vmcs_clear(vmx->vmcs_pa[vcpu]); + } + return (0); } static int @@ -3436,21 +3438,17 @@ static int vmx_setcap(void *arg, int vcpu, int type, int val) { struct vmx 
*vmx = arg; - struct vmcs *vmcs = &vmx->vmcs[vcpu]; - uint32_t baseval; + uint32_t baseval, reg, flag; uint32_t *pptr; int error; - int flag; - int reg; - int retval; - retval = ENOENT; + error = ENOENT; pptr = NULL; switch (type) { case VM_CAP_HALT_EXIT: if (cap_halt_exit) { - retval = 0; + error = 0; pptr = &vmx->cap[vcpu].proc_ctls; baseval = *pptr; flag = PROCBASED_HLT_EXITING; @@ -3459,7 +3457,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) break; case VM_CAP_MTRAP_EXIT: if (cap_monitor_trap) { - retval = 0; + error = 0; pptr = &vmx->cap[vcpu].proc_ctls; baseval = *pptr; flag = PROCBASED_MTF; @@ -3468,7 +3466,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) break; case VM_CAP_PAUSE_EXIT: if (cap_pause_exit) { - retval = 0; + error = 0; pptr = &vmx->cap[vcpu].proc_ctls; baseval = *pptr; flag = PROCBASED_PAUSE_EXITING; @@ -3477,7 +3475,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) break; case VM_CAP_ENABLE_INVPCID: if (cap_invpcid) { - retval = 0; + error = 0; pptr = &vmx->cap[vcpu].proc_ctls2; baseval = *pptr; flag = PROCBASED2_ENABLE_INVPCID; @@ -3485,7 +3483,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) } break; case VM_CAP_BPT_EXIT: - retval = 0; + error = 0; /* Don't change the bitmap if we are tracing all exceptions. 
*/ if (vmx->cap[vcpu].exc_bitmap != 0xffffffff) { @@ -3499,8 +3497,9 @@ vmx_setcap(void *arg, int vcpu, int type, int val) break; } - if (retval) - return (retval); + if (error != 0) { + return (error); + } if (pptr != NULL) { if (val) { @@ -3508,12 +3507,9 @@ vmx_setcap(void *arg, int vcpu, int type, int val) } else { baseval &= ~flag; } - VMPTRLD(vmcs); - error = vmwrite(reg, baseval); - VMCLEAR(vmcs); - - if (error) - return (error); + vmcs_load(vmx->vmcs_pa[vcpu]); + vmcs_write(reg, baseval); + vmcs_clear(vmx->vmcs_pa[vcpu]); /* * Update optional stored flags, and record @@ -3715,13 +3711,11 @@ static void vmx_enable_x2apic_mode_ts(struct vlapic *vlapic) { struct vmx *vmx; - struct vmcs *vmcs; uint32_t proc_ctls; int vcpuid; vcpuid = vlapic->vcpuid; vmx = ((struct vlapic_vtx *)vlapic)->vmx; - vmcs = &vmx->vmcs[vcpuid]; proc_ctls = vmx->cap[vcpuid].proc_ctls; proc_ctls &= ~PROCBASED_USE_TPR_SHADOW; @@ -3729,34 +3723,32 @@ vmx_enable_x2apic_mode_ts(struct vlapic *vlapic) proc_ctls |= PROCBASED_CR8_STORE_EXITING; vmx->cap[vcpuid].proc_ctls = proc_ctls; - VMPTRLD(vmcs); + vmcs_load(vmx->vmcs_pa[vcpuid]); vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); - VMCLEAR(vmcs); + vmcs_clear(vmx->vmcs_pa[vcpuid]); } static void vmx_enable_x2apic_mode_vid(struct vlapic *vlapic) { struct vmx *vmx; - struct vmcs *vmcs; uint32_t proc_ctls2; int vcpuid, error; vcpuid = vlapic->vcpuid; vmx = ((struct vlapic_vtx *)vlapic)->vmx; - vmcs = &vmx->vmcs[vcpuid]; proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; KASSERT((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) != 0, - ("%s: invalid proc_ctls2 %#x", __func__, proc_ctls2)); + ("%s: invalid proc_ctls2 %x", __func__, proc_ctls2)); proc_ctls2 &= ~PROCBASED2_VIRTUALIZE_APIC_ACCESSES; proc_ctls2 |= PROCBASED2_VIRTUALIZE_X2APIC_MODE; vmx->cap[vcpuid].proc_ctls2 = proc_ctls2; - VMPTRLD(vmcs); + vmcs_load(vmx->vmcs_pa[vcpuid]); vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc_ctls2); - VMCLEAR(vmcs); + vmcs_clear(vmx->vmcs_pa[vcpuid]); if 
(vlapic->vcpuid == 0) { /* @@ -3932,10 +3924,9 @@ static void vmx_savectx(void *arg, int vcpu) { struct vmx *vmx = arg; - struct vmcs *vmcs = &vmx->vmcs[vcpu]; if ((vmx->vmcs_state[vcpu] & VS_LOADED) != 0) { - VERIFY3U(vmclear(vmcs), ==, 0); + vmcs_clear(vmx->vmcs_pa[vcpu]); vmx_msr_guest_exit(vmx, vcpu); /* * Having VMCLEARed the VMCS, it can no longer be re-entered @@ -3951,13 +3942,12 @@ static void vmx_restorectx(void *arg, int vcpu) { struct vmx *vmx = arg; - struct vmcs *vmcs = &vmx->vmcs[vcpu]; ASSERT0(vmx->vmcs_state[vcpu] & VS_LAUNCHED); if ((vmx->vmcs_state[vcpu] & VS_LOADED) != 0) { vmx_msr_guest_enter(vmx, vcpu); - VERIFY3U(vmptrld(vmcs), ==, 0); + vmcs_load(vmx->vmcs_pa[vcpu]); } } #endif /* __FreeBSD__ */ diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.h b/usr/src/uts/i86pc/io/vmm/intel/vmx.h index 0fd723f9c9..7943c1fd0e 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.h @@ -50,44 +50,34 @@ struct pmap; struct vmxctx { - register_t guest_rdi; /* Guest state */ - register_t guest_rsi; - register_t guest_rdx; - register_t guest_rcx; - register_t guest_r8; - register_t guest_r9; - register_t guest_rax; - register_t guest_rbx; - register_t guest_rbp; - register_t guest_r10; - register_t guest_r11; - register_t guest_r12; - register_t guest_r13; - register_t guest_r14; - register_t guest_r15; - register_t guest_cr2; - register_t guest_dr0; - register_t guest_dr1; - register_t guest_dr2; - register_t guest_dr3; - register_t guest_dr6; - -#ifdef __FreeBSD__ - register_t host_r15; /* Host state */ - register_t host_r14; - register_t host_r13; - register_t host_r12; - register_t host_rbp; - register_t host_rsp; - register_t host_rbx; -#endif /* __FreeBSD__ */ - - register_t host_dr0; - register_t host_dr1; - register_t host_dr2; - register_t host_dr3; - register_t host_dr6; - register_t host_dr7; + uint64_t guest_rdi; /* Guest state */ + uint64_t guest_rsi; + uint64_t guest_rdx; + uint64_t guest_rcx; + uint64_t 
guest_r8; + uint64_t guest_r9; + uint64_t guest_rax; + uint64_t guest_rbx; + uint64_t guest_rbp; + uint64_t guest_r10; + uint64_t guest_r11; + uint64_t guest_r12; + uint64_t guest_r13; + uint64_t guest_r14; + uint64_t guest_r15; + uint64_t guest_cr2; + uint64_t guest_dr0; + uint64_t guest_dr1; + uint64_t guest_dr2; + uint64_t guest_dr3; + uint64_t guest_dr6; + + uint64_t host_dr0; + uint64_t host_dr1; + uint64_t host_dr2; + uint64_t host_dr3; + uint64_t host_dr6; + uint64_t host_dr7; uint64_t host_debugctl; int host_tf; @@ -156,6 +146,7 @@ struct vmx { uint64_t host_msrs[VM_MAXCPU][GUEST_MSR_NUM]; uint64_t tsc_offset_active[VM_MAXCPU]; vmcs_state_t vmcs_state[VM_MAXCPU]; + uintptr_t vmcs_pa[VM_MAXCPU]; #endif struct vmxctx ctx[VM_MAXCPU]; struct vmxcap cap[VM_MAXCPU]; @@ -175,17 +166,38 @@ vmx_cap_en(const struct vmx *vmx, enum vmx_caps cap) return ((vmx->vmx_caps & cap) == cap); } + +/* + * Section 5.2 "Conventions" from Intel Architecture Manual 2B. + * + * error + * VMsucceed 0 + * VMFailInvalid 1 + * VMFailValid 2 see also VMCS VM-Instruction Error Field + */ +#define VM_SUCCESS 0 +#define VM_FAIL_INVALID 1 +#define VM_FAIL_VALID 2 +#define VMX_SET_ERROR_CODE_ASM \ + " jnc 1f;" \ + " mov $1, %[error];" /* CF: error = 1 */ \ + " jmp 3f;" \ + "1: jnz 2f;" \ + " mov $2, %[error];" /* ZF: error = 2 */ \ + " jmp 3f;" \ + "2: mov $0, %[error];" \ + "3:" + + #define VMX_GUEST_VMEXIT 0 #define VMX_VMRESUME_ERROR 1 #define VMX_VMLAUNCH_ERROR 2 #define VMX_INVEPT_ERROR 3 #define VMX_VMWRITE_ERROR 4 + int vmx_enter_guest(struct vmxctx *ctx, struct vmx *vmx, int launched); void vmx_call_isr(uintptr_t entry); -u_long vmx_fix_cr0(u_long cr0); -u_long vmx_fix_cr4(u_long cr4); - int vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset); extern char vmx_exit_guest[]; diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h deleted file mode 100644 index f0c5ba7691..0000000000 --- 
a/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h +++ /dev/null @@ -1,244 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - * - * Copyright 2014 Pluribus Networks Inc. - * Copyright 2017 Joyent, Inc. 
- */ - -#ifndef _VMX_CPUFUNC_H_ -#define _VMX_CPUFUNC_H_ - -struct vmcs; - -/* - * Section 5.2 "Conventions" from Intel Architecture Manual 2B. - * - * error - * VMsucceed 0 - * VMFailInvalid 1 - * VMFailValid 2 see also VMCS VM-Instruction Error Field - */ -#define VM_SUCCESS 0 -#define VM_FAIL_INVALID 1 -#define VM_FAIL_VALID 2 -#define VMX_SET_ERROR_CODE \ - " jnc 1f;" \ - " mov $1, %[error];" /* CF: error = 1 */ \ - " jmp 3f;" \ - "1: jnz 2f;" \ - " mov $2, %[error];" /* ZF: error = 2 */ \ - " jmp 3f;" \ - "2: mov $0, %[error];" \ - "3:" - -/* returns 0 on success and non-zero on failure */ -static __inline int -vmxon(char *region) -{ - int error; - uint64_t addr; - -#ifdef __FreeBSD__ - addr = vtophys(region); -#else - /* This is pre-translated in illumos */ - addr = (uint64_t)region; -#endif - __asm __volatile("vmxon %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - - return (error); -} - -#ifdef __FreeBSD__ -/* returns 0 on success and non-zero on failure */ -static __inline int -vmclear(struct vmcs *vmcs) -{ - int error; - uint64_t addr; - - addr = vtophys(vmcs); - __asm __volatile("vmclear %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - return (error); -} -#endif /* __FreeBSD__ */ - -static __inline void -vmxoff(void) -{ - - __asm __volatile("vmxoff"); -} - -static __inline void -vmptrst(uint64_t *addr) -{ - - __asm __volatile("vmptrst %[addr]" :: [addr]"m" (*addr) : "memory"); -} - -#ifdef __FreeBSD__ -static __inline int -vmptrld(struct vmcs *vmcs) -{ - int error; - uint64_t addr; - - addr = vtophys(vmcs); - __asm __volatile("vmptrld %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - return (error); -} -#endif /* __FreeBSD__ */ - -static __inline int -vmwrite(uint64_t reg, uint64_t val) -{ - int error; - - __asm __volatile("vmwrite %[val], %[reg];" - VMX_SET_ERROR_CODE - : 
[error] "=r" (error) - : [val] "r" (val), [reg] "r" (reg) - : "memory"); - - return (error); -} - -static __inline int -vmread(uint64_t r, uint64_t *addr) -{ - int error; - - __asm __volatile("vmread %[r], %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [r] "r" (r), [addr] "m" (*addr) - : "memory"); - - return (error); -} - -#ifdef __FreeBSD__ -static __inline void -VMCLEAR(struct vmcs *vmcs) -{ - int err; - - err = vmclear(vmcs); - if (err != 0) - panic("%s: vmclear(%p) error %d", __func__, vmcs, err); - - critical_exit(); -} - -static __inline void -VMPTRLD(struct vmcs *vmcs) -{ - int err; - - critical_enter(); - - err = vmptrld(vmcs); - if (err != 0) - panic("%s: vmptrld(%p) error %d", __func__, vmcs, err); -} -#endif /* __FreeBSD__ */ - -#define INVVPID_TYPE_ADDRESS 0UL -#define INVVPID_TYPE_SINGLE_CONTEXT 1UL -#define INVVPID_TYPE_ALL_CONTEXTS 2UL - -struct invvpid_desc { - uint16_t vpid; - uint16_t _res1; - uint32_t _res2; - uint64_t linear_addr; -}; -CTASSERT(sizeof(struct invvpid_desc) == 16); - -static __inline void -invvpid(uint64_t type, struct invvpid_desc desc) -{ - int error; - - __asm __volatile("invvpid %[desc], %[type];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [desc] "m" (desc), [type] "r" (type) - : "memory"); - - if (error) - panic("invvpid error %d", error); -} - -#define INVEPT_TYPE_SINGLE_CONTEXT 1UL -#define INVEPT_TYPE_ALL_CONTEXTS 2UL -struct invept_desc { - uint64_t eptp; - uint64_t _res; -}; -CTASSERT(sizeof(struct invept_desc) == 16); - -static __inline void -invept(uint64_t type, struct invept_desc desc) -{ - int error; - - __asm __volatile("invept %[desc], %[type];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [desc] "m" (desc), [type] "r" (type) - : "memory"); - - if (error) - panic("invept error %d", error); -} -#endif diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c index 6c37c9c234..cfdf2bfe05 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c +++ 
b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c @@ -62,13 +62,6 @@ vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos) return ((msr_val & (1UL << bitpos)) == 0); } -uint32_t -vmx_revision(void) -{ - - return (rdmsr(MSR_VMX_BASIC) & 0xffffffff); -} - /* * Generate a bitmask to be used for the VMCS execution control fields. * diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h index ac2adb0dd1..848cdea26b 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h @@ -40,8 +40,6 @@ void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid); int vmx_rdmsr(struct vmx *, int vcpuid, u_int num, uint64_t *val, bool *retu); int vmx_wrmsr(struct vmx *, int vcpuid, u_int num, uint64_t val, bool *retu); -uint32_t vmx_revision(void); - int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask, uint32_t zeros_mask, uint32_t *retval); diff --git a/usr/src/uts/i86pc/io/vmm/intel/vtd.c b/usr/src/uts/i86pc/io/vmm/intel/vtd.c index 50c0934ace..79524220b5 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vtd.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vtd.c @@ -611,10 +611,10 @@ vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len, ptpindex = 0; ptpshift = 0; - KASSERT(gpa + len > gpa, ("%s: invalid gpa range %#lx/%#lx", __func__, + KASSERT(gpa + len > gpa, ("%s: invalid gpa range %lx/%lx", __func__, gpa, len)); - KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %#lx/%#lx beyond " - "domain maxaddr %#lx", __func__, gpa, len, dom->maxaddr)); + KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %lx/%lx beyond " + "domain maxaddr %lx", __func__, gpa, len, dom->maxaddr)); if (gpa & PAGE_MASK) panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa); diff --git a/usr/src/uts/i86pc/io/vmm/io/iommu.c b/usr/src/uts/i86pc/io/vmm/io/iommu.c index 918a9ec3e4..2e5fc9df32 100644 --- a/usr/src/uts/i86pc/io/vmm/io/iommu.c +++ b/usr/src/uts/i86pc/io/vmm/io/iommu.c @@ -204,12 +204,8 @@ 
iommu_find_device(dev_info_t *dip, void *arg) static void iommu_init(void) { - int error, bus, slot, func; + int error; vm_paddr_t maxaddr; -#ifdef __FreeBSD__ - devclass_t dc; -#endif - device_t dev; if (!iommu_enable) return; @@ -246,35 +242,7 @@ iommu_init(void) */ iommu_create_mapping(host_domain, 0, 0, maxaddr); -#ifdef __FreeBSD__ - add_tag = EVENTHANDLER_REGISTER(pci_add_device, iommu_pci_add, NULL, 0); - delete_tag = EVENTHANDLER_REGISTER(pci_delete_device, iommu_pci_delete, - NULL, 0); - dc = devclass_find("ppt"); - for (bus = 0; bus <= PCI_BUSMAX; bus++) { - for (slot = 0; slot <= PCI_SLOTMAX; slot++) { - for (func = 0; func <= PCI_FUNCMAX; func++) { - dev = pci_find_dbsf(0, bus, slot, func); - if (dev == NULL) - continue; - - /* Skip passthrough devices. */ - if (dc != NULL && - device_get_devclass(dev) == dc) - continue; - - /* - * Everything else belongs to the host - * domain. - */ - iommu_add_device(host_domain, - pci_get_rid(dev)); - } - } - } -#else ddi_walk_devs(ddi_root_node(), iommu_find_device, (void *)B_TRUE); -#endif IOMMU_ENABLE(); } diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c index c1825f4264..f7a05254ec 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c @@ -96,6 +96,12 @@ __FBSDID("$FreeBSD$"); static void vlapic_set_error(struct vlapic *, uint32_t, bool); static void vlapic_tmr_reset(struct vlapic *); +#ifdef __ISRVEC_DEBUG +static void vlapic_isrstk_accept(struct vlapic *, int); +static void vlapic_isrstk_eoi(struct vlapic *, int); +static void vlapic_isrstk_verify(const struct vlapic *); +#endif /* __ISRVEC_DEBUG */ + static __inline uint32_t vlapic_get_id(struct vlapic *vlapic) { @@ -134,12 +140,14 @@ vlapic_dfr_write_handler(struct vlapic *vlapic) lapic->dfr &= APIC_DFR_MODEL_MASK; lapic->dfr |= APIC_DFR_RESERVED; +#ifdef __FreeBSD__ if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model"); else 
if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model"); else VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr); +#endif } void @@ -495,21 +503,33 @@ vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt) return (1); } -#if 1 -static void -dump_isrvec_stk(struct vlapic *vlapic) +static uint_t +vlapic_active_isr(struct vlapic *vlapic) { int i; - uint32_t *isrptr; + uint32_t *isrp; - isrptr = &vlapic->apic_page->isr0; - for (i = 0; i < 8; i++) - printf("ISR%d 0x%08x\n", i, isrptr[i * 4]); + isrp = &vlapic->apic_page->isr7; - for (i = 0; i <= vlapic->isrvec_stk_top; i++) - printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]); + for (i = 7; i >= 0; i--, isrp -= 4) { + uint32_t reg = *isrp; + + if (reg != 0) { + uint_t vec = (i * 32) + bsrl(reg); + + if (vec < 16) { + /* + * Truncate the illegal low vectors to value of + * 0, indicating that no active ISR was found. + */ + return (0); + } + return (vec); + } + } + + return (0); } -#endif /* * Algorithm adopted from section "Interrupt, Task and Processor Priority" @@ -520,55 +540,11 @@ vlapic_update_ppr(struct vlapic *vlapic) { int isrvec, tpr, ppr; - /* - * Note that the value on the stack at index 0 is always 0. - * - * This is a placeholder for the value of ISRV when none of the - * bits is set in the ISRx registers. - */ - isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top]; + isrvec = vlapic_active_isr(vlapic); tpr = vlapic->apic_page->tpr; -#if 1 - { - int i, lastprio, curprio, vector, idx; - uint32_t *isrptr; - - if (vlapic->isrvec_stk_top == 0 && isrvec != 0) - panic("isrvec_stk is corrupted: %d", isrvec); - - /* - * Make sure that the priority of the nested interrupts is - * always increasing. 
- */ - lastprio = -1; - for (i = 1; i <= vlapic->isrvec_stk_top; i++) { - curprio = PRIO(vlapic->isrvec_stk[i]); - if (curprio <= lastprio) { - dump_isrvec_stk(vlapic); - panic("isrvec_stk does not satisfy invariant"); - } - lastprio = curprio; - } - - /* - * Make sure that each bit set in the ISRx registers has a - * corresponding entry on the isrvec stack. - */ - i = 1; - isrptr = &vlapic->apic_page->isr0; - for (vector = 0; vector < 256; vector++) { - idx = (vector / 32) * 4; - if (isrptr[idx] & (1 << (vector % 32))) { - if (i > vlapic->isrvec_stk_top || - vlapic->isrvec_stk[i] != vector) { - dump_isrvec_stk(vlapic); - panic("ISR and isrvec_stk out of sync"); - } - i++; - } - } - } +#ifdef __ISRVEC_DEBUG + vlapic_isrstk_verify(vlapic); #endif if (PRIO(tpr) >= PRIO(isrvec)) @@ -593,25 +569,25 @@ vlapic_process_eoi(struct vlapic *vlapic) { struct LAPIC *lapic = vlapic->apic_page; uint32_t *isrptr, *tmrptr; - int i, idx, bitpos, vector; + int i; + uint_t idx, bitpos, vector; isrptr = &lapic->isr0; tmrptr = &lapic->tmr0; for (i = 7; i >= 0; i--) { idx = i * 4; - bitpos = fls(isrptr[idx]); - if (bitpos-- != 0) { - if (vlapic->isrvec_stk_top <= 0) { - panic("invalid vlapic isrvec_stk_top %d", - vlapic->isrvec_stk_top); - } - isrptr[idx] &= ~(1 << bitpos); + if (isrptr[idx] != 0) { + bitpos = bsrl(isrptr[idx]); vector = i * 32 + bitpos; + + isrptr[idx] &= ~(1 << bitpos); VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d", vector); VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi"); - vlapic->isrvec_stk_top--; +#ifdef __ISRVEC_DEBUG + vlapic_isrstk_eoi(vlapic, vector); +#endif vlapic_update_ppr(vlapic); if ((tmrptr[idx] & (1 << bitpos)) != 0) { vioapic_process_eoi(vlapic->vm, vlapic->vcpuid, @@ -1143,7 +1119,7 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector) { struct LAPIC *lapic = vlapic->apic_page; uint32_t *irrptr, *isrptr; - int idx, stk_top; + int idx; if (vlapic->ops.intr_accepted) return ((*vlapic->ops.intr_accepted)(vlapic, vector)); @@ -1162,16 +1138,9 
@@ vlapic_intr_accepted(struct vlapic *vlapic, int vector) isrptr[idx] |= 1 << (vector % 32); VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted"); - /* - * Update the PPR - */ - vlapic->isrvec_stk_top++; - - stk_top = vlapic->isrvec_stk_top; - if (stk_top >= ISRVEC_STK_SIZE) - panic("isrvec_stk_top overflow %d", stk_top); - - vlapic->isrvec_stk[stk_top] = vector; +#ifdef __ISRVEC_DEBUG + vlapic_isrstk_accept(vlapic, vector); +#endif } void @@ -1708,3 +1677,92 @@ vlapic_localize_resources(struct vlapic *vlapic) vmm_glue_callout_localize(&vlapic->callout); } #endif /* __FreeBSD */ + +#ifdef __ISRVEC_DEBUG +static void +vlapic_isrstk_eoi(struct vlapic *vlapic, int vector) +{ + if (vlapic->isrvec_stk_top <= 0) { + panic("invalid vlapic isrvec_stk_top %d", + vlapic->isrvec_stk_top); + } + vlapic->isrvec_stk_top--; +} + +static void +vlapic_isrstk_accept(struct vlapic *vlapic, int vector) +{ + int stk_top; + + vlapic->isrvec_stk_top++; + + stk_top = vlapic->isrvec_stk_top; + if (stk_top >= ISRVEC_STK_SIZE) + panic("isrvec_stk_top overflow %d", stk_top); + + vlapic->isrvec_stk[stk_top] = vector; +} + +static void +vlapic_isrstk_dump(const struct vlapic *vlapic) +{ + int i; + uint32_t *isrptr; + + isrptr = &vlapic->apic_page->isr0; + for (i = 0; i < 8; i++) + printf("ISR%d 0x%08x\n", i, isrptr[i * 4]); + + for (i = 0; i <= vlapic->isrvec_stk_top; i++) + printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]); +} + +static void +vlapic_isrstk_verify(const struct vlapic *vlapic) +{ + int i, lastprio, curprio, vector, idx; + uint32_t *isrptr; + + /* + * Note: The value at index 0 in isrvec_stk is always 0. + * + * It is a placeholder for the value of ISR vector when no bits are set + * in the ISRx registers. + */ + if (vlapic->isrvec_stk_top == 0 && vlapic->isrvec_stk[0] != 0) { + panic("isrvec_stk is corrupted: %d", vlapic->isrvec_stk[0]); + } + + /* + * Make sure that the priority of the nested interrupts is + * always increasing. 
+ */ + lastprio = -1; + for (i = 1; i <= vlapic->isrvec_stk_top; i++) { + curprio = PRIO(vlapic->isrvec_stk[i]); + if (curprio <= lastprio) { + vlapic_isrstk_dump(vlapic); + panic("isrvec_stk does not satisfy invariant"); + } + lastprio = curprio; + } + + /* + * Make sure that each bit set in the ISRx registers has a + * corresponding entry on the isrvec stack. + */ + i = 1; + isrptr = &vlapic->apic_page->isr0; + for (vector = 0; vector < 256; vector++) { + idx = (vector / 32) * 4; + if (isrptr[idx] & (1 << (vector % 32))) { + if (i > vlapic->isrvec_stk_top || + vlapic->isrvec_stk[i] != vector) { + vlapic_isrstk_dump(vlapic); + panic("ISR and isrvec_stk out of sync"); + } + i++; + } + } +} +#endif diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h index 5795d48d52..8a0d594de3 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h @@ -27,6 +27,18 @@ * * $FreeBSD$ */ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * Copyright 2020 Oxide Computer Company + */ #ifndef _VLAPIC_PRIV_H_ #define _VLAPIC_PRIV_H_ @@ -140,6 +152,10 @@ enum boot_state { #define VLAPIC_TMR_CNT 8 +#ifdef DEBUG +#define __ISRVEC_DEBUG +#endif + struct vlapic; struct vlapic_ops { @@ -166,15 +182,6 @@ struct vlapic { struct bintime timer_period_bt; /* timer period */ struct mtx timer_mtx; - /* - * The 'isrvec_stk' is a stack of vectors injected by the local apic. - * A vector is popped from the stack when the processor does an EOI. 
- * The vector on the top of the stack is used to compute the - * Processor Priority in conjunction with the TPR. - */ - uint8_t isrvec_stk[ISRVEC_STK_SIZE]; - int isrvec_stk_top; - uint64_t msr_apicbase; enum boot_state boot_state; @@ -199,6 +206,19 @@ struct vlapic { */ uint32_t tmr_vec_deassert[VLAPIC_TMR_CNT]; uint32_t tmr_vec_assert[VLAPIC_TMR_CNT]; + +#ifdef __ISRVEC_DEBUG + /* + * The 'isrvec_stk' is a stack of vectors injected by the local APIC. + * It is used as a debugging method to double-check the behavior of the + * emulation. Vectors are pushed to the stack when they are accepted + * for injection and popped from the stack when the processor performs + * an EOI. The vector on the top of the stack is used to verify the + * computed Processor Priority. + */ + uint8_t isrvec_stk[ISRVEC_STK_SIZE]; + int isrvec_stk_top; +#endif }; void vlapic_init(struct vlapic *vlapic); diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c index 0dce2b0a1f..6664cb06e7 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c +++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c @@ -98,7 +98,7 @@ vpmtmr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, uint8_t bytes, now = sbinuptime(); delta = now - vpmtmr->baseuptime; KASSERT(delta >= 0, ("vpmtmr_handler: uptime went backwards: " - "%#lx to %#lx", vpmtmr->baseuptime, now)); + "%lx to %lx", vpmtmr->baseuptime, now)); *val = vpmtmr->baseval + delta / vpmtmr->freq_sbt; return (0); diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.c b/usr/src/uts/i86pc/io/vmm/io/vrtc.c index 343ad9c37a..e560ce9b7f 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vrtc.c +++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.c @@ -161,7 +161,7 @@ vrtc_curtime(struct vrtc *vrtc, sbintime_t *basetime) now = sbinuptime(); delta = now - vrtc->base_uptime; KASSERT(delta >= 0, ("vrtc_curtime: uptime went backwards: " - "%#lx to %#lx", vrtc->base_uptime, now)); + "%lx to %lx", vrtc->base_uptime, now)); secs = delta / SBT_1S; t += secs; *basetime += secs 
* SBT_1S; @@ -191,7 +191,7 @@ secs_to_rtc(time_t rtctime, struct vrtc *vrtc, int force_update) if (rtctime < 0) { KASSERT(rtctime == VRTC_BROKEN_TIME, - ("%s: invalid vrtc time %#lx", __func__, rtctime)); + ("%s: invalid vrtc time %lx", __func__, rtctime)); return; } @@ -286,33 +286,23 @@ rtc_to_secs(struct vrtc *vrtc) struct clocktime ct; struct timespec ts; struct rtcdev *rtc; -#ifdef __FreeBSD__ - struct vm *vm; -#endif int century, error, hour, pm, year; KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); -#ifdef __FreeBSD__ - vm = vrtc->vm; -#endif rtc = &vrtc->rtcdev; bzero(&ct, sizeof(struct clocktime)); error = rtcget(rtc, rtc->sec, &ct.sec); if (error || ct.sec < 0 || ct.sec > 59) { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC sec %#x/%d", rtc->sec, ct.sec); -#endif + /* invalid RTC seconds */ goto fail; } error = rtcget(rtc, rtc->min, &ct.min); if (error || ct.min < 0 || ct.min > 59) { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC min %#x/%d", rtc->min, ct.min); -#endif + /* invalid RTC minutes */ goto fail; } @@ -342,18 +332,13 @@ rtc_to_secs(struct vrtc *vrtc) if (pm) ct.hour += 12; } else { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC 12-hour format %#x/%d", - rtc->hour, ct.hour); -#endif + /* invalid RTC 12-hour format */ goto fail; } } if (error || ct.hour < 0 || ct.hour > 23) { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC hour %#x/%d", rtc->hour, ct.hour); -#endif + /* invalid RTC hour */ goto fail; } @@ -367,47 +352,32 @@ rtc_to_secs(struct vrtc *vrtc) error = rtcget(rtc, rtc->day_of_month, &ct.day); if (error || ct.day < 1 || ct.day > 31) { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC mday %#x/%d", rtc->day_of_month, - ct.day); -#endif + /* invalid RTC mday */ goto fail; } error = rtcget(rtc, rtc->month, &ct.mon); if (error || ct.mon < 1 || ct.mon > 12) { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC month %#x/%d", rtc->month, ct.mon); -#endif + /* invalid RTC month */ goto fail; } error = rtcget(rtc, rtc->year, &year); if 
(error || year < 0 || year > 99) { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC year %#x/%d", rtc->year, year); -#endif + /* invalid RTC year */ goto fail; } error = rtcget(rtc, rtc->century, &century); ct.year = century * 100 + year; if (error || ct.year < POSIX_BASE_YEAR) { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC century %#x/%d", rtc->century, - ct.year); -#endif + /* invalid RTC century */ goto fail; } error = clock_ct_to_ts(&ct, &ts); if (error || ts.tv_sec < 0) { -#ifdef __FreeBSD__ - VM_CTR3(vm, "Invalid RTC clocktime.date %04d-%02d-%02d", - ct.year, ct.mon, ct.day); - VM_CTR3(vm, "Invalid RTC clocktime.time %02d:%02d:%02d", - ct.hour, ct.min, ct.sec); -#endif + /* invalid RTC clocktime */ goto fail; } return (ts.tv_sec); /* success */ @@ -416,9 +386,6 @@ fail: * Stop updating the RTC if the date/time fields programmed by * the guest are invalid. */ -#ifdef __FreeBSD__ - VM_CTR0(vrtc->vm, "Invalid RTC date/time programming detected"); -#endif return (VRTC_BROKEN_TIME); } @@ -426,9 +393,6 @@ static int vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase) { struct rtcdev *rtc; -#ifdef __FreeBSD__ - sbintime_t oldbase; -#endif time_t oldtime; uint8_t alarm_sec, alarm_min, alarm_hour; @@ -440,14 +404,9 @@ vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase) alarm_hour = rtc->alarm_hour; oldtime = vrtc->base_rtctime; - VM_CTR2(vrtc->vm, "Updating RTC secs from %#lx to %#lx", + VM_CTR2(vrtc->vm, "Updating RTC secs from %lx to %lx", oldtime, newtime); -#ifdef __FreeBSD__ - oldbase = vrtc->base_uptime; - VM_CTR2(vrtc->vm, "Updating RTC base uptime from %#lx to %#lx", - oldbase, newbase); -#endif vrtc->base_uptime = newbase; if (newtime == oldtime) @@ -614,7 +573,7 @@ vrtc_callout_check(struct vrtc *vrtc, sbintime_t freq) active = callout_active(&vrtc->callout) ? 1 : 0; KASSERT((freq == 0 && !active) || (freq != 0 && active), - ("vrtc callout %s with frequency %#lx", + ("vrtc callout %s with frequency %lx", active ? 
"active" : "inactive", freq)); } @@ -643,7 +602,7 @@ vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval) rtc->reg_c = newirqf | newval; changed = oldval ^ rtc->reg_c; if (changed) { - VM_CTR2(vrtc->vm, "RTC reg_c changed from %#x to %#x", + VM_CTR2(vrtc->vm, "RTC reg_c changed from %x to %x", oldval, rtc->reg_c); } @@ -674,7 +633,7 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval) rtc->reg_b = newval; changed = oldval ^ newval; if (changed) { - VM_CTR2(vrtc->vm, "RTC reg_b changed from %#x to %#x", + VM_CTR2(vrtc->vm, "RTC reg_b changed from %x to %x", oldval, newval); } @@ -689,7 +648,7 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval) } else { curtime = vrtc_curtime(vrtc, &basetime); KASSERT(curtime == vrtc->base_rtctime, ("%s: mismatch " - "between vrtc basetime (%#lx) and curtime (%#lx)", + "between vrtc basetime (%lx) and curtime (%lx)", __func__, vrtc->base_rtctime, curtime)); /* @@ -745,7 +704,7 @@ vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval) oldfreq = vrtc_freq(vrtc); if (divider_enabled(oldval) && !divider_enabled(newval)) { - VM_CTR2(vrtc->vm, "RTC divider held in reset at %#lx/%#lx", + VM_CTR2(vrtc->vm, "RTC divider held in reset at %lx/%lx", vrtc->base_rtctime, vrtc->base_uptime); } else if (!divider_enabled(oldval) && divider_enabled(newval)) { /* @@ -755,7 +714,7 @@ vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval) * while the dividers were disabled. 
*/ vrtc->base_uptime = sbinuptime(); - VM_CTR2(vrtc->vm, "RTC divider out of reset at %#lx/%#lx", + VM_CTR2(vrtc->vm, "RTC divider out of reset at %lx/%lx", vrtc->base_rtctime, vrtc->base_uptime); } else { /* NOTHING */ @@ -764,7 +723,7 @@ vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval) vrtc->rtcdev.reg_a = newval; changed = oldval ^ newval; if (changed) { - VM_CTR2(vrtc->vm, "RTC reg_a changed from %#x to %#x", + VM_CTR2(vrtc->vm, "RTC reg_a changed from %x to %x", oldval, newval); } @@ -790,10 +749,10 @@ vrtc_set_time(struct vm *vm, time_t secs) VRTC_UNLOCK(vrtc); if (error) { - VM_CTR2(vrtc->vm, "Error %d setting RTC time to %#lx", error, + VM_CTR2(vrtc->vm, "Error %d setting RTC time to %lx", error, secs); } else { - VM_CTR1(vrtc->vm, "RTC time set to %#lx", secs); + VM_CTR1(vrtc->vm, "RTC time set to %lx", secs); } return (error); @@ -835,7 +794,7 @@ vrtc_nvram_write(struct vm *vm, int offset, uint8_t value) VRTC_LOCK(vrtc); ptr = (uint8_t *)(&vrtc->rtcdev); ptr[offset] = value; - VM_CTR2(vrtc->vm, "RTC nvram write %#x to offset %#x", value, offset); + VM_CTR2(vrtc->vm, "RTC nvram write %x to offset %x", value, offset); VRTC_UNLOCK(vrtc); return (0); @@ -945,24 +904,24 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, } else { *val = *((uint8_t *)rtc + offset); } - VCPU_CTR2(vm, vcpuid, "Read value %#x from RTC offset %#x", + VCPU_CTR2(vm, vcpuid, "Read value %x from RTC offset %x", *val, offset); } else { switch (offset) { case 10: - VCPU_CTR1(vm, vcpuid, "RTC reg_a set to %#x", *val); + VCPU_CTR1(vm, vcpuid, "RTC reg_a set to %x", *val); vrtc_set_reg_a(vrtc, *val); break; case 11: - VCPU_CTR1(vm, vcpuid, "RTC reg_b set to %#x", *val); + VCPU_CTR1(vm, vcpuid, "RTC reg_b set to %x", *val); error = vrtc_set_reg_b(vrtc, *val); break; case 12: - VCPU_CTR1(vm, vcpuid, "RTC reg_c set to %#x (ignored)", + VCPU_CTR1(vm, vcpuid, "RTC reg_c set to %x (ignored)", *val); break; case 13: - VCPU_CTR1(vm, vcpuid, "RTC reg_d set to %#x (ignored)", + 
VCPU_CTR1(vm, vcpuid, "RTC reg_d set to %x (ignored)", *val); break; case 0: @@ -972,7 +931,7 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, *val &= 0x7f; /* FALLTHRU */ default: - VCPU_CTR2(vm, vcpuid, "RTC offset %#x set to %#x", + VCPU_CTR2(vm, vcpuid, "RTC offset %x set to %x", offset, *val); *((uint8_t *)rtc + offset) = *val; break; diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h index fbd2884b84..9501850dfc 100644 --- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h +++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h @@ -74,7 +74,7 @@ typedef int (*vmm_init_func_t)(int ipinum); typedef int (*vmm_cleanup_func_t)(void); typedef void (*vmm_resume_func_t)(void); typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap); -typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip, +typedef int (*vmi_run_func_t)(void *vmi, int vcpu, uint64_t rip, struct pmap *pmap, struct vm_eventinfo *info); typedef void (*vmi_cleanup_func_t)(void *vmi); typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num, diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c index 7a47cd0cd1..4eb967fd89 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c @@ -1014,7 +1014,7 @@ vm_iommu_modify(struct vm *vm, bool map) if (map) { KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0, - ("iommu map found invalid memmap %#lx/%#lx/%#x", + ("iommu map found invalid memmap %lx/%lx/%x", mm->gpa, mm->len, mm->flags)); if ((mm->flags & VM_MEMMAP_F_WIRED) == 0) continue; @@ -1024,7 +1024,7 @@ vm_iommu_modify(struct vm *vm, bool map) continue; mm->flags &= ~VM_MEMMAP_F_IOMMU; KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0, - ("iommu unmap found invalid memmap %#lx/%#lx/%#x", + ("iommu unmap found invalid memmap %lx/%lx/%x", mm->gpa, mm->len, mm->flags)); } @@ -1032,7 +1032,7 @@ vm_iommu_modify(struct vm *vm, bool map) while (gpa < mm->gpa + mm->len) { vp = vm_gpa_hold(vm, -1, 
gpa, PAGE_SIZE, VM_PROT_WRITE, &cookie); - KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", + KASSERT(vp != NULL, ("vm(%s) could not map gpa %lx", vm_name(vm), gpa)); vm_gpa_release(cookie); @@ -1213,7 +1213,7 @@ vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) return (error); /* Set 'nextrip' to match the value of %rip */ - VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val); + VCPU_CTR1(vm, vcpuid, "Setting nextrip to %lx", val); vcpu = &vm->vcpu[vcpuid]; vcpu->nextrip = val; return (0); @@ -1561,7 +1561,7 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), vme->u.paging.gpa, ftype); if (rv == 0) { - VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx", + VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %lx", ftype == VM_PROT_READ ? "accessed" : "dirty", vme->u.paging.gpa); goto done; @@ -1571,7 +1571,7 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) map = &vm->vmspace->vm_map; rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL); - VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, " + VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %lx, " "ftype = %d", rv, vme->u.paging.gpa, ftype); if (rv != KERN_SUCCESS) @@ -1635,7 +1635,7 @@ vm_handle_mmio_emul(struct vm *vm, int vcpuid, bool *retu) inst_addr = vme->rip + vme->u.mmio_emul.cs_base; cs_d = vme->u.mmio_emul.cs_d; - VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", + VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %lx", vme->u.mmio_emul.gpa); /* Fetch the faulting instruction */ @@ -1655,7 +1655,7 @@ vm_handle_mmio_emul(struct vm *vm, int vcpuid, bool *retu) } if (vie_decode_instruction(vie, vm, vcpuid, cs_d) != 0) { - VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx", + VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %lx", inst_addr); /* Dump (unrecognized) instruction bytes in userspace */ vie_fallback_exitinfo(vie, vme); @@ -1915,7 +1915,7 @@ 
vm_suspend(struct vm *vm, enum vm_suspend_how how) if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) return (EINVAL); - if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { + if (atomic_cmpset_int((uint_t *)&vm->suspend, 0, how) == 0) { VM_CTR2(vm, "virtual machine already suspended %d/%d", vm->suspend, how); return (EALREADY); @@ -2406,7 +2406,7 @@ vm_restart_instruction(void *arg, int vcpuid) * instruction to be restarted. */ vcpu->exitinfo.inst_length = 0; - VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by " + VCPU_CTR1(vm, vcpuid, "restarting instruction at %lx by " "setting inst_length to zero", vcpu->exitinfo.rip); } else if (state == VCPU_FROZEN) { /* @@ -2418,7 +2418,7 @@ vm_restart_instruction(void *arg, int vcpuid) error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip); KASSERT(!error, ("%s: error %d getting rip", __func__, error)); VCPU_CTR2(vm, vcpuid, "restarting instruction by updating " - "nextrip from %#lx to %#lx", vcpu->nextrip, rip); + "nextrip from %lx to %lx", vcpu->nextrip, rip); vcpu->nextrip = rip; } else { panic("%s: invalid state %d", __func__, state); @@ -2449,7 +2449,7 @@ vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) } else { info = 0; } - VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info); + VCPU_CTR2(vm, vcpuid, "%s: info1(%lx)", __func__, info); vcpu->exitintinfo = info; return (0); } @@ -2467,11 +2467,7 @@ exception_class(uint64_t info) { int type, vector; -#ifdef __FreeBSD__ - KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); -#else KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %lx", info)); -#endif type = info & VM_INTINFO_TYPE; vector = info & 0xff; @@ -2519,13 +2515,8 @@ nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, enum exc_class exc1, exc2; int type1, vector1; -#ifdef __FreeBSD__ - KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); - KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); -#else 
KASSERT(info1 & VM_INTINFO_VALID, ("info1 %lx is not valid", info1)); KASSERT(info2 & VM_INTINFO_VALID, ("info2 %lx is not valid", info2)); -#endif /* * If an exception occurs while attempting to call the double-fault @@ -2534,7 +2525,7 @@ nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, type1 = info1 & VM_INTINFO_TYPE; vector1 = info1 & 0xff; if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { - VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)", + VCPU_CTR2(vm, vcpuid, "triple fault: info1(%lx), info2(%lx)", info1, info2); vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT); *retinfo = 0; @@ -2594,7 +2585,7 @@ vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) if (vcpu->exception_pending) { info2 = vcpu_exception_intinfo(vcpu); vcpu->exception_pending = 0; - VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx", + VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %lx", vcpu->exc_vector, info2); } @@ -2611,8 +2602,8 @@ vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) } if (valid) { - VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), " - "retinfo(%#lx)", __func__, info1, info2, *retinfo); + VCPU_CTR4(vm, vcpuid, "%s: info1(%lx), info2(%lx), " + "retinfo(%lx)", __func__, info1, info2, *retinfo); } return (valid); @@ -2735,7 +2726,7 @@ vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2) int error; vm = vmarg; - VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx", + VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %x, cr2 %lx", error_code, cr2); error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2); diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c index 4dcaba8a82..696052d7d6 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c @@ -181,7 +181,7 @@ vmm_alloc_check(mod_hash_key_t key, mod_hash_val_t *val, void *unused) { struct kmem_item *i = (struct kmem_item *)val; - 
cmn_err(CE_PANIC, "!vmm_alloc_check: hash not empty: %p, %d", i->addr, + cmn_err(CE_PANIC, "!vmm_alloc_check: hash not empty: %p, %lu", i->addr, i->size); return (MH_WALK_TERMINATE); diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c index fc0cf6622f..ae450f1d9b 100644 --- a/usr/src/uts/i86pc/os/cpuid.c +++ b/usr/src/uts/i86pc/os/cpuid.c @@ -1431,7 +1431,7 @@ static char *x86_feature_names[NUM_X86_FEATURES] = { "tbm", "avx512_vnni", "amd_pcec", - "mb_clear", + "md_clear", "mds_no", "core_thermal", "pkg_thermal", diff --git a/usr/src/uts/i86pc/os/gipt.c b/usr/src/uts/i86pc/os/gipt.c index ace7e03438..7bff5c3897 100644 --- a/usr/src/uts/i86pc/os/gipt.c +++ b/usr/src/uts/i86pc/os/gipt.c @@ -355,7 +355,8 @@ gipt_map_next_page(gipt_map_t *map, uint64_t va, uint64_t max_va, gipt_t **ptp) ASSERT3P(pt, !=, NULL); break; } else { - panic("unexpected PTE type %x @ va %p", ptet, cur_va); + panic("unexpected PTE type %x @ va %p", ptet, + (void *)cur_va); } } @@ -387,7 +388,8 @@ gipt_map_next_page(gipt_map_t *map, uint64_t va, uint64_t max_va, gipt_t **ptp) pt = gipt_map_lookup(map, cur_va, pt->gipt_level - 1); ASSERT3P(pt, !=, NULL); } else { - panic("unexpected PTE type %x @ va %p", ptet, cur_va); + panic("unexpected PTE type %x @ va %p", ptet, + (void *)cur_va); } } diff --git a/usr/src/uts/i86pc/os/hma.c b/usr/src/uts/i86pc/os/hma.c index a41ff3e0d1..0e84030ac1 100644 --- a/usr/src/uts/i86pc/os/hma.c +++ b/usr/src/uts/i86pc/os/hma.c @@ -11,6 +11,7 @@ /* * Copyright 2019 Joyent, Inc. + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. 
*/ #include <sys/cpuvar.h> @@ -33,6 +34,7 @@ struct hma_reg { static kmutex_t hma_lock; static list_t hma_registrations; static boolean_t hma_exclusive = B_FALSE; +int hma_disable = 0; static boolean_t hma_vmx_ready = B_FALSE; static const char *hma_vmx_error = NULL; @@ -89,6 +91,11 @@ hma_init(void) list_create(&hma_registrations, sizeof (struct hma_reg), offsetof(struct hma_reg, hr_node)); + if (hma_disable != 0) { + cmn_err(CE_CONT, "?hma_init: disabled"); + return; + } + switch (cpuid_getvendor(CPU)) { case X86_VENDOR_Intel: (void) hma_vmx_init(); diff --git a/usr/src/uts/i86pc/os/mp_implfuncs.c b/usr/src/uts/i86pc/os/mp_implfuncs.c index 2d0bd3eb53..c61e6216b0 100644 --- a/usr/src/uts/i86pc/os/mp_implfuncs.c +++ b/usr/src/uts/i86pc/os/mp_implfuncs.c @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2020 Oxide Computer Company */ #define PSMI_1_7 @@ -32,6 +33,7 @@ #include <sys/psm_modctl.h> #include <sys/smp_impldefs.h> #include <sys/reboot.h> +#include <sys/prom_debug.h> #if defined(__xpv) #include <sys/hypervisor.h> #include <vm/kboot_mmu.h> @@ -390,12 +392,6 @@ psm_modload(void) close_mach_list(); } -#if defined(__xpv) -#define NOTSUP_MSG "This version of Solaris xVM does not support this hardware" -#else -#define NOTSUP_MSG "This version of Solaris does not support this hardware" -#endif /* __xpv */ - void psm_install(void) { @@ -406,14 +402,18 @@ psm_install(void) mutex_enter(&psmsw_lock); for (swp = psmsw->psw_forw; swp != psmsw; ) { + PRM_DEBUGS(swp->psw_infop->p_mach_idstring); opsp = swp->psw_infop->p_ops; if (opsp->psm_probe) { + PRM_POINT("psm_probe()"); if ((*opsp->psm_probe)() == PSM_SUCCESS) { + PRM_POINT("psm_probe() PSM_SUCCESS"); psmcnt++; swp->psw_flag |= PSM_MOD_IDENTIFY; swp = swp->psw_forw; continue; } + PRM_POINT("psm_probe() FAILURE"); } /* remove the unsuccessful psm modules */ cswp = swp; @@ -429,7 +429,8 @@ psm_install(void) } mutex_exit(&psmsw_lock); if 
(psmcnt == 0) - halt(NOTSUP_MSG); + halt("the operating system does not yet support this hardware"); + PRM_POINT("psminitf()"); (*psminitf)(); } diff --git a/usr/src/uts/i86pc/os/mp_machdep.c b/usr/src/uts/i86pc/os/mp_machdep.c index f36f5f052d..f017995ac8 100644 --- a/usr/src/uts/i86pc/os/mp_machdep.c +++ b/usr/src/uts/i86pc/os/mp_machdep.c @@ -26,6 +26,7 @@ * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. * Copyright 2018 Joyent, Inc. + * Copyright 2020 Oxide Computer Company */ #define PSMI_1_7 @@ -63,6 +64,8 @@ #include <sys/sunddi.h> #include <sys/sunndi.h> #include <sys/cpc_pcbe.h> +#include <sys/prom_debug.h> + #define OFFSETOF(s, m) (size_t)(&(((s *)0)->m)) @@ -978,6 +981,7 @@ mach_init() { struct psm_ops *pops; + PRM_POINT("mach_construct_info()"); mach_construct_info(); pops = mach_set[0]; @@ -1017,6 +1021,7 @@ mach_init() notify_error = pops->psm_notify_error; } + PRM_POINT("psm_softinit()"); (*pops->psm_softinit)(); /* @@ -1034,6 +1039,7 @@ mach_init() #ifndef __xpv non_deep_idle_disp_enq_thread = disp_enq_thread; #endif + PRM_DEBUG(idle_cpu_use_hlt); if (idle_cpu_use_hlt) { idle_cpu = cpu_idle_adaptive; CPU->cpu_m.mcpu_idle_cpu = cpu_idle; @@ -1068,6 +1074,7 @@ mach_init() #endif } + PRM_POINT("mach_smpinit()"); mach_smpinit(); } diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c index 636e58280a..dd2b5d703b 100644 --- a/usr/src/uts/i86pc/os/startup.c +++ b/usr/src/uts/i86pc/os/startup.c @@ -25,6 +25,7 @@ * Copyright 2017 Nexenta Systems, Inc. * Copyright (c) 2018 Joyent, Inc. * Copyright (c) 2015 by Delphix. All rights reserved. + * Copyright 2020 Oxide Computer Company */ /* * Copyright (c) 2010, Intel Corporation. 
@@ -74,6 +75,7 @@ #include <sys/memlist_plat.h> #include <sys/varargs.h> #include <sys/promif.h> +#include <sys/prom_debug.h> #include <sys/modctl.h> #include <sys/sunddi.h> @@ -464,7 +466,7 @@ static pgcnt_t kphysm_init(page_t *, pgcnt_t); * | | * 0xFFFFFXXX.XXX00000 |-----------------------|- segkvmm_base (floating) * | segkp | - * |-----------------------|- segkp_base (floating) + * |-----------------------|- segkp_base (floating) * | page_t structures | valloc_base + valloc_sz * | memsegs, memlists, | * | page hash, etc. | @@ -623,21 +625,8 @@ size_t toxic_bit_map_len = 0; /* in bits */ #endif /* __i386 */ -/* - * Simple boot time debug facilities - */ -static char *prm_dbg_str[] = { - "%s:%d: '%s' is 0x%x\n", - "%s:%d: '%s' is 0x%llx\n" -}; - int prom_debug; -#define PRM_DEBUG(q) if (prom_debug) \ - prom_printf(prm_dbg_str[sizeof (q) >> 3], "startup.c", __LINE__, #q, q); -#define PRM_POINT(q) if (prom_debug) \ - prom_printf("%s:%d: %s\n", "startup.c", __LINE__, q); - /* * This structure is used to keep track of the intial allocations * done in startup_memlist(). The value of NUM_ALLOCATIONS needs to @@ -2273,6 +2262,7 @@ startup_end(void) * We can now setup for XSAVE because fpu_probe is done in configure(). */ if (fp_save_mech == FP_XSAVE) { + PRM_POINT("xsave_setup_msr()"); xsave_setup_msr(CPU); } @@ -2281,7 +2271,9 @@ startup_end(void) * support. */ setx86isalist(); + PRM_POINT("cpu_intr_alloc()"); cpu_intr_alloc(CPU, NINTR_THREADS); + PRM_POINT("psm_install()"); psm_install(); /* diff --git a/usr/src/uts/i86pc/sys/hpet_acpi.h b/usr/src/uts/i86pc/sys/hpet_acpi.h index e60ebe4bba..81304674b5 100644 --- a/usr/src/uts/i86pc/sys/hpet_acpi.h +++ b/usr/src/uts/i86pc/sys/hpet_acpi.h @@ -36,7 +36,7 @@ extern "C" { #endif /* - * Solaris uses an HPET Timer to generate interrupts for CPUs in Deep C-state + * illumos uses an HPET Timer to generate interrupts for CPUs in Deep C-state * with stalled LAPIC Timers. All CPUs use one HPET timer. 
The timer's * interrupt targets one CPU (via the I/O APIC). The one CPU that receives * the HPET's interrupt wakes up other CPUs as needed during the HPET Interrupt @@ -46,7 +46,7 @@ extern "C" { * Please see the Intel Programmer's guides. Interrupts are disabled before * a CPU Halts into Deep C-state. (This allows CPU-hardware-specific cleanup * before servicing interrupts.) When a Deep C-state CPU wakes up (due to - * an externally generated interrupt), it resume execution where it halted. + * an externally generated interrupt), it resumes execution where it halted. * The CPU returning from Deep C-state must enable interrupts before it will * handle the pending interrupt that woke it from Deep C-state. * @@ -72,7 +72,7 @@ extern "C" { * } timers[32]; * } * - * There are 32 possible timers in an hpet. Only the first 3 timers are + * There are 32 possible timers in an HPET. Only the first 3 timers are * required. The other 29 timers are optional. * * HPETs can have 64-bit or 32-bit timers. Timers/compare registers can @@ -80,7 +80,7 @@ extern "C" { * The first two timers are not used. The HPET spec intends the first two * timers to be used as "legacy replacement" for the PIT and RTC timers. * - * Solaris uses the first available non-legacy replacement timer as a proxy + * illumos uses the first available non-legacy replacement timer as a proxy * timer for processor Local APIC Timers that stop in deep idle C-states. */ @@ -97,7 +97,7 @@ extern "C" { #define HPET_SIZE (1024) /* - * Offsets of hpet registers and macros to access them from HPET base address. + * Offsets of HPET registers and macros to access them from HPET base address. 
*/ #define HPET_GEN_CAP_OFFSET (0) #define HPET_GEN_CONFIG_OFFSET (0x10) diff --git a/usr/src/uts/i86pc/sys/prom_debug.h b/usr/src/uts/i86pc/sys/prom_debug.h new file mode 100644 index 0000000000..ae64d91711 --- /dev/null +++ b/usr/src/uts/i86pc/sys/prom_debug.h @@ -0,0 +1,72 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Oxide Computer Company + */ + +#ifndef _SYS_PROM_DEBUG_H +#define _SYS_PROM_DEBUG_H + +#include <sys/promif.h> + +/* + * These macros are used to emit coarse-grained early boot debugging + * information when the user sets "prom_debug" in the boot environment. They + * should only be used for information that we cannot easily obtain through a + * richer mechanism because the machine hangs or crashes before other debugging + * tools are available. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +extern int prom_debug; + +/* + * Print a string message, used to signal that we have at least reached a + * particular point in the code: + */ +#define PRM_POINT(q) do { \ + if (prom_debug) { \ + prom_printf("%s:%d: %s\n", \ + __FILE__, __LINE__, (q)); \ + } \ + } while (0) + +/* + * Print the name and value of an integer variable: + */ +#define PRM_DEBUG(q) do { \ + if (prom_debug) { \ + prom_printf("%s:%d: '%s' is 0x%llx\n", \ + __FILE__, __LINE__, #q, (long long)(q)); \ + } \ + } while (0) + +/* + * Print the name and value of a string (char *) variable (which may be NULL): + */ +#define PRM_DEBUGS(q) do { \ + if (prom_debug) { \ + const char *qq = q; \ + prom_printf("%s:%d: '%s' is '%s'\n", \ + __FILE__, __LINE__, #q, \ + qq != NULL ? 
qq : "<NULL>"); \ + } \ + } while (0) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_PROM_DEBUG_H */ diff --git a/usr/src/uts/i86pc/vmm/Makefile b/usr/src/uts/i86pc/vmm/Makefile index e7f07c4c4e..0106dd0a0f 100644 --- a/usr/src/uts/i86pc/vmm/Makefile +++ b/usr/src/uts/i86pc/vmm/Makefile @@ -43,7 +43,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) # Overrides and additions # -CERRWARN += -_gcc=-Wno-empty-body # 3rd party code SMOFF += all_func_returns @@ -51,9 +50,6 @@ SMOFF += all_func_returns # needs work $(OBJS_DIR)/vmm_sol_dev.o := SMOFF += signed_integer_overflow_check -# a can't happen: vmx_setcap() warn: variable dereferenced before check 'pptr' -$(OBJS_DIR)/vmx.o := SMOFF += deref_check - ALL_BUILDS = $(ALL_BUILDSONLY64) DEF_BUILDS = $(DEF_BUILDSONLY64) PRE_INC_PATH = -I$(COMPAT)/bhyve -I$(COMPAT)/bhyve/amd64 \ @@ -61,17 +57,9 @@ PRE_INC_PATH = -I$(COMPAT)/bhyve -I$(COMPAT)/bhyve/amd64 \ INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(UTSBASE)/i86pc/io/vmm/io AS_INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(OBJS_DIR) -CFLAGS += -_gcc=-Wimplicit-function-declaration -# The FreeBSD %# notation makes gcc gripe -CFLAGS += -_gcc=-Wno-format # enable collection of VMM statistics CFLAGS += -DVMM_KEEP_STATS -$(OBJS_DIR)/vmm.o := CERRWARN += -_gcc=-Wno-pointer-sign -_gcc=-Wno-type-limits -$(OBJS_DIR)/svm.o := CERRWARN += -_gcc=-Wno-pointer-sign -_gcc=-Wno-type-limits -$(OBJS_DIR)/vmx.o := CERRWARN += -_gcc=-Wno-unused-variable -$(OBJS_DIR)/iommu.o := CERRWARN += -_gcc=-Wno-unused-variable - LDFLAGS += -N misc/acpica -N misc/pcie -N fs/dev LDFLAGS += -z type=kmod -M $(MAPFILE) |