diff options
| author | Patrick Mooney <pmooney@pfmooney.com> | 2019-02-13 21:18:37 +0000 |
|---|---|---|
| committer | Patrick Mooney <pmooney@pfmooney.com> | 2019-02-20 21:35:38 +0000 |
| commit | c6a22c86f2d0254060157d591af12a83e37b9a50 (patch) | |
| tree | 7ca57323cc911ea806e535b1de7909873f35f1b6 | |
| parent | d7e5de8ade719deb02b214bb5901ab33c0406f0f (diff) | |
| download | illumos-joyent-c6a22c86f2d0254060157d591af12a83e37b9a50.tar.gz | |
OS-7580 bhyve upstream sync 2019 Feb
Reviewed by: John Levon <john.levon@joyent.com>
Reviewed by: Hans Rosenfeld <hans.rosenfeld@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
34 files changed, 1054 insertions, 476 deletions
diff --git a/usr/contrib/freebsd/x86/specialreg.h b/usr/contrib/freebsd/x86/specialreg.h index 10bc4e7bd9..5e223fa60d 100644 --- a/usr/contrib/freebsd/x86/specialreg.h +++ b/usr/contrib/freebsd/x86/specialreg.h @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-3-Clause + * * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * @@ -10,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -74,6 +76,7 @@ #define CR4_PCIDE 0x00020000 /* Enable Context ID */ #define CR4_XSAVE 0x00040000 /* XSETBV/XGETBV */ #define CR4_SMEP 0x00100000 /* Supervisor-Mode Execution Prevention */ +#define CR4_SMAP 0x00200000 /* Supervisor-Mode Access Prevention */ /* * Bits in AMD64 special registers. EFER is 64 bits wide. @@ -186,8 +189,43 @@ #define CPUTPM1_SENSOR 0x00000001 #define CPUTPM1_TURBO 0x00000002 #define CPUTPM1_ARAT 0x00000004 +#define CPUTPM1_HWP 0x00000080 +#define CPUTPM1_HWP_NOTIFICATION 0x00000100 +#define CPUTPM1_HWP_ACTIVITY_WINDOW 0x00000200 +#define CPUTPM1_HWP_PERF_PREF 0x00000400 +#define CPUTPM1_HWP_PKG 0x00000800 +#define CPUTPM1_HWP_FLEXIBLE 0x00020000 #define CPUTPM2_EFFREQ 0x00000001 +/* Intel Processor Trace CPUID. */ + +/* Leaf 0 ebx. */ +#define CPUPT_CR3 (1 << 0) /* CR3 Filtering Support */ +#define CPUPT_PSB (1 << 1) /* Configurable PSB and Cycle-Accurate Mode Supported */ +#define CPUPT_IPF (1 << 2) /* IP Filtering and TraceStop supported */ +#define CPUPT_MTC (1 << 3) /* MTC Supported */ +#define CPUPT_PRW (1 << 4) /* PTWRITE Supported */ +#define CPUPT_PWR (1 << 5) /* Power Event Trace Supported */ + +/* Leaf 0 ecx. */ +#define CPUPT_TOPA (1 << 0) /* ToPA Output Supported */ +#define CPUPT_TOPA_MULTI (1 << 1) /* ToPA Tables Allow Multiple Output Entries */ +#define CPUPT_SINGLE (1 << 2) /* Single-Range Output Supported */ +#define CPUPT_TT_OUT (1 << 3) /* Output to Trace Transport Subsystem Supported */ +#define CPUPT_LINEAR_IP (1 << 31) /* IP Payloads are Linear IP, otherwise IP is effective */ + +/* Leaf 1 eax. */ +#define CPUPT_NADDR_S 0 /* Number of Address Ranges */ +#define CPUPT_NADDR_M (0x7 << CPUPT_NADDR_S) +#define CPUPT_MTC_BITMAP_S 16 /* Bitmap of supported MTC Period Encodings */ +#define CPUPT_MTC_BITMAP_M (0xffff << CPUPT_MTC_BITMAP_S) + +/* Leaf 1 ebx. */ +#define CPUPT_CT_BITMAP_S 0 /* Bitmap of supported Cycle Threshold values */ +#define CPUPT_CT_BITMAP_M (0xffff << CPUPT_CT_BITMAP_S) +#define CPUPT_PFE_BITMAP_S 16 /* Bitmap of supported Configurable PSB Frequency encoding */ +#define CPUPT_PFE_BITMAP_M (0xffff << CPUPT_PFE_BITMAP_S) + /* * Important bits in the AMD extended cpuid flags */ @@ -308,6 +346,15 @@ #define CPUID_EXTSTATE_XSAVES 0x00000008 /* + * AMD extended function 8000_0007h ebx info + */ +#define AMDRAS_MCA_OF_RECOV 0x00000001 +#define AMDRAS_SUCCOR 0x00000002 +#define AMDRAS_HW_ASSERT 0x00000004 +#define AMDRAS_SCALABLE_MCA 0x00000008 +#define AMDRAS_PFEH_SUPPORT 0x00000010 + +/* * AMD extended function 8000_0007h edx info */ #define AMDPM_TS 0x00000001 @@ -322,6 +369,24 @@ #define AMDPM_CPB 0x00000200 /* + * AMD extended function 8000_0008h ebx info (amd_extended_feature_extensions) + */ +#define AMDFEID_CLZERO 0x00000001 +#define AMDFEID_IRPERF 0x00000002 +#define AMDFEID_XSAVEERPTR 0x00000004 +#define AMDFEID_IBPB 0x00001000 +#define AMDFEID_IBRS 0x00004000 +#define AMDFEID_STIBP 0x00008000 +/* The below are only defined if the corresponding base feature above exists. */ +#define AMDFEID_IBRS_ALWAYSON 0x00010000 +#define AMDFEID_STIBP_ALWAYSON 0x00020000 +#define AMDFEID_PREFER_IBRS 0x00040000 +#define AMDFEID_SSBD 0x01000000 +/* SSBD via MSRC001_011F instead of MSR 0x48: */ +#define AMDFEID_VIRT_SSBD 0x02000000 +#define AMDFEID_SSB_NO 0x04000000 + +/* * AMD extended function 8000_0008h ecx info */ #define AMDID_CMP_CORES 0x000000ff @@ -362,6 +427,7 @@ #define CPUID_STDEXT_AVX512CD 0x10000000 #define CPUID_STDEXT_SHA 0x20000000 #define CPUID_STDEXT_AVX512BW 0x40000000 +#define CPUID_STDEXT_AVX512VL 0x80000000 /* * CPUID instruction 7 Structured Extended Features, leaf 0 ecx info @@ -370,10 +436,32 @@ #define CPUID_STDEXT2_UMIP 0x00000004 #define CPUID_STDEXT2_PKU 0x00000008 #define CPUID_STDEXT2_OSPKE 0x00000010 +#define CPUID_STDEXT2_WAITPKG 0x00000020 +#define CPUID_STDEXT2_GFNI 0x00000100 #define CPUID_STDEXT2_RDPID 0x00400000 +#define CPUID_STDEXT2_CLDEMOTE 0x02000000 +#define CPUID_STDEXT2_MOVDIRI 0x08000000 +#define CPUID_STDEXT2_MOVDIRI64B 0x10000000 #define CPUID_STDEXT2_SGXLC 0x40000000 /* + * CPUID instruction 7 Structured Extended Features, leaf 0 edx info + */ +#define CPUID_STDEXT3_IBPB 0x04000000 +#define CPUID_STDEXT3_STIBP 0x08000000 +#define CPUID_STDEXT3_L1D_FLUSH 0x10000000 +#define CPUID_STDEXT3_ARCH_CAP 0x20000000 +#define CPUID_STDEXT3_CORE_CAP 0x40000000 +#define CPUID_STDEXT3_SSBD 0x80000000 + +/* MSR IA32_ARCH_CAP(ABILITIES) bits */ +#define IA32_ARCH_CAP_RDCL_NO 0x00000001 +#define IA32_ARCH_CAP_IBRS_ALL 0x00000002 +#define IA32_ARCH_CAP_RSBA 0x00000004 +#define IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY 0x00000008 +#define IA32_ARCH_CAP_SSB_NO 0x00000010 + +/* * CPUID manufacturers identifiers */ #define AMD_VENDOR_ID "AuthenticAMD" @@ -401,6 +489,8 @@ #define MSR_EBL_CR_POWERON 0x02a #define MSR_TEST_CTL 0x033 #define MSR_IA32_FEATURE_CONTROL 0x03a +#define MSR_IA32_SPEC_CTRL 0x048 +#define MSR_IA32_PRED_CMD 0x049 #define MSR_BIOS_UPDT_TRIG 0x079 #define MSR_BBL_CR_D0 0x088 #define MSR_BBL_CR_D1 0x089 @@ -413,6 +503,8 @@ #define MSR_APERF 0x0e8 #define MSR_IA32_EXT_CONFIG 0x0ee /* Undocumented. Core Solo/Duo only */ #define MSR_MTRRcap 0x0fe +#define MSR_IA32_ARCH_CAP 0x10a +#define MSR_IA32_FLUSH_CMD 0x10b #define MSR_BBL_CR_ADDR 0x116 #define MSR_BBL_CR_DECC 0x118 #define MSR_BBL_CR_CTL 0x119 @@ -472,7 +564,14 @@ #define MSR_DRAM_ENERGY_STATUS 0x619 #define MSR_PP0_ENERGY_STATUS 0x639 #define MSR_PP1_ENERGY_STATUS 0x641 +#define MSR_PPERF 0x64e #define MSR_TSC_DEADLINE 0x6e0 /* Writes are not serializing */ +#define MSR_IA32_PM_ENABLE 0x770 +#define MSR_IA32_HWP_CAPABILITIES 0x771 +#define MSR_IA32_HWP_REQUEST_PKG 0x772 +#define MSR_IA32_HWP_INTERRUPT 0x773 +#define MSR_IA32_HWP_REQUEST 0x774 +#define MSR_IA32_HWP_STATUS 0x777 /* * VMX MSRs @@ -531,6 +630,85 @@ #define MSR_IA32_XSS 0xda0 /* + * Intel Processor Trace (PT) MSRs. + */ +#define MSR_IA32_RTIT_OUTPUT_BASE 0x560 /* Trace Output Base Register (R/W) */ +#define MSR_IA32_RTIT_OUTPUT_MASK_PTRS 0x561 /* Trace Output Mask Pointers Register (R/W) */ +#define MSR_IA32_RTIT_CTL 0x570 /* Trace Control Register (R/W) */ +#define RTIT_CTL_TRACEEN (1 << 0) +#define RTIT_CTL_CYCEN (1 << 1) +#define RTIT_CTL_OS (1 << 2) +#define RTIT_CTL_USER (1 << 3) +#define RTIT_CTL_PWREVTEN (1 << 4) +#define RTIT_CTL_FUPONPTW (1 << 5) +#define RTIT_CTL_FABRICEN (1 << 6) +#define RTIT_CTL_CR3FILTER (1 << 7) +#define RTIT_CTL_TOPA (1 << 8) +#define RTIT_CTL_MTCEN (1 << 9) +#define RTIT_CTL_TSCEN (1 << 10) +#define RTIT_CTL_DISRETC (1 << 11) +#define RTIT_CTL_PTWEN (1 << 12) +#define RTIT_CTL_BRANCHEN (1 << 13) +#define RTIT_CTL_MTC_FREQ_S 14 +#define RTIT_CTL_MTC_FREQ(n) ((n) << RTIT_CTL_MTC_FREQ_S) +#define RTIT_CTL_MTC_FREQ_M (0xf << RTIT_CTL_MTC_FREQ_S) +#define RTIT_CTL_CYC_THRESH_S 19 +#define RTIT_CTL_CYC_THRESH_M (0xf << RTIT_CTL_CYC_THRESH_S) +#define RTIT_CTL_PSB_FREQ_S 24 +#define RTIT_CTL_PSB_FREQ_M (0xf << RTIT_CTL_PSB_FREQ_S) +#define RTIT_CTL_ADDR_CFG_S(n) (32 + (n) * 4) +#define RTIT_CTL_ADDR0_CFG_S 32 +#define RTIT_CTL_ADDR0_CFG_M (0xfULL << RTIT_CTL_ADDR0_CFG_S) +#define RTIT_CTL_ADDR1_CFG_S 36 +#define RTIT_CTL_ADDR1_CFG_M (0xfULL << RTIT_CTL_ADDR1_CFG_S) +#define RTIT_CTL_ADDR2_CFG_S 40 +#define RTIT_CTL_ADDR2_CFG_M (0xfULL << RTIT_CTL_ADDR2_CFG_S) +#define RTIT_CTL_ADDR3_CFG_S 44 +#define RTIT_CTL_ADDR3_CFG_M (0xfULL << RTIT_CTL_ADDR3_CFG_S) +#define MSR_IA32_RTIT_STATUS 0x571 /* Tracing Status Register (R/W) */ +#define RTIT_STATUS_FILTEREN (1 << 0) +#define RTIT_STATUS_CONTEXTEN (1 << 1) +#define RTIT_STATUS_TRIGGEREN (1 << 2) +#define RTIT_STATUS_ERROR (1 << 4) +#define RTIT_STATUS_STOPPED (1 << 5) +#define RTIT_STATUS_PACKETBYTECNT_S 32 +#define RTIT_STATUS_PACKETBYTECNT_M (0x1ffffULL << RTIT_STATUS_PACKETBYTECNT_S) +#define MSR_IA32_RTIT_CR3_MATCH 0x572 /* Trace Filter CR3 Match Register (R/W) */ +#define MSR_IA32_RTIT_ADDR_A(n) (0x580 + (n) * 2) +#define MSR_IA32_RTIT_ADDR_B(n) (0x581 + (n) * 2) +#define MSR_IA32_RTIT_ADDR0_A 0x580 /* Region 0 Start Address (R/W) */ +#define MSR_IA32_RTIT_ADDR0_B 0x581 /* Region 0 End Address (R/W) */ +#define MSR_IA32_RTIT_ADDR1_A 0x582 /* Region 1 Start Address (R/W) */ +#define MSR_IA32_RTIT_ADDR1_B 0x583 /* Region 1 End Address (R/W) */ +#define MSR_IA32_RTIT_ADDR2_A 0x584 /* Region 2 Start Address (R/W) */ +#define MSR_IA32_RTIT_ADDR2_B 0x585 /* Region 2 End Address (R/W) */ +#define MSR_IA32_RTIT_ADDR3_A 0x586 /* Region 3 Start Address (R/W) */ +#define MSR_IA32_RTIT_ADDR3_B 0x587 /* Region 3 End Address (R/W) */ + +/* Intel Processor Trace Table of Physical Addresses (ToPA). */ +#define TOPA_SIZE_S 6 +#define TOPA_SIZE_M (0xf << TOPA_SIZE_S) +#define TOPA_SIZE_4K (0 << TOPA_SIZE_S) +#define TOPA_SIZE_8K (1 << TOPA_SIZE_S) +#define TOPA_SIZE_16K (2 << TOPA_SIZE_S) +#define TOPA_SIZE_32K (3 << TOPA_SIZE_S) +#define TOPA_SIZE_64K (4 << TOPA_SIZE_S) +#define TOPA_SIZE_128K (5 << TOPA_SIZE_S) +#define TOPA_SIZE_256K (6 << TOPA_SIZE_S) +#define TOPA_SIZE_512K (7 << TOPA_SIZE_S) +#define TOPA_SIZE_1M (8 << TOPA_SIZE_S) +#define TOPA_SIZE_2M (9 << TOPA_SIZE_S) +#define TOPA_SIZE_4M (10 << TOPA_SIZE_S) +#define TOPA_SIZE_8M (11 << TOPA_SIZE_S) +#define TOPA_SIZE_16M (12 << TOPA_SIZE_S) +#define TOPA_SIZE_32M (13 << TOPA_SIZE_S) +#define TOPA_SIZE_64M (14 << TOPA_SIZE_S) +#define TOPA_SIZE_128M (15 << TOPA_SIZE_S) +#define TOPA_STOP (1 << 4) +#define TOPA_INT (1 << 2) +#define TOPA_END (1 << 0) + +/* * Constants related to MSR's. */ #define APICBASE_RESERVED 0x000002ff @@ -556,6 +734,44 @@ #define IA32_MISC_EN_XDD 0x0000000400000000ULL /* + * IA32_SPEC_CTRL and IA32_PRED_CMD MSRs are described in the Intel' + * document 336996-001 Speculative Execution Side Channel Mitigations. + * + * AMD uses the same MSRs and bit definitions, as described in 111006-B + * "Indirect Branch Control Extension" and 124441 "Speculative Store Bypass + * Disable." + */ +/* MSR IA32_SPEC_CTRL */ +#define IA32_SPEC_CTRL_IBRS 0x00000001 +#define IA32_SPEC_CTRL_STIBP 0x00000002 +#define IA32_SPEC_CTRL_SSBD 0x00000004 + +/* MSR IA32_PRED_CMD */ +#define IA32_PRED_CMD_IBPB_BARRIER 0x0000000000000001ULL + +/* MSR IA32_FLUSH_CMD */ +#define IA32_FLUSH_CMD_L1D 0x00000001 + +/* MSR IA32_HWP_CAPABILITIES */ +#define IA32_HWP_CAPABILITIES_HIGHEST_PERFORMANCE(x) (((x) >> 0) & 0xff) +#define IA32_HWP_CAPABILITIES_GUARANTEED_PERFORMANCE(x) (((x) >> 8) & 0xff) +#define IA32_HWP_CAPABILITIES_EFFICIENT_PERFORMANCE(x) (((x) >> 16) & 0xff) +#define IA32_HWP_CAPABILITIES_LOWEST_PERFORMANCE(x) (((x) >> 24) & 0xff) + +/* MSR IA32_HWP_REQUEST */ +#define IA32_HWP_REQUEST_MINIMUM_VALID (1ULL << 63) +#define IA32_HWP_REQUEST_MAXIMUM_VALID (1ULL << 62) +#define IA32_HWP_REQUEST_DESIRED_VALID (1ULL << 61) +#define IA32_HWP_REQUEST_EPP_VALID (1ULL << 60) +#define IA32_HWP_REQUEST_ACTIVITY_WINDOW_VALID (1ULL << 59) +#define IA32_HWP_REQUEST_PACKAGE_CONTROL (1ULL << 42) +#define IA32_HWP_ACTIVITY_WINDOW (0x3ffULL << 32) +#define IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE (0xffULL << 24) +#define IA32_HWP_DESIRED_PERFORMANCE (0xffULL << 16) +#define IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE (0xffULL << 8) +#define IA32_HWP_MINIMUM_PERFORMANCE (0xffULL << 0) + +/* * PAT modes. */ #define PAT_UNCACHEABLE 0x00 @@ -706,21 +922,22 @@ #define MC_CTL2_THRESHOLD 0x0000000000007fff #define MC_CTL2_CMCI_EN 0x0000000040000000 #define MC_AMDNB_BANK 4 -#define MC_MISC_AMDNB_VAL 0x8000000000000000 /* Counter presence valid */ -#define MC_MISC_AMDNB_CNTP 0x4000000000000000 /* Counter present */ -#define MC_MISC_AMDNB_LOCK 0x2000000000000000 /* Register locked */ -#define MC_MISC_AMDNB_LVT_MASK 0x00f0000000000000 /* Extended LVT offset */ -#define MC_MISC_AMDNB_LVT_SHIFT 52 -#define MC_MISC_AMDNB_CNTEN 0x0008000000000000 /* Counter enabled */ -#define MC_MISC_AMDNB_INT_MASK 0x0006000000000000 /* Interrupt type */ -#define MC_MISC_AMDNB_INT_LVT 0x0002000000000000 /* Interrupt via Extended LVT */ -#define MC_MISC_AMDNB_INT_SMI 0x0004000000000000 /* SMI */ -#define MC_MISC_AMDNB_OVERFLOW 0x0001000000000000 /* Counter overflow */ -#define MC_MISC_AMDNB_CNT_MASK 0x00000fff00000000 /* Counter value */ -#define MC_MISC_AMDNB_CNT_SHIFT 32 -#define MC_MISC_AMDNB_CNT_MAX 0xfff -#define MC_MISC_AMDNB_PTR_MASK 0x00000000ff000000 /* Pointer to additional registers */ -#define MC_MISC_AMDNB_PTR_SHIFT 24 +#define MC_MISC_AMD_VAL 0x8000000000000000 /* Counter presence valid */ +#define MC_MISC_AMD_CNTP 0x4000000000000000 /* Counter present */ +#define MC_MISC_AMD_LOCK 0x2000000000000000 /* Register locked */ +#define MC_MISC_AMD_INTP 0x1000000000000000 /* Int. type can generate interrupts */ +#define MC_MISC_AMD_LVT_MASK 0x00f0000000000000 /* Extended LVT offset */ +#define MC_MISC_AMD_LVT_SHIFT 52 +#define MC_MISC_AMD_CNTEN 0x0008000000000000 /* Counter enabled */ +#define MC_MISC_AMD_INT_MASK 0x0006000000000000 /* Interrupt type */ +#define MC_MISC_AMD_INT_LVT 0x0002000000000000 /* Interrupt via Extended LVT */ +#define MC_MISC_AMD_INT_SMI 0x0004000000000000 /* SMI */ +#define MC_MISC_AMD_OVERFLOW 0x0001000000000000 /* Counter overflow */ +#define MC_MISC_AMD_CNT_MASK 0x00000fff00000000 /* Counter value */ +#define MC_MISC_AMD_CNT_SHIFT 32 +#define MC_MISC_AMD_CNT_MAX 0xfff +#define MC_MISC_AMD_PTR_MASK 0x00000000ff000000 /* Pointer to additional registers */ +#define MC_MISC_AMD_PTR_SHIFT 24 /* * The following four 3-byte registers control the non-cacheable regions. @@ -841,18 +1058,19 @@ #define MSR_TOP_MEM 0xc001001a /* boundary for ram below 4G */ #define MSR_TOP_MEM2 0xc001001d /* boundary for ram above 4G */ #define MSR_NB_CFG1 0xc001001f /* NB configuration 1 */ +#define MSR_K8_UCODE_UPDATE 0xc0010020 /* update microcode */ +#define MSR_MC0_CTL_MASK 0xc0010044 #define MSR_P_STATE_LIMIT 0xc0010061 /* P-state Current Limit Register */ #define MSR_P_STATE_CONTROL 0xc0010062 /* P-state Control Register */ #define MSR_P_STATE_STATUS 0xc0010063 /* P-state Status Register */ #define MSR_P_STATE_CONFIG(n) (0xc0010064 + (n)) /* P-state Config */ #define MSR_SMM_ADDR 0xc0010112 /* SMM TSEG base address */ #define MSR_SMM_MASK 0xc0010113 /* SMM TSEG address mask */ +#define MSR_VM_CR 0xc0010114 /* SVM: feature control */ +#define MSR_VM_HSAVE_PA 0xc0010117 /* SVM: host save area address */ +#define MSR_AMD_CPUID07 0xc0011002 /* CPUID 07 %ebx override */ #define MSR_EXTFEATURES 0xc0011005 /* Extended CPUID Features override */ #define MSR_IC_CFG 0xc0011021 /* Instruction Cache Configuration */ -#define MSR_K8_UCODE_UPDATE 0xc0010020 /* update microcode */ -#define MSR_MC0_CTL_MASK 0xc0010044 -#define MSR_VM_CR 0xc0010114 /* SVM: feature control */ -#define MSR_VM_HSAVE_PA 0xc0010117 /* SVM: host save area address */ /* MSR_VM_CR related */ #define VM_CR_SVMDIS 0x10 /* SVM: disabled by BIOS */ diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c index 7afc47111b..ccf89b4613 100644 --- a/usr/src/cmd/bhyve/bhyverun.c +++ b/usr/src/cmd/bhyve/bhyverun.c @@ -52,6 +52,13 @@ __FBSDID("$FreeBSD$"); #include <sys/time.h> #include <sys/cpuset.h> +#ifdef __FreeBSD__ +#include <amd64/vmm/intel/vmcs.h> +#else +#include <intel/vmcs.h> +#endif + +#include <machine/atomic.h> #include <machine/segments.h> #ifndef WITHOUT_CAPSICUM @@ -65,7 +72,6 @@ __FBSDID("$FreeBSD$"); #include <libgen.h> #include <unistd.h> #include <assert.h> -#include <errno.h> #include <pthread.h> #include <pthread_np.h> #include <sysexits.h> @@ -109,6 +115,73 @@ __FBSDID("$FreeBSD$"); #define MB (1024UL * 1024) #define GB (1024UL * MB) +static const char * const vmx_exit_reason_desc[] = { + [EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)", + [EXIT_REASON_EXT_INTR] = "External interrupt", + [EXIT_REASON_TRIPLE_FAULT] = "Triple fault", + [EXIT_REASON_INIT] = "INIT signal", + [EXIT_REASON_SIPI] = "Start-up IPI (SIPI)", + [EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)", + [EXIT_REASON_SMI] = "Other SMI", + [EXIT_REASON_INTR_WINDOW] = "Interrupt window", + [EXIT_REASON_NMI_WINDOW] = "NMI window", + [EXIT_REASON_TASK_SWITCH] = "Task switch", + [EXIT_REASON_CPUID] = "CPUID", + [EXIT_REASON_GETSEC] = "GETSEC", + [EXIT_REASON_HLT] = "HLT", + [EXIT_REASON_INVD] = "INVD", + [EXIT_REASON_INVLPG] = "INVLPG", + [EXIT_REASON_RDPMC] = "RDPMC", + [EXIT_REASON_RDTSC] = "RDTSC", + [EXIT_REASON_RSM] = "RSM", + [EXIT_REASON_VMCALL] = "VMCALL", + [EXIT_REASON_VMCLEAR] = "VMCLEAR", + [EXIT_REASON_VMLAUNCH] = "VMLAUNCH", + [EXIT_REASON_VMPTRLD] = "VMPTRLD", + [EXIT_REASON_VMPTRST] = "VMPTRST", + [EXIT_REASON_VMREAD] = "VMREAD", + [EXIT_REASON_VMRESUME] = "VMRESUME", + [EXIT_REASON_VMWRITE] = "VMWRITE", + [EXIT_REASON_VMXOFF] = "VMXOFF", + [EXIT_REASON_VMXON] = "VMXON", + [EXIT_REASON_CR_ACCESS] = "Control-register accesses", + [EXIT_REASON_DR_ACCESS] = "MOV DR", + [EXIT_REASON_INOUT] = "I/O instruction", + [EXIT_REASON_RDMSR] = "RDMSR", + [EXIT_REASON_WRMSR] = "WRMSR", + [EXIT_REASON_INVAL_VMCS] = + "VM-entry failure due to invalid guest state", + [EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading", + [EXIT_REASON_MWAIT] = "MWAIT", + [EXIT_REASON_MTF] = "Monitor trap flag", + [EXIT_REASON_MONITOR] = "MONITOR", + [EXIT_REASON_PAUSE] = "PAUSE", + [EXIT_REASON_MCE_DURING_ENTRY] = + "VM-entry failure due to machine-check event", + [EXIT_REASON_TPR] = "TPR below threshold", + [EXIT_REASON_APIC_ACCESS] = "APIC access", + [EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI", + [EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR", + [EXIT_REASON_LDTR_TR] = "Access to LDTR or TR", + [EXIT_REASON_EPT_FAULT] = "EPT violation", + [EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration", + [EXIT_REASON_INVEPT] = "INVEPT", + [EXIT_REASON_RDTSCP] = "RDTSCP", + [EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired", + [EXIT_REASON_INVVPID] = "INVVPID", + [EXIT_REASON_WBINVD] = "WBINVD", + [EXIT_REASON_XSETBV] = "XSETBV", + [EXIT_REASON_APIC_WRITE] = "APIC write", + [EXIT_REASON_RDRAND] = "RDRAND", + [EXIT_REASON_INVPCID] = "INVPCID", + [EXIT_REASON_VMFUNC] = "VMFUNC", + [EXIT_REASON_ENCLS] = "ENCLS", + [EXIT_REASON_RDSEED] = "RDSEED", + [EXIT_REASON_PM_LOG_FULL] = "Page-modification log full", + [EXIT_REASON_XSAVES] = "XSAVES", + [EXIT_REASON_XRSTORS] = "XRSTORS" +}; + typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); @@ -594,14 +667,22 @@ vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) #define DEBUG_EPT_MISCONFIG #ifdef DEBUG_EPT_MISCONFIG -#define EXIT_REASON_EPT_MISCONFIG 49 #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 -#define VMCS_IDENT(x) ((x) | 0x80000000) static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; static int ept_misconfig_ptenum; #endif +static const char * +vmexit_vmx_desc(uint32_t exit_reason) +{ + + if (exit_reason >= nitems(vmx_exit_reason_desc) || + vmx_exit_reason_desc[exit_reason] == NULL) + return ("Unknown"); + return (vmx_exit_reason_desc[exit_reason]); +} + static int vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { @@ -611,7 +692,8 @@ vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); - fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); + fprintf(stderr, "\texit_reason\t%u (%s)\n", vmexit->u.vmx.exit_reason, + vmexit_vmx_desc(vmexit->u.vmx.exit_reason)); fprintf(stderr, "\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification); fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); @@ -962,15 +1044,13 @@ do_open(const char *vmname) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW); - if (cap_rights_limit(vm_get_device_fd(ctx), &rights) == -1 && - errno != ENOSYS) + if (caph_rights_limit(vm_get_device_fd(ctx), &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); vm_get_ioctls(&ncmds); cmds = vm_get_ioctls(NULL); if (cmds == NULL) errx(EX_OSERR, "out of memory"); - if (cap_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) == -1 && - errno != ENOSYS) + if (caph_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); free((cap_ioctl_t *)cmds); #endif diff --git a/usr/src/cmd/bhyve/block_if.c b/usr/src/cmd/bhyve/block_if.c index 8ac15dbd33..fcb4149b62 100644 --- a/usr/src/cmd/bhyve/block_if.c +++ b/usr/src/cmd/bhyve/block_if.c @@ -51,6 +51,9 @@ __FBSDID("$FreeBSD$"); #endif #include <assert.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <fcntl.h> #include <stdio.h> @@ -459,8 +462,10 @@ blockif_open(const char *optstr, const char *ident) char tname[MAXCOMLEN + 1]; #ifdef __FreeBSD__ char name[MAXPATHLEN]; -#endif char *nopt, *xopts, *cp; +#else + char *nopt, *xopts, *cp = NULL; +#endif struct blockif_ctxt *bc; struct stat sbuf; #ifdef __FreeBSD__ @@ -538,7 +543,7 @@ blockif_open(const char *optstr, const char *ident) if (ro) cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE); - if (cap_rights_limit(fd, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif @@ -628,7 +633,7 @@ blockif_open(const char *optstr, const char *ident) #endif #ifndef WITHOUT_CAPSICUM - if (cap_ioctls_limit(fd, cmds, nitems(cmds)) == -1 && errno != ENOSYS) + if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif @@ -700,6 +705,13 @@ blockif_open(const char *optstr, const char *ident) err: if (fd >= 0) close(fd); +#ifdef __FreeBSD__ + free(cp); + free(xopts); + free(nopt); +#else + free(nopt); +#endif return (NULL); } diff --git a/usr/src/cmd/bhyve/consport.c b/usr/src/cmd/bhyve/consport.c index f630cec1f3..cda2df2414 100644 --- a/usr/src/cmd/bhyve/consport.c +++ b/usr/src/cmd/bhyve/consport.c @@ -37,6 +37,9 @@ __FBSDID("$FreeBSD$"); #endif #include <sys/select.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <errno.h> #include <stdio.h> @@ -142,11 +145,9 @@ console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ, CAP_WRITE); - if (cap_rights_limit(STDIN_FILENO, &rights) == -1 && - errno != ENOSYS) + if (caph_rights_limit(STDIN_FILENO, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); - if (cap_ioctls_limit(STDIN_FILENO, cmds, nitems(cmds)) == -1 && - errno != ENOSYS) + if (caph_ioctls_limit(STDIN_FILENO, cmds, nitems(cmds)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif ttyopen(); diff --git a/usr/src/cmd/bhyve/dbgport.c b/usr/src/cmd/bhyve/dbgport.c index 6b3d26336f..88a616b50d 100644 --- a/usr/src/cmd/bhyve/dbgport.c +++ b/usr/src/cmd/bhyve/dbgport.c @@ -40,6 +40,9 @@ __FBSDID("$FreeBSD$"); #include <netinet/tcp.h> #include <sys/uio.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <stdio.h> #include <stdlib.h> @@ -169,7 +172,7 @@ init_dbgport(int sport) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_ACCEPT, CAP_READ, CAP_WRITE); - if (cap_rights_limit(listen_fd, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(listen_fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif diff --git a/usr/src/cmd/bhyve/gdb.c b/usr/src/cmd/bhyve/gdb.c index 8f464816f0..69bcf53c31 100644 --- a/usr/src/cmd/bhyve/gdb.c +++ b/usr/src/cmd/bhyve/gdb.c @@ -1269,9 +1269,9 @@ limit_gdb_socket(int s) cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE, CAP_SETSOCKOPT, CAP_IOCTL); - if (cap_rights_limit(s, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(s, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); - if (cap_ioctls_limit(s, ioctls, nitems(ioctls)) == -1 && errno != ENOSYS) + if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); } #endif diff --git a/usr/src/cmd/bhyve/iov.c b/usr/src/cmd/bhyve/iov.c index c564bd8ae5..54ea22aa94 100644 --- a/usr/src/cmd/bhyve/iov.c +++ b/usr/src/cmd/bhyve/iov.c @@ -2,6 +2,7 @@ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 Jakub Klama <jceel@FreeBSD.org>. + * Copyright (c) 2018 Alexander Motin <mav@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,12 +40,12 @@ __FBSDID("$FreeBSD$"); #include "iov.h" void -seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2, size_t *niov2, +seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, int *niov2, size_t seek) { size_t remainder = 0; size_t left = seek; - size_t i, j; + int i, j; for (i = 0; i < niov1; i++) { size_t toseek = MIN(left, iov1[i].iov_len); @@ -69,9 +70,10 @@ seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2, size_t *niov2, } size_t -count_iov(struct iovec *iov, size_t niov) +count_iov(const struct iovec *iov, int niov) { - size_t i, total = 0; + size_t total = 0; + int i; for (i = 0; i < niov; i++) total += iov[i].iov_len; @@ -79,35 +81,36 @@ count_iov(struct iovec *iov, size_t niov) return (total); } -size_t -truncate_iov(struct iovec *iov, size_t niov, size_t length) +void +truncate_iov(struct iovec *iov, int *niov, size_t length) { - size_t i, done = 0; + size_t done = 0; + int i; - for (i = 0; i < niov; i++) { + for (i = 0; i < *niov; i++) { size_t toseek = MIN(length - done, iov[i].iov_len); done += toseek; - if (toseek < iov[i].iov_len) { + if (toseek <= iov[i].iov_len) { iov[i].iov_len = toseek; - return (i + 1); + *niov = i + 1; + return; } } - - return (niov); } ssize_t -iov_to_buf(struct iovec *iov, size_t niov, void **buf) +iov_to_buf(const struct iovec *iov, int niov, void **buf) { - size_t i, ptr = 0, total = 0; + size_t ptr, total; + int i; - for (i = 0; i < niov; i++) { - total += iov[i].iov_len; - *buf = realloc(*buf, total); - if (*buf == NULL) - return (-1); + total = count_iov(iov, niov); + *buf = realloc(*buf, total); + if (*buf == NULL) + return (-1); + for (i = 0, ptr = 0; i < niov; i++) { memcpy(*buf + ptr, iov[i].iov_base, iov[i].iov_len); ptr += iov[i].iov_len; } @@ -116,12 +119,12 @@ iov_to_buf(struct iovec *iov, size_t niov, void **buf) } ssize_t -buf_to_iov(void *buf, size_t buflen, struct iovec *iov, size_t niov, +buf_to_iov(const void *buf, size_t buflen, struct iovec *iov, int niov, size_t seek) { struct iovec *diov; - size_t ndiov, i; - uintptr_t off = 0; + int ndiov, i; + size_t off = 0, len; if (seek > 0) { diov = malloc(sizeof(struct iovec) * niov); @@ -131,11 +134,15 @@ buf_to_iov(void *buf, size_t buflen, struct iovec *iov, size_t niov, ndiov = niov; } - for (i = 0; i < ndiov; i++) { - memcpy(diov[i].iov_base, buf + off, diov[i].iov_len); - off += diov[i].iov_len; + for (i = 0; i < ndiov && off < buflen; i++) { + len = MIN(diov[i].iov_len, buflen - off); + memcpy(diov[i].iov_base, buf + off, len); + off += len; } + if (seek > 0) + free(diov); + return ((ssize_t)off); } diff --git a/usr/src/cmd/bhyve/iov.h b/usr/src/cmd/bhyve/iov.h index 87fa4c1dcf..e3b5916edb 100644 --- a/usr/src/cmd/bhyve/iov.h +++ b/usr/src/cmd/bhyve/iov.h @@ -2,6 +2,7 @@ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 Jakub Klama <jceel@FreeBSD.org>. + * Copyright (c) 2018 Alexander Motin <mav@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,12 +33,12 @@ #ifndef _IOV_H_ #define _IOV_H_ -void seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2, - size_t *niov2, size_t seek); -size_t truncate_iov(struct iovec *iov, size_t niov, size_t length); -size_t count_iov(struct iovec *iov, size_t niov); -ssize_t iov_to_buf(struct iovec *iov, size_t niov, void **buf); -ssize_t buf_to_iov(void *buf, size_t buflen, struct iovec *iov, size_t niov, +void seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, + int *niov2, size_t seek); +void truncate_iov(struct iovec *iov, int *niov, size_t length); +size_t count_iov(const struct iovec *iov, int niov); +ssize_t iov_to_buf(const struct iovec *iov, int niov, void **buf); +ssize_t buf_to_iov(const void *buf, size_t buflen, struct iovec *iov, int niov, size_t seek); #endif /* _IOV_H_ */ diff --git a/usr/src/cmd/bhyve/mevent.c b/usr/src/cmd/bhyve/mevent.c index 4ad33a9f01..a258fd3047 100644 --- a/usr/src/cmd/bhyve/mevent.c +++ b/usr/src/cmd/bhyve/mevent.c @@ -41,6 +41,9 @@ __FBSDID("$FreeBSD$"); #include <assert.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <errno.h> #include <stdlib.h> @@ -599,7 +602,7 @@ mevent_dispatch(void) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_KQUEUE); - if (cap_rights_limit(mfd, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(mfd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif @@ -616,9 +619,9 @@ mevent_dispatch(void) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); - if (cap_rights_limit(mevent_pipefd[0], &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(mevent_pipefd[0], &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); - if (cap_rights_limit(mevent_pipefd[1], &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(mevent_pipefd[1], &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif diff --git a/usr/src/cmd/bhyve/pci_ahci.c b/usr/src/cmd/bhyve/pci_ahci.c index 39b441d876..1e3feffcc2 100644 --- a/usr/src/cmd/bhyve/pci_ahci.c +++ b/usr/src/cmd/bhyve/pci_ahci.c @@ -105,7 +105,7 @@ enum sata_fis_type { * ATA commands */ #define ATA_SF_ENAB_SATA_SF 0x10 -#define ATA_SATA_SF_AN 0x05 +#define ATA_SATA_SF_AN 0x05 #define ATA_SF_DIS_SATA_SF 0x90 /* @@ -119,6 +119,8 @@ static FILE *dbg; #endif #define WPRINTF(format, arg...) printf(format, ##arg) +#define AHCI_PORT_IDENT 20 + 1 + struct ahci_ioreq { struct blockif_req io_req; struct ahci_port *io_pr; @@ -136,7 +138,7 @@ struct ahci_port { struct pci_ahci_softc *pr_sc; uint8_t *cmd_lst; uint8_t *rfis; - char ident[20 + 1]; + char ident[AHCI_PORT_IDENT]; int port; int atapi; int reset; @@ -2385,7 +2387,8 @@ pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi) MD5Init(&mdctx); MD5Update(&mdctx, opts, strlen(opts)); MD5Final(digest, &mdctx); - sprintf(sc->port[p].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X", + snprintf(sc->port[p].ident, AHCI_PORT_IDENT, + "BHYVE-%02X%02X-%02X%02X-%02X%02X", digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); diff --git a/usr/src/cmd/bhyve/pci_e82545.c b/usr/src/cmd/bhyve/pci_e82545.c index 3f5a6ef0c5..e211b5cf9c 100644 --- a/usr/src/cmd/bhyve/pci_e82545.c +++ b/usr/src/cmd/bhyve/pci_e82545.c @@ -46,6 +46,9 @@ __FBSDID("$FreeBSD$"); #include <sys/filio.h> #endif +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <errno.h> #include <fcntl.h> @@ -2265,7 +2268,7 @@ e82545_open_tap(struct e82545_softc *sc, char *opts) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); - if (cap_rights_limit(sc->esc_tapfd, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(sc->esc_tapfd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif diff --git a/usr/src/cmd/bhyve/pci_fbuf.c b/usr/src/cmd/bhyve/pci_fbuf.c index 5a04c41e54..8d24dde9da 100644 --- a/usr/src/cmd/bhyve/pci_fbuf.c +++ b/usr/src/cmd/bhyve/pci_fbuf.c @@ -121,8 +121,9 @@ static void pci_fbuf_usage(char *opt) { - fprintf(stderr, "Invalid fbuf emulation \"%s\"\r\n", opt); - fprintf(stderr, "fbuf: {wait,}{vga=on|io|off,}rfb=<ip>:port\r\n"); + fprintf(stderr, "Invalid fbuf emulation option \"%s\"\r\n", opt); + fprintf(stderr, "fbuf: {wait,}{vga=on|io|off,}rfb=<ip>:port" + "{,w=width}{,h=height}\r\n"); } static void @@ -254,13 +255,33 @@ pci_fbuf_parse_opts(struct pci_fbuf_softc *sc, char *opts) xopts, config)); if (!strcmp(xopts, "tcp") || !strcmp(xopts, "rfb")) { - /* parse host-ip:port */ - tmpstr = strsep(&config, ":"); - if (!config) - sc->rfb_port = atoi(tmpstr); - else { - sc->rfb_port = atoi(config); + /* + * IPv4 -- host-ip:port + * IPv6 -- [host-ip%zone]:port + * XXX for now port is mandatory. + */ + tmpstr = strsep(&config, "]"); + if (config) { + if (tmpstr[0] == '[') + tmpstr++; sc->rfb_host = tmpstr; + if (config[0] == ':') + config++; + else { + pci_fbuf_usage(xopts); + ret = -1; + goto done; + } + sc->rfb_port = atoi(config); + } else { + config = tmpstr; + tmpstr = strsep(&config, ":"); + if (!config) + sc->rfb_port = atoi(tmpstr); + else { + sc->rfb_port = atoi(config); + sc->rfb_host = tmpstr; + } } #ifndef __FreeBSD__ } else if (!strcmp(xopts, "unix")) { @@ -276,7 +297,7 @@ pci_fbuf_parse_opts(struct pci_fbuf_softc *sc, char *opts) sc->vga_enabled = 1; sc->vga_full = 1; } else { - pci_fbuf_usage(opts); + pci_fbuf_usage(xopts); ret = -1; goto done; } diff --git a/usr/src/cmd/bhyve/pci_nvme.c b/usr/src/cmd/bhyve/pci_nvme.c index a274b19b0b..387611c888 100644 --- a/usr/src/cmd/bhyve/pci_nvme.c +++ b/usr/src/cmd/bhyve/pci_nvme.c @@ -93,6 +93,16 @@ static int nvme_debug = 0; /* helpers */ +/* Convert a zero-based value into a one-based value */ +#define ONE_BASED(zero) ((zero) + 1) +/* Convert a one-based value into a zero-based value */ +#define ZERO_BASED(one) ((one) - 1) + +/* Encode number of SQ's and CQ's for Set/Get Features */ +#define NVME_FEATURE_NUM_QUEUES(sc) \ + (ZERO_BASED((sc)->num_squeues) & 0xffff) | \ + (ZERO_BASED((sc)->num_cqueues) & 0xffff) << 16; + #define NVME_DOORBELL_OFFSET offsetof(struct nvme_registers, doorbell) enum nvme_controller_register_offsets { @@ -192,8 +202,8 @@ struct pci_nvme_softc { struct pci_nvme_blockstore nvstore; - uint16_t max_qentries; /* max entries per queue */ - uint32_t max_queues; + uint16_t max_qentries; /* max entries per queue */ + uint32_t max_queues; /* max number of IO SQ's or CQ's */ uint32_t num_cqueues; uint32_t num_squeues; @@ -203,7 +213,10 @@ struct pci_nvme_softc { uint32_t ioslots; sem_t iosemlock; - /* status and guest memory mapped queues */ + /* + * Memory mapped Submission and Completion queues + * Each array includes both Admin and IO queues + */ struct nvme_completion_queue *compl_queues; struct nvme_submission_queue *submit_queues; @@ -251,11 +264,13 @@ static void pci_nvme_io_partial(struct blockif_req *br, int err); (NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT)) static __inline void -cpywithpad(char *dst, int dst_size, const char *src, char pad) +cpywithpad(char *dst, size_t dst_size, const char *src, char pad) { - int len = strnlen(src, dst_size); + size_t len; + + len = strnlen(src, dst_size); + memset(dst, pad, dst_size); memcpy(dst, src, len); - memset(dst + len, pad, dst_size - len); } static __inline void @@ -355,7 +370,7 @@ pci_nvme_reset_locked(struct pci_nvme_softc *sc) { DPRINTF(("%s\r\n", __func__)); - sc->regs.cap_lo = (sc->max_qentries & NVME_CAP_LO_REG_MQES_MASK) | + sc->regs.cap_lo = (ZERO_BASED(sc->max_qentries) & NVME_CAP_LO_REG_MQES_MASK) | (1 << NVME_CAP_LO_REG_CQR_SHIFT) | (60 << NVME_CAP_LO_REG_TO_SHIFT); @@ -368,7 +383,7 @@ pci_nvme_reset_locked(struct pci_nvme_softc *sc) sc->num_cqueues = sc->num_squeues = sc->max_queues; if (sc->submit_queues != NULL) { - for (int i = 0; i <= sc->max_queues; i++) { + for (int i = 0; i < sc->num_squeues + 1; i++) { /* * The Admin Submission Queue is at index 0. * It must not be changed at reset otherwise the @@ -378,26 +393,31 @@ pci_nvme_reset_locked(struct pci_nvme_softc *sc) sc->submit_queues[i].qbase = NULL; sc->submit_queues[i].size = 0; sc->submit_queues[i].cqid = 0; - - sc->compl_queues[i].qbase = NULL; - sc->compl_queues[i].size = 0; } sc->submit_queues[i].tail = 0; sc->submit_queues[i].head = 0; sc->submit_queues[i].busy = 0; - - sc->compl_queues[i].tail = 0; - sc->compl_queues[i].head = 0; } } else - sc->submit_queues = calloc(sc->max_queues + 1, + sc->submit_queues = calloc(sc->num_squeues + 1, sizeof(struct nvme_submission_queue)); - if (sc->compl_queues == NULL) { - sc->compl_queues = calloc(sc->max_queues + 1, + if (sc->compl_queues != NULL) { + for (int i = 0; i < sc->num_cqueues + 1; i++) { + /* See Admin Submission Queue note above */ + if (i != 0) { + sc->compl_queues[i].qbase = NULL; + sc->compl_queues[i].size = 0; + } + + sc->compl_queues[i].tail = 0; + sc->compl_queues[i].head = 0; + } + } else { + sc->compl_queues = calloc(sc->num_cqueues + 1, sizeof(struct nvme_completion_queue)); - for (int i = 0; i <= sc->num_cqueues; i++) + for (int i = 0; i < sc->num_cqueues + 1; i++) pthread_mutex_init(&sc->compl_queues[i].mtx, NULL); } } @@ -441,7 +461,7 @@ nvme_opc_delete_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, uint16_t qid = command->cdw10 & 0xffff; DPRINTF(("%s DELETE_IO_SQ %u\r\n", __func__, qid)); - if (qid == 0 || qid > sc->num_cqueues) { + if (qid == 0 || qid > sc->num_squeues) { WPRINTF(("%s NOT PERMITTED queue id %u / num_squeues %u\r\n", __func__, qid, sc->num_squeues)); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, @@ -462,7 +482,7 @@ nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, uint16_t qid = command->cdw10 & 0xffff; struct nvme_submission_queue *nsq; - if (qid > sc->num_squeues) { + if ((qid == 0) || (qid > sc->num_squeues)) { WPRINTF(("%s queue index %u > num_squeues %u\r\n", __func__, qid, sc->num_squeues)); pci_nvme_status_tc(&compl->status, @@ -472,7 +492,7 @@ nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, } nsq = &sc->submit_queues[qid]; - nsq->size = ((command->cdw10 >> 16) & 0xffff) + 1; + nsq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff); nsq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(struct nvme_command) * (size_t)nsq->size); @@ -527,7 +547,7 @@ nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, uint16_t qid = command->cdw10 & 0xffff; struct nvme_completion_queue *ncq; - if (qid > sc->num_cqueues) { + if ((qid == 0) || (qid > sc->num_cqueues)) { WPRINTF(("%s queue index %u > num_cqueues %u\r\n", __func__, qid, sc->num_cqueues)); pci_nvme_status_tc(&compl->status, @@ -539,7 +559,7 @@ nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, ncq = &sc->compl_queues[qid]; ncq->intr_en = (command->cdw11 & NVME_CMD_CDW11_IEN) >> 1; ncq->intr_vec = (command->cdw11 >> 16) & 0xffff; - ncq->size = ((command->cdw10 >> 16) & 0xffff) + 1; + ncq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff); ncq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, @@ -652,6 +672,45 @@ nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command, } static int +nvme_set_feature_queues(struct pci_nvme_softc* sc, struct nvme_command* command, + struct nvme_completion* compl) +{ + uint16_t nqr; /* Number of Queues Requested */ + + nqr = command->cdw11 & 0xFFFF; + if (nqr == 0xffff) { + WPRINTF(("%s: Illegal NSQR value %#x\n", __func__, nqr)); + pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); + return (-1); + } + + sc->num_squeues = ONE_BASED(nqr); + if (sc->num_squeues > sc->max_queues) { + DPRINTF(("NSQR=%u is greater than max %u\n", sc->num_squeues, + sc->max_queues)); + sc->num_squeues = sc->max_queues; + } + + nqr = (command->cdw11 >> 16) & 0xFFFF; + if (nqr == 0xffff) { + WPRINTF(("%s: Illegal NCQR value %#x\n", __func__, nqr)); + pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); + return (-1); + } + + sc->num_cqueues = ONE_BASED(nqr); + if (sc->num_cqueues > sc->max_queues) { + DPRINTF(("NCQR=%u is greater than max %u\n", sc->num_cqueues, + sc->max_queues)); + sc->num_cqueues = sc->max_queues; + } + + compl->cdw0 = NVME_FEATURE_NUM_QUEUES(sc); + + return (0); +} + +static int nvme_opc_set_features(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { @@ -681,19 +740,7 @@ nvme_opc_set_features(struct pci_nvme_softc* sc, struct nvme_command* command, DPRINTF((" volatile write cache 0x%x\r\n", command->cdw11)); break; case NVME_FEAT_NUMBER_OF_QUEUES: - sc->num_squeues = command->cdw11 & 0xFFFF; - sc->num_cqueues = (command->cdw11 >> 16) & 0xFFFF; - DPRINTF((" number of queues (submit %u, completion %u)\r\n", - sc->num_squeues, sc->num_cqueues)); - - if (sc->num_squeues == 0 || sc->num_squeues > sc->max_queues) - sc->num_squeues = sc->max_queues; - if (sc->num_cqueues == 0 || sc->num_cqueues > sc->max_queues) - sc->num_cqueues = sc->max_queues; - - compl->cdw0 = (sc->num_squeues & 0xFFFF) | - ((sc->num_cqueues & 0xFFFF) << 16); - + nvme_set_feature_queues(sc, command, compl); break; case NVME_FEAT_INTERRUPT_COALESCING: DPRINTF((" interrupt coalescing 0x%x\r\n", command->cdw11)); @@ -709,7 +756,7 @@ nvme_opc_set_features(struct pci_nvme_softc* sc, struct nvme_command* command, DPRINTF((" interrupt vector configuration 0x%x\r\n", command->cdw11)); - for (uint32_t i = 0; i <= sc->num_cqueues; i++) { + for (uint32_t i = 0; i < sc->num_cqueues + 1; i++) { if (sc->compl_queues[i].intr_vec == iv) { if (command->cdw11 & (1 << 16)) sc->compl_queues[i].intr_en |= @@ -791,16 +838,7 @@ nvme_opc_get_features(struct pci_nvme_softc* sc, struct nvme_command* command, DPRINTF((" volatile write cache\r\n")); break; case NVME_FEAT_NUMBER_OF_QUEUES: - compl->cdw0 = 0; - if (sc->num_squeues == 0) - compl->cdw0 |= sc->max_queues & 0xFFFF; - else - compl->cdw0 |= sc->num_squeues & 0xFFFF; - - if (sc->num_cqueues == 0) - compl->cdw0 |= (sc->max_queues & 0xFFFF) << 16; - else - compl->cdw0 |= (sc->num_cqueues & 0xFFFF) << 16; + compl->cdw0 = NVME_FEATURE_NUM_QUEUES(sc); DPRINTF((" number of queues (submit %u, completion %u)\r\n", compl->cdw0 & 0xFFFF, @@ -954,6 +992,7 @@ pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value) cq = &sc->compl_queues[0]; cp = &(cq->qbase)[cq->tail]; + cp->cdw0 = compl.cdw0; cp->sqid = 0; cp->sqhd = sqhead; cp->cid = cmd->cid; @@ -1819,7 +1858,7 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) /* allocate size of nvme registers + doorbell space for all queues */ pci_membar_sz = sizeof(struct nvme_registers) + - 2*sizeof(uint32_t)*(sc->max_queues); + 2*sizeof(uint32_t)*(sc->max_queues + 1); DPRINTF(("nvme membar size: %u\r\n", pci_membar_sz)); @@ -1829,7 +1868,7 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) goto done; } - error = pci_emul_add_msixcap(pi, sc->max_queues, NVME_MSIX_BAR); + error = pci_emul_add_msixcap(pi, sc->max_queues + 1, NVME_MSIX_BAR); if (error) { WPRINTF(("%s pci add msixcap failed\r\n", __func__)); goto done; diff --git a/usr/src/cmd/bhyve/pci_passthru.c b/usr/src/cmd/bhyve/pci_passthru.c index 2ed490c71a..3782914cd5 100644 --- a/usr/src/cmd/bhyve/pci_passthru.c +++ b/usr/src/cmd/bhyve/pci_passthru.c @@ -47,6 +47,9 @@ __FBSDID("$FreeBSD$"); #include <machine/iodev.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <stdio.h> #include <stdlib.h> #include <string.h> diff --git a/usr/src/cmd/bhyve/pci_virtio_block.c b/usr/src/cmd/bhyve/pci_virtio_block.c index 4040ed8305..b0c3b06187 100644 --- a/usr/src/cmd/bhyve/pci_virtio_block.c +++ b/usr/src/cmd/bhyve/pci_virtio_block.c @@ -79,7 +79,7 @@ __FBSDID("$FreeBSD$"); #define VTBLK_S_IOERR 1 #define VTBLK_S_UNSUPP 2 -#define VTBLK_BLK_ID_BYTES 20 +#define VTBLK_BLK_ID_BYTES 20 + 1 /* Capability bits */ #define VTBLK_F_SEG_MAX (1 << 2) /* Maximum request segments */ @@ -391,7 +391,8 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) MD5Init(&mdctx); MD5Update(&mdctx, opts, strlen(opts)); MD5Final(digest, &mdctx); - sprintf(sc->vbsc_ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X", + snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES, + "BHYVE-%02X%02X-%02X%02X-%02X%02X", digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); /* setup virtio block config space */ diff --git a/usr/src/cmd/bhyve/pci_virtio_console.c b/usr/src/cmd/bhyve/pci_virtio_console.c index e1448780f1..90437662df 100644 --- a/usr/src/cmd/bhyve/pci_virtio_console.c +++ b/usr/src/cmd/bhyve/pci_virtio_console.c @@ -47,6 +47,9 @@ __FBSDID("$FreeBSD$"); #include <sys/socket.h> #include <sys/un.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <errno.h> #include <fcntl.h> @@ -347,7 +350,7 @@ pci_vtcon_sock_add(struct pci_vtcon_softc *sc, const char *name, #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE); - if (cap_rights_limit(s, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(s, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif diff --git a/usr/src/cmd/bhyve/pci_virtio_net.c b/usr/src/cmd/bhyve/pci_virtio_net.c index f5eadf4a2c..74efbcaee1 100644 --- a/usr/src/cmd/bhyve/pci_virtio_net.c +++ b/usr/src/cmd/bhyve/pci_virtio_net.c @@ -61,6 +61,9 @@ __FBSDID("$FreeBSD$"); #include <net/netmap_user.h> #endif +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <errno.h> #include <fcntl.h> @@ -881,7 +884,7 @@ pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); - if (cap_rights_limit(sc->vsc_tapfd, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(sc->vsc_tapfd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif diff --git a/usr/src/cmd/bhyve/pci_virtio_rnd.c b/usr/src/cmd/bhyve/pci_virtio_rnd.c index 44bc55e003..5f470c03a6 100644 --- a/usr/src/cmd/bhyve/pci_virtio_rnd.c +++ b/usr/src/cmd/bhyve/pci_virtio_rnd.c @@ -43,6 +43,9 @@ __FBSDID("$FreeBSD$"); #include <sys/linker_set.h> #include <sys/uio.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <errno.h> #include <fcntl.h> @@ -158,7 +161,7 @@ pci_vtrnd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_READ); - if (cap_rights_limit(fd, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif @@ -168,6 +171,7 @@ pci_vtrnd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) len = read(fd, &v, sizeof(v)); if (len <= 0) { WPRINTF(("vtrnd: /dev/random not ready, read(): %d", len)); + close(fd); return (1); } diff --git a/usr/src/cmd/bhyve/pci_virtio_scsi.c b/usr/src/cmd/bhyve/pci_virtio_scsi.c index aa906bb854..238f07398b 100644 --- a/usr/src/cmd/bhyve/pci_virtio_scsi.c +++ b/usr/src/cmd/bhyve/pci_virtio_scsi.c @@ -105,7 +105,6 @@ struct pci_vtscsi_config { struct pci_vtscsi_queue { struct pci_vtscsi_softc * vsq_sc; struct vqueue_info * vsq_vq; - int vsq_ctl_fd; pthread_mutex_t vsq_mtx; pthread_mutex_t vsq_qmtx; pthread_cond_t vsq_cv; @@ -389,7 +388,7 @@ pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc, ctl_scsi_zero_io(io); io->io_hdr.io_type = CTL_IO_TASK; - io->io_hdr.nexus.targ_port = tmf->lun[1]; + io->io_hdr.nexus.initid = sc->vss_iid; io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(tmf->lun); io->taskio.tag_type = CTL_TAG_SIMPLE; io->taskio.tag_num = (uint32_t)tmf->id; @@ -462,7 +461,7 @@ pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, struct pci_vtscsi_req_cmd_wr *cmd_wr; struct iovec data_iov_in[VTSCSI_MAXSEG], data_iov_out[VTSCSI_MAXSEG]; union ctl_io *io; - size_t data_niov_in, data_niov_out; + int data_niov_in, data_niov_out; void *ext_data_ptr = NULL; uint32_t ext_data_len = 0, ext_sg_entries = 0; int err; @@ -472,15 +471,15 @@ pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, seek_iov(iov_out, niov_out, data_iov_out, &data_niov_out, VTSCSI_OUT_HEADER_LEN(sc)); - truncate_iov(iov_in, niov_in, VTSCSI_IN_HEADER_LEN(sc)); - truncate_iov(iov_out, niov_out, VTSCSI_OUT_HEADER_LEN(sc)); + truncate_iov(iov_in, &niov_in, VTSCSI_IN_HEADER_LEN(sc)); + truncate_iov(iov_out, &niov_out, VTSCSI_OUT_HEADER_LEN(sc)); iov_to_buf(iov_in, niov_in, (void **)&cmd_rd); cmd_wr = malloc(VTSCSI_OUT_HEADER_LEN(sc)); io = ctl_scsi_alloc_io(sc->vss_iid); ctl_scsi_zero_io(io); - io->io_hdr.nexus.targ_port = cmd_rd->lun[1]; + io->io_hdr.nexus.initid = sc->vss_iid; io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(cmd_rd->lun); io->io_hdr.io_type = CTL_IO_SCSI; @@ -499,7 +498,21 @@ pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, io->scsiio.sense_len = sc->vss_config.sense_size; io->scsiio.tag_num = (uint32_t)cmd_rd->id; - io->scsiio.tag_type = CTL_TAG_SIMPLE; + switch (cmd_rd->task_attr) { + case VIRTIO_SCSI_S_ORDERED: + io->scsiio.tag_type = CTL_TAG_ORDERED; + break; + case VIRTIO_SCSI_S_HEAD: + io->scsiio.tag_type = CTL_TAG_HEAD_OF_QUEUE; + break; + case VIRTIO_SCSI_S_ACA: + io->scsiio.tag_type = CTL_TAG_ACA; + break; + case VIRTIO_SCSI_S_SIMPLE: + default: + io->scsiio.tag_type = CTL_TAG_SIMPLE; + break; + } io->scsiio.ext_sg_entries = ext_sg_entries; io->scsiio.ext_data_ptr = ext_data_ptr; io->scsiio.ext_data_len = ext_data_len; @@ -515,7 +528,7 @@ pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, sbuf_delete(sb); } - err = ioctl(q->vsq_ctl_fd, CTL_IO, io); + err = ioctl(sc->vss_ctl_fd, CTL_IO, io); if (err != 0) { WPRINTF(("CTL_IO: err=%d (%s)\n", errno, strerror(errno))); cmd_wr->response = VIRTIO_SCSI_S_FAILURE; @@ -552,7 +565,8 @@ pci_vtscsi_controlq_notify(void *vsc, struct vqueue_info *vq) n = vq_getchain(vq, &idx, iov, VTSCSI_MAXSEG, NULL); bufsize = iov_to_buf(iov, n, &buf); iolen = pci_vtscsi_control_handle(sc, buf, bufsize); - buf_to_iov(buf + bufsize - iolen, iolen, iov, n, iolen); + buf_to_iov(buf + bufsize - iolen, iolen, iov, n, + bufsize - iolen); /* * Release this chain and handle more @@ -560,6 +574,7 @@ pci_vtscsi_controlq_notify(void *vsc, struct vqueue_info *vq) vq_relchain(vq, idx, iolen); } vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ + free(buf); } static void @@ -623,14 +638,8 @@ pci_vtscsi_init_queue(struct pci_vtscsi_softc *sc, int i; queue->vsq_sc = sc; - queue->vsq_ctl_fd = open("/dev/cam/ctl", O_RDWR); queue->vsq_vq = &sc->vss_vq[num + 2]; - if (queue->vsq_ctl_fd < 0) { - WPRINTF(("cannot open /dev/cam/ctl: %s\n", strerror(errno))); - return (-1); - } - pthread_mutex_init(&queue->vsq_mtx, NULL); pthread_mutex_init(&queue->vsq_qmtx, NULL); pthread_cond_init(&queue->vsq_cv, NULL); @@ -656,26 +665,36 @@ static int pci_vtscsi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { struct pci_vtscsi_softc *sc; - char *optname = NULL; - char *opt; - int i; + char *opt, *optname; + const char *devname; + int i, optidx = 0; sc = calloc(1, sizeof(struct pci_vtscsi_softc)); - sc->vss_ctl_fd = open("/dev/cam/ctl", O_RDWR); + devname = "/dev/cam/ctl"; + while ((opt = strsep(&opts, ",")) != NULL) { + optname = strsep(&opt, "="); + if (opt == NULL && optidx == 0) { + if (optname[0] != 0) + devname = optname; + } else if (strcmp(optname, "dev") == 0 && opt != NULL) { + devname = opt; + } else if (strcmp(optname, "iid") == 0 && opt != NULL) { + sc->vss_iid = strtoul(opt, NULL, 10); + } else { + fprintf(stderr, "Invalid option %s\n", optname); + free(sc); + return (1); + } + optidx++; + } + sc->vss_ctl_fd = open(devname, O_RDWR); if (sc->vss_ctl_fd < 0) { - WPRINTF(("cannot open /dev/cam/ctl: %s\n", strerror(errno))); + WPRINTF(("cannot open %s: %s\n", devname, strerror(errno))); + free(sc); return (1); } - while ((opt = strsep(&opts, ",")) != NULL) { - if ((optname = strsep(&opt, "=")) != NULL) { - if (strcmp(optname, "iid") == 0) { - sc->vss_iid = strtoul(opt, NULL, 10); - } - } - } - vi_softc_linkup(&sc->vss_vs, &vtscsi_vi_consts, sc, pi, sc->vss_vq); sc->vss_vs.vs_mtx = &sc->vss_mtx; diff --git a/usr/src/cmd/bhyve/pci_xhci.c b/usr/src/cmd/bhyve/pci_xhci.c index be87453bf1..988e6933cc 100644 --- a/usr/src/cmd/bhyve/pci_xhci.c +++ b/usr/src/cmd/bhyve/pci_xhci.c @@ -2640,7 +2640,11 @@ pci_xhci_parse_opts(struct pci_xhci_softc *sc, char *opts) struct pci_xhci_dev_emu *dev; struct usb_devemu *ue; void *devsc; +#ifdef __FreeBSD__ char *uopt, *xopts, *config; +#else + char *uopt = NULL, *xopts, *config; +#endif int usb3_port, usb2_port, i; usb3_port = sc->usb3_port_start - 1; @@ -2717,6 +2721,10 @@ pci_xhci_parse_opts(struct pci_xhci_softc *sc, char *opts) sc->ndevices++; } +#ifdef __FreeBSD__ + if (uopt != NULL) + free(uopt); +#endif portsfinal: sc->portregs = calloc(XHCI_MAX_DEVS, sizeof(struct pci_xhci_portregs)); @@ -2746,6 +2754,7 @@ done: free(devices); } } + free(uopt); return (sc->ndevices); } diff --git a/usr/src/cmd/bhyve/ps2kbd.c b/usr/src/cmd/bhyve/ps2kbd.c index ae82957ffa..5453a26949 100644 --- a/usr/src/cmd/bhyve/ps2kbd.c +++ b/usr/src/cmd/bhyve/ps2kbd.c @@ -76,6 +76,107 @@ struct ps2kbd_softc { uint8_t curcmd; /* current command for next byte */ }; +#define SCANCODE_E0_PREFIX 1 +struct extended_translation { + uint32_t keysym; + uint8_t scancode; + int flags; +}; + +/* + * FIXME: Pause/break and Print Screen/SysRq require special handling. + */ +static const struct extended_translation extended_translations[] = { + {0xff08, 0x66}, /* Back space */ + {0xff09, 0x0d}, /* Tab */ + {0xff0d, 0x5a}, /* Return */ + {0xff1b, 0x76}, /* Escape */ + {0xff50, 0x6c, SCANCODE_E0_PREFIX}, /* Home */ + {0xff51, 0x6b, SCANCODE_E0_PREFIX}, /* Left arrow */ + {0xff52, 0x75, SCANCODE_E0_PREFIX}, /* Up arrow */ + {0xff53, 0x74, SCANCODE_E0_PREFIX}, /* Right arrow */ + {0xff54, 0x72, SCANCODE_E0_PREFIX}, /* Down arrow */ + {0xff55, 0x7d, SCANCODE_E0_PREFIX}, /* PgUp */ + {0xff56, 0x7a, SCANCODE_E0_PREFIX}, /* PgDown */ + {0xff57, 0x69, SCANCODE_E0_PREFIX}, /* End */ + {0xff63, 0x70, SCANCODE_E0_PREFIX}, /* Ins */ + {0xff8d, 0x5a, SCANCODE_E0_PREFIX}, /* Keypad Enter */ + {0xffe1, 0x12}, /* Left shift */ + {0xffe2, 0x59}, /* Right shift */ + {0xffe3, 0x14}, /* Left control */ + {0xffe4, 0x14, SCANCODE_E0_PREFIX}, /* Right control */ + /* {0xffe7, XXX}, Left meta */ + /* {0xffe8, XXX}, Right meta */ + {0xffe9, 0x11}, /* Left alt */ + {0xfe03, 0x11, SCANCODE_E0_PREFIX}, /* AltGr */ + {0xffea, 0x11, SCANCODE_E0_PREFIX}, /* Right alt */ + {0xffeb, 0x1f, SCANCODE_E0_PREFIX}, /* Left Windows */ + {0xffec, 0x27, SCANCODE_E0_PREFIX}, /* Right Windows */ + {0xffbe, 0x05}, /* F1 */ + {0xffbf, 0x06}, /* F2 */ + {0xffc0, 0x04}, /* F3 */ + {0xffc1, 0x0c}, /* F4 */ + {0xffc2, 0x03}, /* F5 */ + {0xffc3, 0x0b}, /* F6 */ + {0xffc4, 0x83}, /* F7 */ + {0xffc5, 0x0a}, /* F8 */ + {0xffc6, 0x01}, /* F9 */ + {0xffc7, 0x09}, /* F10 */ + {0xffc8, 0x78}, /* F11 */ + {0xffc9, 0x07}, /* F12 */ + {0xffff, 0x71, SCANCODE_E0_PREFIX}, /* Del */ + {0xff14, 0x7e}, /* ScrollLock */ + /* NumLock and Keypads*/ + {0xff7f, 0x77}, /* NumLock */ + {0xffaf, 0x4a, SCANCODE_E0_PREFIX}, /* Keypad slash */ + {0xffaa, 0x7c}, /* Keypad asterisk */ + {0xffad, 0x7b}, /* Keypad minus */ + {0xffab, 0x79}, /* Keypad plus */ + {0xffb7, 0x6c}, /* Keypad 7 */ + {0xff95, 0x6c}, /* Keypad home */ + {0xffb8, 0x75}, /* Keypad 8 */ + {0xff97, 0x75}, /* Keypad up arrow */ + {0xffb9, 0x7d}, /* Keypad 9 */ + {0xff9a, 0x7d}, /* Keypad PgUp */ + {0xffb4, 0x6b}, /* Keypad 4 */ + {0xff96, 0x6b}, /* Keypad left arrow */ + {0xffb5, 0x73}, /* Keypad 5 */ + {0xff9d, 0x73}, /* Keypad empty */ + {0xffb6, 0x74}, /* Keypad 6 */ + {0xff98, 0x74}, /* Keypad right arrow */ + {0xffb1, 0x69}, /* Keypad 1 */ + {0xff9c, 0x69}, /* Keypad end */ + {0xffb2, 0x72}, /* Keypad 2 */ + {0xff99, 0x72}, /* Keypad down arrow */ + {0xffb3, 0x7a}, /* Keypad 3 */ + {0xff9b, 0x7a}, /* Keypad PgDown */ + {0xffb0, 0x70}, /* Keypad 0 */ + {0xff9e, 0x70}, /* Keypad ins */ + {0xffae, 0x71}, /* Keypad . */ + {0xff9f, 0x71}, /* Keypad del */ + {0, 0, 0} /* Terminator */ +}; + +/* ASCII to type 2 scancode lookup table */ +static const uint8_t ascii_translations[128] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x29, 0x16, 0x52, 0x26, 0x25, 0x2e, 0x3d, 0x52, + 0x46, 0x45, 0x3e, 0x55, 0x41, 0x4e, 0x49, 0x4a, + 0x45, 0x16, 0x1e, 0x26, 0x25, 0x2e, 0x36, 0x3d, + 0x3e, 0x46, 0x4c, 0x4c, 0x41, 0x55, 0x49, 0x4a, + 0x1e, 0x1c, 0x32, 0x21, 0x23, 0x24, 0x2b, 0x34, + 0x33, 0x43, 0x3b, 0x42, 0x4b, 0x3a, 0x31, 0x44, + 0x4d, 0x15, 0x2d, 0x1b, 0x2c, 0x3c, 0x2a, 0x1d, + 0x22, 0x35, 0x1a, 0x54, 0x5d, 0x5b, 0x36, 0x4e, + 0x0e, 0x1c, 0x32, 0x21, 0x23, 0x24, 0x2b, 0x34, + 0x33, 0x43, 0x3b, 0x42, 0x4b, 0x3a, 0x31, 0x44, + 0x4d, 0x15, 0x2d, 0x1b, 0x2c, 0x3c, 0x2a, 0x1d, + 0x22, 0x35, 0x1a, 0x54, 0x5d, 0x5b, 0x0e, 0x00, +}; + static void fifo_init(struct ps2kbd_softc *sc) { @@ -212,236 +313,38 @@ static void ps2kbd_keysym_queue(struct ps2kbd_softc *sc, int down, uint32_t keysym) { - /* ASCII to type 2 scancode lookup table */ - const uint8_t translation[128] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x29, 0x16, 0x52, 0x26, 0x25, 0x2e, 0x3d, 0x52, - 0x46, 0x45, 0x3e, 0x55, 0x41, 0x4e, 0x49, 0x4a, - 0x45, 0x16, 0x1e, 0x26, 0x25, 0x2e, 0x36, 0x3d, - 0x3e, 0x46, 0x4c, 0x4c, 0x41, 0x55, 0x49, 0x4a, - 0x1e, 0x1c, 0x32, 0x21, 0x23, 0x24, 0x2b, 0x34, - 0x33, 0x43, 0x3b, 0x42, 0x4b, 0x3a, 0x31, 0x44, - 0x4d, 0x15, 0x2d, 0x1b, 0x2c, 0x3c, 0x2a, 0x1d, - 0x22, 0x35, 0x1a, 0x54, 0x5d, 0x5b, 0x36, 0x4e, - 0x0e, 0x1c, 0x32, 0x21, 0x23, 0x24, 0x2b, 0x34, - 0x33, 0x43, 0x3b, 0x42, 0x4b, 0x3a, 0x31, 0x44, - 0x4d, 0x15, 0x2d, 0x1b, 0x2c, 0x3c, 0x2a, 0x1d, - 0x22, 0x35, 0x1a, 0x54, 0x5d, 0x5b, 0x0e, 0x00, - }; - assert(pthread_mutex_isowned_np(&sc->mtx)); + int e0_prefix, found; + uint8_t code; + const struct extended_translation *trans; + + found = 0; + if (keysym < 0x80) { + code = ascii_translations[keysym]; + e0_prefix = 0; + found = 1; + } else { + for (trans = &(extended_translations[0]); trans->keysym != 0; + trans++) { + if (keysym == trans->keysym) { + code = trans->scancode; + e0_prefix = trans->flags & SCANCODE_E0_PREFIX; + found = 1; + break; + } + } + } - switch (keysym) { - case 0x0 ... 0x7f: - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, translation[keysym]); - break; - case 0xff08: /* Back space */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x66); - break; - case 0xff09: /* Tab */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x0d); - break; - case 0xff0d: /* Return */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x5a); - break; - case 0xff1b: /* Escape */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x76); - break; - case 0xff50: /* Home */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x6c); - break; - case 0xff51: /* Left arrow */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x6b); - break; - case 0xff52: /* Up arrow */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x75); - break; - case 0xff53: /* Right arrow */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x74); - break; - case 0xff54: /* Down arrow */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x72); - break; - case 0xff55: /* PgUp */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x7d); - break; - case 0xff56: /* PgDwn */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x7a); - break; - case 0xff57: /* End */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x69); - break; - case 0xff63: /* Ins */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x70); - break; - case 0xff8d: /* Keypad Enter */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x5a); - break; - case 0xffe1: /* Left shift */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x12); - break; - case 0xffe2: /* Right shift */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x59); - break; - case 0xffe3: /* Left control */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x14); - break; - case 0xffe4: /* Right control */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x14); - break; - case 0xffe7: /* Left meta */ - /* XXX */ - break; - case 0xffe8: /* Right meta */ - /* XXX */ - break; - case 0xffe9: /* Left alt */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x11); - break; - case 0xfe03: /* AltGr */ - case 0xffea: /* Right alt */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x11); - break; - case 0xffeb: /* Left Windows */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x1f); - break; - case 0xffec: /* Right Windows */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x27); - break; - case 0xffbe: /* F1 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x05); - break; - case 0xffbf: /* F2 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x06); - break; - case 0xffc0: /* F3 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x04); - break; - case 0xffc1: /* F4 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x0C); - break; - case 0xffc2: /* F5 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x03); - break; - case 0xffc3: /* F6 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x0B); - break; - case 0xffc4: /* F7 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x83); - break; - case 0xffc5: /* F8 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x0A); - break; - case 0xffc6: /* F9 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x01); - break; - case 0xffc7: /* F10 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x09); - break; - case 0xffc8: /* F11 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x78); - break; - case 0xffc9: /* F12 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x07); - break; - case 0xffff: /* Del */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x71); - break; - default: - fprintf(stderr, "Unhandled ps2 keyboard keysym 0x%x\n", - keysym); - break; + if (!found) { + fprintf(stderr, "Unhandled ps2 keyboard keysym 0x%x\n", keysym); + return; } + + if (e0_prefix) + fifo_put(sc, 0xe0); + if (!down) + fifo_put(sc, 0xf0); + fifo_put(sc, code); } static void diff --git a/usr/src/cmd/bhyve/rfb.c b/usr/src/cmd/bhyve/rfb.c index f761646fc7..39ea1611f9 100644 --- a/usr/src/cmd/bhyve/rfb.c +++ b/usr/src/cmd/bhyve/rfb.c @@ -43,8 +43,12 @@ __FBSDID("$FreeBSD$"); #include <machine/cpufunc.h> #include <machine/specialreg.h> #include <netinet/in.h> +#include <netdb.h> #include <assert.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <errno.h> #include <pthread.h> @@ -971,8 +975,11 @@ sse42_supported(void) int rfb_init(char *hostname, int port, int wait, char *password) { + int e; + char servname[6]; struct rfb_softc *rc; - struct sockaddr_in sin; + struct addrinfo *ai; + struct addrinfo hints; int on = 1; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; @@ -989,37 +996,49 @@ rfb_init(char *hostname, int port, int wait, char *password) rc->password = password; - rc->sfd = socket(AF_INET, SOCK_STREAM, 0); + snprintf(servname, sizeof(servname), "%d", port ? port : 5900); + + if (!hostname || strlen(hostname) == 0) +#if defined(INET) + hostname = "127.0.0.1"; +#elif defined(INET6) + hostname = "[::1]"; +#endif + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV | AI_PASSIVE; + + if ((e = getaddrinfo(hostname, servname, &hints, &ai)) != 0) { + fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(e)); + return(-1); + } + + rc->sfd = socket(ai->ai_family, ai->ai_socktype, 0); if (rc->sfd < 0) { perror("socket"); + freeaddrinfo(ai); return (-1); } setsockopt(rc->sfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); -#ifdef __FreeBSD__ - sin.sin_len = sizeof(sin); -#endif - sin.sin_family = AF_INET; - sin.sin_port = port ? htons(port) : htons(5900); - if (hostname && strlen(hostname) > 0) - inet_pton(AF_INET, hostname, &(sin.sin_addr)); - else - sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - - if (bind(rc->sfd, (struct sockaddr *)&sin, sizeof(sin)) < 0) { + if (bind(rc->sfd, ai->ai_addr, ai->ai_addrlen) < 0) { perror("bind"); + freeaddrinfo(ai); return (-1); } if (listen(rc->sfd, 1) < 0) { perror("listen"); + freeaddrinfo(ai); return (-1); } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE); - if (cap_rights_limit(rc->sfd, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(rc->sfd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif @@ -1041,6 +1060,7 @@ rfb_init(char *hostname, int port, int wait, char *password) pthread_mutex_unlock(&rc->mtx); } + freeaddrinfo(ai); return (0); } diff --git a/usr/src/cmd/bhyve/uart_emul.c b/usr/src/cmd/bhyve/uart_emul.c index ac912991f0..1027d0b0f6 100644 --- a/usr/src/cmd/bhyve/uart_emul.c +++ b/usr/src/cmd/bhyve/uart_emul.c @@ -938,14 +938,12 @@ uart_set_backend(struct uart_softc *sc, const char *opts) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ, CAP_WRITE); - if (cap_rights_limit(sc->tty.fd, &rights) == -1 && - errno != ENOSYS) + if (caph_rights_limit(sc->tty.fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); - if (cap_ioctls_limit(sc->tty.fd, cmds, nitems(cmds)) == -1 && - errno != ENOSYS) + if (caph_ioctls_limit(sc->tty.fd, cmds, nitems(cmds)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (!uart_stdio) { - if (caph_limit_stdin() == -1 && errno != ENOSYS) + if (caph_limit_stdin() == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); } diff --git a/usr/src/compat/freebsd/amd64/machine/cpufunc.h b/usr/src/compat/freebsd/amd64/machine/cpufunc.h index 005a76b305..0b7bcdaa59 100644 --- a/usr/src/compat/freebsd/amd64/machine/cpufunc.h +++ b/usr/src/compat/freebsd/amd64/machine/cpufunc.h @@ -288,5 +288,24 @@ load_dr7(uint64_t dr7) __asm __volatile("movq %0,%%dr7" : : "r" (dr7)); } +#ifdef _KERNEL +/* + * Including the native sys/segments.h in userspace seriously conflicts with + * the FreeBSD compat/contrib headers. + */ +#include <sys/segments.h> + +static __inline void +lldt(u_short sel) +{ + wr_ldtr(sel); +} + +static __inline u_short +sldt() +{ + return (rd_ldtr()); +} +#endif /* _KERNEL */ #endif /* _COMPAT_FREEBSD_AMD64_MACHINE_CPUFUNC_H_ */ diff --git a/usr/src/compat/freebsd/amd64/machine/param.h b/usr/src/compat/freebsd/amd64/machine/param.h index eaca5ab8d7..b152f4d526 100644 --- a/usr/src/compat/freebsd/amd64/machine/param.h +++ b/usr/src/compat/freebsd/amd64/machine/param.h @@ -36,4 +36,6 @@ /* Size of the level 4 page-map level-4 table units */ #define NPML4EPG (PAGE_SIZE/(sizeof (pml4_entry_t))) +#define CACHE_LINE_SIZE 64 + #endif /* _COMPAT_FREEBSD_AMD64_MACHINE_PARAM_H_ */ diff --git a/usr/src/compat/freebsd/amd64/machine/specialreg.h b/usr/src/compat/freebsd/amd64/machine/specialreg.h index 59fc064a4c..e1e6543701 100644 --- a/usr/src/compat/freebsd/amd64/machine/specialreg.h +++ b/usr/src/compat/freebsd/amd64/machine/specialreg.h @@ -36,9 +36,25 @@ #undef CR4_PCE #undef CR4_VMXE #undef CR4_SMEP +#undef CR4_SMAP #undef CR4_FSGSBASE #undef CR4_PCIDE #endif /* _SYS_CONTROLREGS_H */ +#ifdef _SYS_X86_ARCHEXT_H +/* Our IA32 speculation-related defines conflict with BSD header */ +#undef IA32_ARCH_CAP_RDCL_NO +#undef IA32_ARCH_CAP_IBRS_ALL +#undef IA32_ARCH_CAP_RSBA +#undef IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY +#undef IA32_ARCH_CAP_SSB_NO +#undef IA32_SPEC_CTRL_IBRS +#undef IA32_SPEC_CTRL_STIBP +#undef IA32_SPEC_CTRL_SSBD +#undef IA32_FLUSH_CMD_L1D +#undef MSR_IA32_SPEC_CTRL +#undef MSR_IA32_PRED_CMD +#endif /* _SYS_X86_ARCHEXT_H */ + #include <x86/specialreg.h> #endif /* _COMPAT_FREEBSD_AMD64_MACHINE_SPECIALREG_H_ */ diff --git a/usr/src/uts/i86pc/io/vmm/README.sync b/usr/src/uts/i86pc/io/vmm/README.sync index e9a2479b13..676fdd3a9d 100644 --- a/usr/src/uts/i86pc/io/vmm/README.sync +++ b/usr/src/uts/i86pc/io/vmm/README.sync @@ -1,18 +1,13 @@ The bhyve kernel module and its associated userland consumers have been updated to the latest upstream FreeBSD sources as of: -commit f81459bd8363602ed5e436f10288320419e80ccf -Author: andrew <andrew@FreeBSD.org> -Date: Thu Sep 27 11:16:19 2018 +0000 - Handle a guest executing a vm instruction by trapping and raising an - undefined instruction exception. Previously we would exit the guest, - however an unprivileged user could execute these. +commit 6b1bb0edb4792cc3d4e6b71c4a80e99438081d5d +Author: imp <imp@FreeBSD.org> +Date: Tue Feb 12 19:05:09 2019 +0000 - Found with: syzkaller - Reviewed by: araujo, tychon (previous version) - Approved by: re (kib) - MFC after: 1 week - Differential Revision: https://reviews.freebsd.org/D17192 + Revert r343077 until the license issues surrounding it can be resolved. -Which corresponds to SVN revision: 338957 + Approved by: core@ + +Which corresponds to SVN revision: 344057 diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c index 9c22fc2532..e921383d22 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c @@ -1965,6 +1965,7 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, struct vm *vm; uint64_t vmcb_pa; int handled; + uint16_t ldt_sel; svm_sc = arg; vm = svm_sc->vm; @@ -2049,6 +2050,15 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, break; } + /* + * #VMEXIT resumes the host with the guest LDTR, so + * save the current LDT selector so it can be restored + * after an exit. The userspace hypervisor probably + * doesn't use a LDT, but save and restore it to be + * safe. + */ + ldt_sel = sldt(); + svm_inj_interrupts(svm_sc, vcpu, vlapic); /* Activate the nested pmap on 'curcpu' */ @@ -2083,6 +2093,9 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, */ restore_host_tss(); + /* Restore host LDTR. */ + lldt(ldt_sel); + /* #VMEXIT disables interrupts so re-enable them here. */ enable_gintr(); diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c index 0417983233..67c43100f1 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c @@ -122,9 +122,8 @@ svm_rdmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t *result, case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: case MSR_SYSCFG: - *result = 0; - break; case MSR_AMDK8_IPM: + case MSR_EXTFEATURES: *result = 0; break; default: @@ -163,6 +162,8 @@ svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, bool *retu) * Ignore writes to microcode update register. */ break; + case MSR_EXTFEATURES: + break; default: error = EINVAL; break; diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h index 28c5e6b15b..edde5c6dd5 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h @@ -419,6 +419,14 @@ VMPTRLD(struct vmcs *vmcs) #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 +#define EXIT_REASON_RDRAND 57 +#define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_VMFUNC 59 +#define EXIT_REASON_ENCLS 60 +#define EXIT_REASON_RDSEED 61 +#define EXIT_REASON_PM_LOG_FULL 62 +#define EXIT_REASON_XSAVES 63 +#define EXIT_REASON_XRSTORS 64 /* * NMI unblocking due to IRET. diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c index d33ec7e4db..a723be0d28 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c @@ -104,7 +104,7 @@ __FBSDID("$FreeBSD$"); PROCBASED_NMI_WINDOW_EXITING) #ifdef __FreeBSD__ -#define PROCBASED_CTLS_ONE_SETTING \ +#define PROCBASED_CTLS_ONE_SETTING \ (PROCBASED_SECONDARY_CONTROLS | \ PROCBASED_MWAIT_EXITING | \ PROCBASED_MONITOR_EXITING | \ @@ -471,7 +471,7 @@ vmx_allow_x2apic_msrs(struct vmx *vmx) for (i = 0; i < 8; i++) error += guest_msr_ro(vmx, MSR_APIC_TMR0 + i); - + for (i = 0; i < 8; i++) error += guest_msr_ro(vmx, MSR_APIC_IRR0 + i); @@ -631,6 +631,7 @@ vmx_disable(void *arg __unused) static int vmx_cleanup(void) { + if (pirvec >= 0) lapic_ipi_free(pirvec); @@ -902,7 +903,8 @@ vmx_init(int ipinum) } #ifdef __FreeBSD__ - guest_l1d_flush = (cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) == 0; + guest_l1d_flush = (cpu_ia32_arch_caps & + IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) == 0; TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush); /* @@ -1231,7 +1233,7 @@ vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx) { #ifdef __FreeBSD__ int handled, func; - + func = vmxctx->guest_rax; #else int handled; @@ -3229,6 +3231,10 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, struct vm_exit *vmexit; struct vlapic *vlapic; uint32_t exit_reason; +#ifdef __FreeBSD__ + struct region_descriptor gdtr, idtr; + uint16_t ldt_sel; +#endif vmx = arg; vm = vmx->vm; @@ -3358,17 +3364,56 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, * re-VMLAUNCH as opposed to VMRESUME. */ launched = (vmx->vmcs_state[vcpu] & VS_LAUNCHED) != 0; + /* + * Restoration of the GDT limit is taken care of by + * vmx_savectx(). Since the maximum practical index for the + * IDT is 255, restoring its limits from the post-VMX-exit + * default of 0xffff is not a concern. + * + * Only 64-bit hypervisor callers are allowed, which forgoes + * the need to restore any LDT descriptor. Toss an error to + * anyone attempting to break that rule. + */ + if (curproc->p_model != DATAMODEL_LP64) { + ht_release(); + enable_intr(); + bzero(vmexit, sizeof (*vmexit)); + vmexit->rip = rip; + vmexit->exitcode = VM_EXITCODE_VMX; + vmexit->u.vmx.status = VM_FAIL_INVALID; + handled = UNHANDLED; + break; + } +#else + /* + * VM exits restore the base address but not the + * limits of GDTR and IDTR. The VMCS only stores the + * base address, so VM exits set the limits to 0xffff. + * Save and restore the full GDTR and IDTR to restore + * the limits. + * + * The VMCS does not save the LDTR at all, and VM + * exits clear LDTR as if a NULL selector were loaded. + * The userspace hypervisor probably doesn't use a + * LDT, but save and restore it to be safe. + */ + sgdt(&gdtr); + sidt(&idtr); + ldt_sel = sldt(); #endif + vmx_run_trace(vmx, vcpu); vmx_dr_enter_guest(vmxctx); rc = vmx_enter_guest(vmxctx, vmx, launched); vmx_dr_leave_guest(vmxctx); + #ifndef __FreeBSD__ vmx->vmcs_state[vcpu] |= VS_LAUNCHED; -#endif - -#ifndef __FreeBSD__ ht_release(); +#else + bare_lgdt(&gdtr); + lidt(&idtr); + lldt(ldt_sel); #endif /* Collect some information for VM exit processing */ @@ -3522,7 +3567,7 @@ vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval) uint64_t gi; int error; - error = vmcs_getreg(&vmx->vmcs[vcpu], running, + error = vmcs_getreg(&vmx->vmcs[vcpu], running, VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi); *retval = (gi & HWINTR_BLOCKING) ? 1 : 0; return (error); @@ -3566,8 +3611,8 @@ vmx_shadow_reg(int reg) switch (reg) { case VM_REG_GUEST_CR0: shreg = VMCS_CR0_SHADOW; - break; - case VM_REG_GUEST_CR4: + break; + case VM_REG_GUEST_CR4: shreg = VMCS_CR4_SHADOW; break; default: @@ -3638,7 +3683,7 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) if (shadow > 0) { /* * Store the unmodified value in the shadow - */ + */ error = vmcs_setreg(&vmx->vmcs[vcpu], running, VMCS_IDENT(shadow), val); } @@ -3821,7 +3866,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) } } - return (retval); + return (retval); } struct vlapic_vtx { @@ -4174,7 +4219,7 @@ vmx_vlapic_init(void *arg, int vcpuid) struct vmx *vmx; struct vlapic *vlapic; struct vlapic_vtx *vlapic_vtx; - + vmx = arg; vlapic = malloc(sizeof(struct vlapic_vtx), M_VLAPIC, M_WAITOK | M_ZERO); diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c index 1a2f493dd1..d276944800 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c @@ -89,6 +89,7 @@ enum { VIE_OP_TYPE_GROUP1, VIE_OP_TYPE_STOS, VIE_OP_TYPE_BITTEST, + VIE_OP_TYPE_TWOB_GRP15, VIE_OP_TYPE_LAST }; @@ -101,6 +102,10 @@ enum { #ifdef _KERNEL static const struct vie_op two_byte_opcodes[256] = { + [0xAE] = { + .op_byte = 0xAE, + .op_type = VIE_OP_TYPE_TWOB_GRP15, + }, [0xB6] = { .op_byte = 0xB6, .op_type = VIE_OP_TYPE_MOVZX, @@ -1458,6 +1463,37 @@ emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, return (0); } +static int +emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) +{ + int error; + uint64_t buf; + + switch (vie->reg & 7) { + case 0x7: /* CLFLUSH, CLFLUSHOPT, and SFENCE */ + if (vie->mod == 0x3) { + /* + * SFENCE. Ignore it, VM exit provides enough + * barriers on its own. + */ + error = 0; + } else { + /* + * CLFLUSH, CLFLUSHOPT. Only check for access + * rights. + */ + error = memread(vm, vcpuid, gpa, &buf, 1, memarg); + } + break; + default: + error = EINVAL; + break; + } + + return (error); +} + int vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, @@ -1518,6 +1554,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, error = emulate_bittest(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; + case VIE_OP_TYPE_TWOB_GRP15: + error = emulate_twob_group15(vm, vcpuid, gpa, vie, + memread, memwrite, memarg); + break; default: error = EINVAL; break; diff --git a/usr/src/uts/i86pc/io/vmm/x86.c b/usr/src/uts/i86pc/io/vmm/x86.c index 5a6d7f9dd7..b02142e7e5 100644 --- a/usr/src/uts/i86pc/io/vmm/x86.c +++ b/usr/src/uts/i86pc/io/vmm/x86.c @@ -141,17 +141,30 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, cpuid_count(*eax, *ecx, regs); if (vmm_is_amd()) { /* - * XXX this might appear silly because AMD - * cpus don't have threads. - * - * However this matches the logical cpus as - * advertised by leaf 0x1 and will work even - * if threads is set incorrectly on an AMD host. + * As on Intel (0000_0007:0, EDX), mask out + * unsupported or unsafe AMD extended features + * (8000_0008 EBX). */ + regs[1] &= (AMDFEID_CLZERO | AMDFEID_IRPERF | + AMDFEID_XSAVEERPTR); + vm_get_topology(vm, &sockets, &cores, &threads, &maxcpus); - logical_cpus = threads * cores; - regs[2] = logical_cpus - 1; + /* + * Here, width is ApicIdCoreIdSize, present on + * at least Family 15h and newer. It + * represents the "number of bits in the + * initial apicid that indicate thread id + * within a package." + * + * Our topo_probe_amd() uses it for + * pkg_id_shift and other OSes may rely on it. + */ + width = MIN(0xF, log2(threads * cores)); + if (width < 0x4) + width = 0; + logical_cpus = MIN(0xFF, threads * cores - 1); + regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus; } break; @@ -159,9 +172,9 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, cpuid_count(*eax, *ecx, regs); /* - * Hide SVM and Topology Extension features from guest. + * Hide SVM from guest. */ - regs[2] &= ~(AMDID2_SVM | AMDID2_TOPOLOGY); + regs[2] &= ~AMDID2_SVM; /* * Don't advertise extended performance counter MSRs @@ -226,6 +239,68 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, #endif /* __FreeBSD__ */ break; + case CPUID_8000_001D: + /* AMD Cache topology, like 0000_0004 for Intel. */ + if (!vmm_is_amd()) + goto default_leaf; + + /* + * Similar to Intel, generate a ficticious cache + * topology for the guest with L3 shared by the + * package, and L1 and L2 local to a core. + */ + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); + switch (*ecx) { + case 0: + logical_cpus = threads; + level = 1; + func = 1; /* data cache */ + break; + case 1: + logical_cpus = threads; + level = 2; + func = 3; /* unified cache */ + break; + case 2: + logical_cpus = threads * cores; + level = 3; + func = 3; /* unified cache */ + break; + default: + logical_cpus = 0; + level = 0; + func = 0; + break; + } + + logical_cpus = MIN(0xfff, logical_cpus - 1); + regs[0] = (logical_cpus << 14) | (1 << 8) | + (level << 5) | func; + regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0; + regs[2] = 0; + regs[3] = 0; + break; + + case CPUID_8000_001E: + /* AMD Family 16h+ additional identifiers */ + if (!vmm_is_amd() || CPUID_TO_FAMILY(cpu_id) < 0x16) + goto default_leaf; + + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); + regs[0] = vcpu_id; + threads = MIN(0xFF, threads - 1); + regs[1] = (threads << 8) | + (vcpu_id >> log2(threads + 1)); + /* + * XXX Bhyve topology cannot yet represent >1 node per + * processor. + */ + regs[2] = 0; + regs[3] = 0; + break; + case CPUID_0000_0001: do_cpuid(1, regs); @@ -366,7 +441,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, CPUID_STDEXT_AVX512F | CPUID_STDEXT_AVX512PF | CPUID_STDEXT_AVX512ER | - CPUID_STDEXT_AVX512CD); + CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA); regs[2] = 0; regs[3] = 0; @@ -398,35 +473,42 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, case CPUID_0000_000B: /* - * Processor topology enumeration + * Intel processor topology enumeration */ - vm_get_topology(vm, &sockets, &cores, &threads, - &maxcpus); - if (*ecx == 0) { - logical_cpus = threads; - width = log2(logical_cpus); - level = CPUID_TYPE_SMT; - x2apic_id = vcpu_id; - } + if (vmm_is_intel()) { + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); + if (*ecx == 0) { + logical_cpus = threads; + width = log2(logical_cpus); + level = CPUID_TYPE_SMT; + x2apic_id = vcpu_id; + } - if (*ecx == 1) { - logical_cpus = threads * cores; - width = log2(logical_cpus); - level = CPUID_TYPE_CORE; - x2apic_id = vcpu_id; - } + if (*ecx == 1) { + logical_cpus = threads * cores; + width = log2(logical_cpus); + level = CPUID_TYPE_CORE; + x2apic_id = vcpu_id; + } - if (!cpuid_leaf_b || *ecx >= 2) { - width = 0; - logical_cpus = 0; - level = 0; - x2apic_id = 0; - } + if (!cpuid_leaf_b || *ecx >= 2) { + width = 0; + logical_cpus = 0; + level = 0; + x2apic_id = 0; + } - regs[0] = width & 0x1f; - regs[1] = logical_cpus & 0xffff; - regs[2] = (level << 8) | (*ecx & 0xff); - regs[3] = x2apic_id; + regs[0] = width & 0x1f; + regs[1] = logical_cpus & 0xffff; + regs[2] = (level << 8) | (*ecx & 0xff); + regs[3] = x2apic_id; + } else { + regs[0] = 0; + regs[1] = 0; + regs[2] = 0; + regs[3] = 0; + } break; case CPUID_0000_000D: @@ -488,6 +570,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, break; default: +default_leaf: /* * The leaf value has already been clamped so * simply pass this through, keeping count of diff --git a/usr/src/uts/i86pc/io/vmm/x86.h b/usr/src/uts/i86pc/io/vmm/x86.h index 3a8e043852..0d70c04fd8 100644 --- a/usr/src/uts/i86pc/io/vmm/x86.h +++ b/usr/src/uts/i86pc/io/vmm/x86.h @@ -49,6 +49,8 @@ #define CPUID_8000_0006 (0x80000006) #define CPUID_8000_0007 (0x80000007) #define CPUID_8000_0008 (0x80000008) +#define CPUID_8000_001D (0x8000001D) +#define CPUID_8000_001E (0x8000001E) /* * CPUID instruction Fn0000_0001: |
