summaryrefslogtreecommitdiff
path: root/usr/src/uts
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts')
-rw-r--r--usr/src/uts/common/Makefile.files4
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prvnops.c79
-rw-r--r--usr/src/uts/common/conf/param.c13
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb2_fsctl_copychunk.c2
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_fsops.c92
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_idmap.c14
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_sd.c33
-rw-r--r--usr/src/uts/common/fs/zfs/arc.c7
-rw-r--r--usr/src/uts/common/fs/zfs/dnode.c2
-rw-r--r--usr/src/uts/common/fs/zfs/dnode_sync.c14
-rw-r--r--usr/src/uts/common/fs/zfs/lua/ldebug.c2
-rw-r--r--usr/src/uts/common/fs/zfs/metaslab.c2
-rw-r--r--usr/src/uts/common/fs/zfs/spa.c3
-rw-r--r--usr/src/uts/common/fs/zfs/spa_config.c3
-rw-r--r--usr/src/uts/common/fs/zfs/vdev.c7
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_indirect.c6
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_raidz.c2
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_fm.c2
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_vnops.c2
-rw-r--r--usr/src/uts/common/fs/zfs/zio.c10
-rw-r--r--usr/src/uts/common/fs/zfs/zvol.c8
-rw-r--r--usr/src/uts/common/inet/ip/ipclassifier.c9
-rw-r--r--usr/src/uts/common/inet/ip/ipsecesp.c4
-rw-r--r--usr/src/uts/common/inet/ip/sadb.c12
-rw-r--r--usr/src/uts/common/inet/tcp/tcp.c40
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_output.c20
-rw-r--r--usr/src/uts/common/io/cxgbe/t4nex/adapter.h4
-rw-r--r--usr/src/uts/common/io/cxgbe/t4nex/t4_nexus.c105
-rw-r--r--usr/src/uts/common/io/igb/igb_sensor.c17
-rw-r--r--usr/src/uts/common/io/ksensor/ksensor_drv.c16
-rw-r--r--usr/src/uts/common/io/ksensor/ksensor_test.c76
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx.c15
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx.h23
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_cmd.c13
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_gld.c27
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_intr.c101
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_reg.h28
-rw-r--r--usr/src/uts/common/io/mlxcx/mlxcx_sensor.c126
-rw-r--r--usr/src/uts/common/io/tem.c78
-rw-r--r--usr/src/uts/common/io/tem_safe.c19
-rw-r--r--usr/src/uts/common/io/usb/usba/hubdi.c137
-rw-r--r--usr/src/uts/common/mapfiles/ksensor.mapfile4
-rw-r--r--usr/src/uts/common/os/cred.c9
-rw-r--r--usr/src/uts/common/os/ksensor.c40
-rw-r--r--usr/src/uts/common/os/softint.c46
-rw-r--r--usr/src/uts/common/sys/font.h8
-rw-r--r--usr/src/uts/common/sys/ksensor_impl.h2
-rw-r--r--usr/src/uts/common/sys/mac.h4
-rw-r--r--usr/src/uts/common/sys/sensors.h62
-rw-r--r--usr/src/uts/common/sys/smbios.h38
-rw-r--r--usr/src/uts/common/sys/smbios_impl.h18
-rw-r--r--usr/src/uts/common/sys/socket_proto.h10
-rw-r--r--usr/src/uts/common/sys/time.h27
-rw-r--r--usr/src/uts/i86pc/boot/boot_fb.c40
-rw-r--r--usr/src/uts/i86pc/io/apix/apix.c32
-rw-r--r--usr/src/uts/i86pc/io/gfx_private/gfxp_fb.c12
-rw-r--r--usr/src/uts/i86pc/io/hpet_acpi.c95
-rw-r--r--usr/src/uts/i86pc/io/mp_platform_common.c86
-rw-r--r--usr/src/uts/i86pc/io/pcplusmp/apic.c13
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/svm.c415
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/svm.h48
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/vmcb.c405
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/vmcb.h42
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/ept.c20
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/offsets.in1
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmcs.c448
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmcs.h125
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmx.c642
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmx.h94
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h244
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c7
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h2
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vtd.c6
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/iommu.c34
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vlapic.c212
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h38
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vpmtmr.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vrtc.c97
-rw-r--r--usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h2
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm.c43
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c2
-rw-r--r--usr/src/uts/i86pc/os/cpuid.c2
-rw-r--r--usr/src/uts/i86pc/os/gipt.c6
-rw-r--r--usr/src/uts/i86pc/os/hma.c7
-rw-r--r--usr/src/uts/i86pc/os/mp_implfuncs.c15
-rw-r--r--usr/src/uts/i86pc/os/mp_machdep.c7
-rw-r--r--usr/src/uts/i86pc/os/startup.c20
-rw-r--r--usr/src/uts/i86pc/sys/hpet_acpi.h10
-rw-r--r--usr/src/uts/i86pc/sys/prom_debug.h72
-rw-r--r--usr/src/uts/i86pc/vmm/Makefile12
-rw-r--r--usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c21
-rw-r--r--usr/src/uts/intel/io/amdnbtemp/amdnbtemp.c10
-rw-r--r--usr/src/uts/intel/io/coretemp/coretemp.c12
-rw-r--r--usr/src/uts/intel/io/pchtemp/pchtemp.c10
-rw-r--r--usr/src/uts/intel/tem/Makefile18
-rw-r--r--usr/src/uts/sparc/tem/Makefile26
-rw-r--r--usr/src/uts/sun/sys/ser_async.h10
-rw-r--r--usr/src/uts/sun4u/chicago/io/fpc/fpc-impl-4u.c16
-rw-r--r--usr/src/uts/sun4v/ontario/io/tsalarm.c2
99 files changed, 2560 insertions, 2368 deletions
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 08fb3d45ac..e973cf58ad 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -1694,7 +1694,7 @@ TEM_OBJS += tem.o tem_safe.o
# Font data for generated console fonts
#
i386_FONT = 8x16
-i386_FONT_SRC= ter-u16n
+i386_FONT_SRC= ter-u16b
sparc_FONT = 12x22
sparc_FONT_SRC= Gallant19
FONT=$($(MACH)_FONT)
@@ -2342,4 +2342,4 @@ BNX_OBJS += \
# mlxcx(7D)
#
MLXCX_OBJS += mlxcx.o mlxcx_dma.o mlxcx_cmd.o mlxcx_intr.o mlxcx_gld.o \
- mlxcx_ring.o
+ mlxcx_ring.o mlxcx_sensor.o
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
index c44c32ef29..575acd59a2 100644
--- a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
@@ -22,6 +22,7 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright 2019 Joyent, Inc.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
/*
@@ -470,7 +471,7 @@ typedef struct lxpr_rlimtab {
char *rlim_rctl; /* rctl source */
} lxpr_rlimtab_t;
-#define RLIM_MAXFD "Max open files"
+#define RLIM_MAXFD "Max open files"
static lxpr_rlimtab_t lxpr_rlimtab[] = {
{ "Max cpu time", "seconds", "process.max-cpu-time" },
@@ -1737,8 +1738,9 @@ lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
* match the max value so that we do not output "unlimited".
*/
if (strcmp(lxpr_rlimtab[i].rlim_name, RLIM_MAXFD) == 0 &&
- cur[i] == RLIM_INFINITY)
- cur[i] = max[i];
+ cur[i] == RLIM_INFINITY) {
+ cur[i] = max[i];
+ }
}
lxpr_unlock(p);
@@ -4001,10 +4003,10 @@ lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
* model, so just inform the caller that no swap is being used.
*
* MemAvailable
- * MemAvailable entry is available since Linux Kernel +3.14, is an
- * estimate of how much memory is available for starting new applications,
- * without swapping. In lxbrand we will always return the available free
- * memory as an estimate of this value.
+ * MemAvailable entry is available since Linux Kernel +3.14, is an
+ * estimate of how much memory is available for starting new
+ * applications, without swapping. In lxbrand we will always return the
+ * available free memory as an estimate of this value.
*/
lxpr_uiobuf_printf(uiobuf,
"MemTotal: %8lu kB\n"
@@ -8094,6 +8096,58 @@ lxpr_write_pid_loginuid(lxpr_node_t *lxpnp, struct uio *uio, struct cred *cr,
return (0);
}
+static int
+lxpr_readlink_exe(lxpr_node_t *lxpnp, char *buf, size_t size, cred_t *cr)
+{
+ size_t dlen = DIRENT64_RECLEN(MAXPATHLEN);
+ dirent64_t *dp;
+ vnode_t *dirvp;
+ int error = ENOENT;
+ char *dbuf;
+ proc_t *p;
+ size_t len;
+
+ p = lxpr_lock(lxpnp, NO_ZOMB);
+
+ if (p == NULL)
+ return (error);
+
+ dirvp = p->p_execdir;
+ if (dirvp == NULL) {
+ lxpr_unlock(p);
+ return (error);
+ }
+
+ VN_HOLD(dirvp);
+ lxpr_unlock(p);
+
+ /* Look up the parent directory path */
+ if ((error = vnodetopath(NULL, dirvp, buf, size, cr)) != 0) {
+ VN_RELE(dirvp);
+ return (error);
+ }
+
+ len = strlen(buf);
+
+ dbuf = kmem_alloc(dlen, KM_SLEEP);
+
+ /*
+ * Walk the parent directory to find the vnode for p->p_exec, in order
+ * to derive its path.
+ */
+ if ((error = dirfindvp(NULL, dirvp, lxpnp->lxpr_realvp,
+ cr, dbuf, dlen, &dp)) == 0 &&
+ strlen(dp->d_name) + len + 1 < size) {
+ buf[len] = '/';
+ (void) strcpy(buf + len + 1, dp->d_name);
+ } else {
+ error = ENOENT;
+ }
+ VN_RELE(dirvp);
+ kmem_free(dbuf, dlen);
+ return (error);
+}
+
/*
* lxpr_readlink(): Vnode operation for VOP_READLINK()
*/
@@ -8135,7 +8189,16 @@ lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
if (error != 0)
return (error);
- if ((error = vnodetopath(NULL, rvp, bp, buflen, cr)) != 0) {
+ error = vnodetopath(NULL, rvp, bp, buflen, cr);
+
+ /*
+ * Special handling for /proc/<pid>/exe where the vnode path is
+ * not cached.
+ */
+ if (error != 0 && lxpnp->lxpr_type == LXPR_PID_EXE)
+ error = lxpr_readlink_exe(lxpnp, bp, buflen, cr);
+
+ if (error != 0) {
/*
* Special handling possible for /proc/<pid>/fd/<num>
* Generate <type>:[<inode>] links, if allowed.
diff --git a/usr/src/uts/common/conf/param.c b/usr/src/uts/common/conf/param.c
index 1120748b98..06920c3574 100644
--- a/usr/src/uts/common/conf/param.c
+++ b/usr/src/uts/common/conf/param.c
@@ -116,7 +116,7 @@ const unsigned int _diskrpm = (unsigned int)DISKRPM;
const unsigned long _pgthresh = (unsigned long)PGTHRESH;
const unsigned int _maxslp = (unsigned int)MAXSLP;
const unsigned long _maxhandspreadpages = (unsigned long)MAXHANDSPREADPAGES;
-const int _ncpu = (int)NCPU;
+const int _ncpu = (int)NCPU;
const int _ncpu_log2 = (int)NCPU_LOG2;
const int _ncpu_p2 = (int)NCPU_P2;
const unsigned long _defaultstksz = (unsigned long)DEFAULTSTKSZ;
@@ -131,9 +131,12 @@ const unsigned int _nbpg = (unsigned int)MMU_PAGESIZE;
*/
/*
- * Default hz is 100, but if we set hires_tick we get higher resolution
- * clock behavior (currently defined to be 1000 hz). Higher values seem
- * to work, but are not supported.
+ * hz is 100, but we set hires_tick to get higher resolution clock behavior
+ * (currently defined to be 1000 hz). Higher values seem to work, but are not
+ * supported.
+ *
+ * This is configured via hires_tick to allow users to explicitly customize it
+ * to 0 should the need arise.
*
* If we do decide to play with higher values, remember that hz should
* satisfy the following constraints to avoid integer round-off problems:
@@ -160,7 +163,7 @@ const unsigned int _nbpg = (unsigned int)MMU_PAGESIZE;
int hz = HZ_DEFAULT;
int hires_hz = HIRES_HZ_DEFAULT;
-int hires_tick = 0;
+int hires_tick = 1;
int cpu_decay_factor = 10; /* this is no longer tied to clock */
int max_hres_adj; /* maximum adjustment of hrtime per tick */
int tick_per_msec; /* clock ticks per millisecond (zero if hz < 1000) */
diff --git a/usr/src/uts/common/fs/smbsrv/smb2_fsctl_copychunk.c b/usr/src/uts/common/fs/smbsrv/smb2_fsctl_copychunk.c
index 4240328207..4a657bbf19 100644
--- a/usr/src/uts/common/fs/smbsrv/smb2_fsctl_copychunk.c
+++ b/usr/src/uts/common/fs/smbsrv/smb2_fsctl_copychunk.c
@@ -447,6 +447,8 @@ smb2_fsctl_copychunk_meta(smb_request_t *sr, smb_ofile_t *src_of)
* here don't generally have WRITE_DAC access (sigh) so we
* have to bypass ofile access checks for this operation.
* The file-system level still does its access checking.
+ *
+ * TODO: this should really copy the SACL, too.
*/
smb_fssd_init(&fs_sd, secinfo, sd_flags);
sr->fid_ofile = NULL;
diff --git a/usr/src/uts/common/fs/smbsrv/smb_fsops.c b/usr/src/uts/common/fs/smbsrv/smb_fsops.c
index 8fafac5f60..43b513e840 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_fsops.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_fsops.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2020 Nexenta by DDN, Inc. All rights reserved.
*/
#include <sys/sid.h>
@@ -147,10 +147,9 @@ smb_fsop_create_with_sd(smb_request_t *sr, cred_t *cr,
is_dir = ((fs_sd->sd_flags & SMB_FSSD_FLAGS_DIR) != 0);
if (smb_tree_has_feature(sr->tid_tree, SMB_TREE_ACLONCREATE)) {
- if (fs_sd->sd_secinfo & SMB_ACL_SECINFO) {
- dacl = fs_sd->sd_zdacl;
- sacl = fs_sd->sd_zsacl;
- ASSERT(dacl || sacl);
+ dacl = fs_sd->sd_zdacl;
+ sacl = fs_sd->sd_zsacl;
+ if (dacl != NULL || sacl != NULL) {
if (dacl && sacl) {
acl = smb_fsacl_merge(dacl, sacl);
} else if (dacl) {
@@ -466,15 +465,20 @@ smb_fsop_create_file(smb_request_t *sr, cred_t *cr,
if (op->sd) {
/*
* SD sent by client in Windows format. Needs to be
- * converted to FS format. No inheritance.
+ * converted to FS format. Inherit DACL/SACL if they're not
+ * specified.
*/
secinfo = smb_sd_get_secinfo(op->sd);
+
smb_fssd_init(&fs_sd, secinfo, 0);
status = smb_sd_tofs(op->sd, &fs_sd);
if (status == NT_STATUS_SUCCESS) {
- rc = smb_fsop_create_with_sd(sr, cr, dnode,
- name, attr, ret_snode, &fs_sd);
+ rc = smb_fsop_sdinherit(sr, dnode, &fs_sd);
+ if (rc == 0)
+ rc = smb_fsop_create_with_sd(sr, cr, dnode,
+ name, attr, ret_snode, &fs_sd);
+
} else {
rc = EINVAL;
}
@@ -485,7 +489,7 @@ smb_fsop_create_file(smb_request_t *sr, cred_t *cr,
* Server applies Windows inheritance rules,
* see smb_fsop_sdinherit() comments as to why.
*/
- smb_fssd_init(&fs_sd, SMB_ACL_SECINFO, 0);
+ smb_fssd_init(&fs_sd, 0, 0);
rc = smb_fsop_sdinherit(sr, dnode, &fs_sd);
if (rc == 0) {
rc = smb_fsop_create_with_sd(sr, cr, dnode,
@@ -607,15 +611,19 @@ smb_fsop_mkdir(
if (op->sd) {
/*
* SD sent by client in Windows format. Needs to be
- * converted to FS format. No inheritance.
+ * converted to FS format. Inherit DACL/SACL if they're not
+ * specified.
*/
secinfo = smb_sd_get_secinfo(op->sd);
+
smb_fssd_init(&fs_sd, secinfo, SMB_FSSD_FLAGS_DIR);
status = smb_sd_tofs(op->sd, &fs_sd);
if (status == NT_STATUS_SUCCESS) {
- rc = smb_fsop_create_with_sd(sr, cr, dnode,
- name, attr, ret_snode, &fs_sd);
+ rc = smb_fsop_sdinherit(sr, dnode, &fs_sd);
+ if (rc == 0)
+ rc = smb_fsop_create_with_sd(sr, cr, dnode,
+ name, attr, ret_snode, &fs_sd);
}
else
rc = EINVAL;
@@ -626,7 +634,7 @@ smb_fsop_mkdir(
* Server applies Windows inheritance rules,
* see smb_fsop_sdinherit() comments as to why.
*/
- smb_fssd_init(&fs_sd, SMB_ACL_SECINFO, SMB_FSSD_FLAGS_DIR);
+ smb_fssd_init(&fs_sd, 0, SMB_FSSD_FLAGS_DIR);
rc = smb_fsop_sdinherit(sr, dnode, &fs_sd);
if (rc == 0) {
rc = smb_fsop_create_with_sd(sr, cr, dnode,
@@ -2391,6 +2399,8 @@ smb_fsop_sdmerge(smb_request_t *sr, smb_node_t *snode, smb_fssd_t *fs_sd)
* owner has been specified. Callers should translate this to
* STATUS_INVALID_OWNER which is not the normal mapping for EPERM
* in upper layers, so EPERM is mapped to EBADE.
+ *
+ * If 'overwrite' is non-zero, then the existing ACL is ignored.
*/
int
smb_fsop_sdwrite(smb_request_t *sr, cred_t *cr, smb_node_t *snode,
@@ -2456,14 +2466,13 @@ smb_fsop_sdwrite(smb_request_t *sr, cred_t *cr, smb_node_t *snode,
}
if (fs_sd->sd_secinfo & SMB_ACL_SECINFO) {
- if (overwrite == 0) {
+ if (overwrite == 0)
error = smb_fsop_sdmerge(sr, snode, fs_sd);
- if (error)
- return (error);
- }
- error = smb_fsop_aclwrite(sr, cr, snode, fs_sd);
- if (error) {
+ if (error == 0)
+ error = smb_fsop_aclwrite(sr, cr, snode, fs_sd);
+
+ if (error != 0) {
/*
* Revert uid/gid changes if required.
*/
@@ -2511,39 +2520,46 @@ smb_fsop_sdinherit(smb_request_t *sr, smb_node_t *dnode, smb_fssd_t *fs_sd)
acl_t *sacl = NULL;
int is_dir;
int error;
+ uint32_t secinfo;
+ smb_fssd_t pfs_sd;
ASSERT(fs_sd);
- if (sr->tid_tree->t_acltype != ACE_T) {
- /*
- * No forced inheritance for non-ZFS filesystems.
- */
- fs_sd->sd_secinfo = 0;
+ secinfo = fs_sd->sd_secinfo;
+
+ /* Anything to do? */
+ if ((secinfo & SMB_ACL_SECINFO) == SMB_ACL_SECINFO)
+ return (0);
+
+ /*
+ * No forced inheritance for non-ZFS filesystems.
+ */
+ if (sr->tid_tree->t_acltype != ACE_T)
return (0);
- }
+ smb_fssd_init(&pfs_sd, SMB_ACL_SECINFO, fs_sd->sd_flags);
/* Fetch parent directory's ACL */
- error = smb_fsop_sdread(sr, zone_kcred(), dnode, fs_sd);
+ error = smb_fsop_sdread(sr, zone_kcred(), dnode, &pfs_sd);
if (error) {
return (error);
}
is_dir = (fs_sd->sd_flags & SMB_FSSD_FLAGS_DIR);
- dacl = smb_fsacl_inherit(fs_sd->sd_zdacl, is_dir, SMB_DACL_SECINFO,
- sr->user_cr);
- sacl = smb_fsacl_inherit(fs_sd->sd_zsacl, is_dir, SMB_SACL_SECINFO,
- sr->user_cr);
-
- if (sacl == NULL)
- fs_sd->sd_secinfo &= ~SMB_SACL_SECINFO;
-
- smb_fsacl_free(fs_sd->sd_zdacl);
- smb_fsacl_free(fs_sd->sd_zsacl);
+ if ((secinfo & SMB_DACL_SECINFO) == 0) {
+ dacl = smb_fsacl_inherit(pfs_sd.sd_zdacl, is_dir,
+ SMB_DACL_SECINFO, sr->user_cr);
+ fs_sd->sd_zdacl = dacl;
+ }
- fs_sd->sd_zdacl = dacl;
- fs_sd->sd_zsacl = sacl;
+ if ((secinfo & SMB_SACL_SECINFO) == 0) {
+ sacl = smb_fsacl_inherit(pfs_sd.sd_zsacl, is_dir,
+ SMB_SACL_SECINFO, sr->user_cr);
+ fs_sd->sd_zsacl = sacl;
+ }
+ smb_fsacl_free(pfs_sd.sd_zdacl);
+ smb_fsacl_free(pfs_sd.sd_zsacl);
return (0);
}
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/fs/smbsrv/smb_idmap.c b/usr/src/uts/common/fs/smbsrv/smb_idmap.c
index b9bfa991c4..e6c04193b0 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_idmap.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_idmap.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2020 Nexenta by DDN, Inc. All rights reserved.
*/
/*
@@ -83,12 +83,12 @@ smb_idmap_getsid(uid_t id, int idtype, smb_sid_t **sid)
switch (idtype) {
case SMB_IDMAP_USER:
- sim.sim_stat = kidmap_getsidbyuid(global_zone, id,
+ sim.sim_stat = kidmap_getsidbyuid(curzone, id,
(const char **)&sim.sim_domsid, &sim.sim_rid);
break;
case SMB_IDMAP_GROUP:
- sim.sim_stat = kidmap_getsidbygid(global_zone, id,
+ sim.sim_stat = kidmap_getsidbygid(curzone, id,
(const char **)&sim.sim_domsid, &sim.sim_rid);
break;
@@ -150,17 +150,17 @@ smb_idmap_getid(smb_sid_t *sid, uid_t *id, int *idtype)
switch (*idtype) {
case SMB_IDMAP_USER:
- sim.sim_stat = kidmap_getuidbysid(global_zone, sim.sim_domsid,
+ sim.sim_stat = kidmap_getuidbysid(curzone, sim.sim_domsid,
sim.sim_rid, sim.sim_id);
break;
case SMB_IDMAP_GROUP:
- sim.sim_stat = kidmap_getgidbysid(global_zone, sim.sim_domsid,
+ sim.sim_stat = kidmap_getgidbysid(curzone, sim.sim_domsid,
sim.sim_rid, sim.sim_id);
break;
case SMB_IDMAP_UNKNOWN:
- sim.sim_stat = kidmap_getpidbysid(global_zone, sim.sim_domsid,
+ sim.sim_stat = kidmap_getpidbysid(curzone, sim.sim_domsid,
sim.sim_rid, sim.sim_id, &sim.sim_idtype);
break;
@@ -186,7 +186,7 @@ smb_idmap_batch_create(smb_idmap_batch_t *sib, uint16_t nmap, int flags)
bzero(sib, sizeof (smb_idmap_batch_t));
- sib->sib_idmaph = kidmap_get_create(global_zone);
+ sib->sib_idmaph = kidmap_get_create(curzone);
sib->sib_flags = flags;
sib->sib_nmap = nmap;
diff --git a/usr/src/uts/common/fs/smbsrv/smb_sd.c b/usr/src/uts/common/fs/smbsrv/smb_sd.c
index ddbd7b9413..f7e056c511 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_sd.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_sd.c
@@ -22,7 +22,7 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
- * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2020 Nexenta by DDN, Inc. All rights reserved.
*/
/*
@@ -243,16 +243,29 @@ smb_sd_tofs(smb_sd_t *sd, smb_fssd_t *fs_sd)
}
}
+ /*
+ * In SMB, the 'secinfo' determines which parts of the SD the client
+ * intends to change. Notably, this includes changing the DACL_PRESENT
+ * and SACL_PRESENT control bits. The client can specify e.g.
+ * SACL_SECINFO, but not SACL_PRESENT, and this means the client intends
+ * to remove the SACL.
+ *
+ * If the *_PRESENT bit isn't set, then the respective ACL will be NULL.
+ * [MS-DTYP] disallows providing an ACL when the PRESENT bit isn't set.
+ * This is enforced by smb_decode_sd().
+ *
+ * We allow the SACL to be NULL, but we MUST have a DACL.
+ * If the DACL is NULL, that's equivalent to "everyone:full_set:allow".
+ */
+
/* DACL */
if (fs_sd->sd_secinfo & SMB_DACL_SECINFO) {
- if (sd->sd_control & SE_DACL_PRESENT) {
- status = smb_acl_to_zfs(sd->sd_dacl, flags,
- SMB_DACL_SECINFO, &fs_sd->sd_zdacl);
- if (status != NT_STATUS_SUCCESS)
- return (status);
- }
- else
- return (NT_STATUS_INVALID_ACL);
+ ASSERT3U(((sd->sd_control & SE_DACL_PRESENT) != 0), ==,
+ (sd->sd_dacl != NULL));
+ status = smb_acl_to_zfs(sd->sd_dacl, flags,
+ SMB_DACL_SECINFO, &fs_sd->sd_zdacl);
+ if (status != NT_STATUS_SUCCESS)
+ return (status);
}
/* SACL */
@@ -263,8 +276,6 @@ smb_sd_tofs(smb_sd_t *sd, smb_fssd_t *fs_sd)
if (status != NT_STATUS_SUCCESS) {
return (status);
}
- } else {
- return (NT_STATUS_INVALID_ACL);
}
}
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index 9e04e5e00d..939282b378 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -2538,7 +2538,7 @@ arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
*/
ret = SET_ERROR(EIO);
spa_log_error(spa, zb);
- zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
+ (void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
spa, NULL, zb, NULL, 0, 0);
}
@@ -5801,7 +5801,8 @@ arc_read_done(zio_t *zio)
error = SET_ERROR(EIO);
if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(zio->io_spa, &acb->acb_zb);
- zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
+ (void) zfs_ereport_post(
+ FM_EREPORT_ZFS_AUTHENTICATION,
zio->io_spa, NULL, &acb->acb_zb, zio, 0, 0);
}
}
@@ -6058,7 +6059,7 @@ top:
rc = SET_ERROR(EIO);
if ((zio_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(spa, zb);
- zfs_ereport_post(
+ (void) zfs_ereport_post(
FM_EREPORT_ZFS_AUTHENTICATION,
spa, NULL, zb, NULL, 0, 0);
}
diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c
index f5ef390896..345189f695 100644
--- a/usr/src/uts/common/fs/zfs/dnode.c
+++ b/usr/src/uts/common/fs/zfs/dnode.c
@@ -1197,7 +1197,7 @@ dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
dnode_t *dn;
zrl_init(&dnh->dnh_zrlock);
- zrl_tryenter(&dnh->dnh_zrlock);
+ VERIFY3U(1, ==, zrl_tryenter(&dnh->dnh_zrlock));
dn = dnode_create(os, dnp, NULL, object, dnh);
DNODE_VERIFY(dn);
diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c
index dc7317b411..4a060403da 100644
--- a/usr/src/uts/common/fs/zfs/dnode_sync.c
+++ b/usr/src/uts/common/fs/zfs/dnode_sync.c
@@ -23,6 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2020 Oxide Computer Company
*/
#include <sys/zfs_context.h>
@@ -736,13 +737,22 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
dsfra.dsfra_dnode = dn;
dsfra.dsfra_tx = tx;
dsfra.dsfra_free_indirects = freeing_dnode;
+ mutex_enter(&dn->dn_mtx);
if (freeing_dnode) {
ASSERT(range_tree_contains(dn->dn_free_ranges[txgoff],
0, dn->dn_maxblkid + 1));
}
- mutex_enter(&dn->dn_mtx);
- range_tree_vacate(dn->dn_free_ranges[txgoff],
+ /*
+ * Because dnode_sync_free_range() must drop dn_mtx during its
+ * processing, using it as a callback to range_tree_vacate() is
+ * not safe. No other operations (besides destroy) are allowed
+ * once range_tree_vacate() has begun, and dropping dn_mtx
+ * would leave a window open for another thread to observe that
+ * invalid (and unsafe) state.
+ */
+ range_tree_walk(dn->dn_free_ranges[txgoff],
dnode_sync_free_range, &dsfra);
+ range_tree_vacate(dn->dn_free_ranges[txgoff], NULL, NULL);
range_tree_destroy(dn->dn_free_ranges[txgoff]);
dn->dn_free_ranges[txgoff] = NULL;
mutex_exit(&dn->dn_mtx);
diff --git a/usr/src/uts/common/fs/zfs/lua/ldebug.c b/usr/src/uts/common/fs/zfs/lua/ldebug.c
index b8ddcff3c6..4ed0094bde 100644
--- a/usr/src/uts/common/fs/zfs/lua/ldebug.c
+++ b/usr/src/uts/common/fs/zfs/lua/ldebug.c
@@ -467,7 +467,7 @@ static const char *getfuncname (lua_State *L, CallInfo *ci, const char **name) {
return getobjname(p, pc, GETARG_A(i), name);
case OP_TFORCALL: { /* for iterator */
*name = "for iterator";
- return "for iterator";
+ return "for iterator";
}
/* all other instructions can call only through metamethods */
case OP_SELF:
diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c
index 42ba1f9a46..fe53d142c2 100644
--- a/usr/src/uts/common/fs/zfs/metaslab.c
+++ b/usr/src/uts/common/fs/zfs/metaslab.c
@@ -2414,7 +2414,7 @@ metaslab_load_impl(metaslab_t *msp)
msp->ms_max_size = metaslab_largest_allocatable(msp);
ASSERT3U(max_size, <=, msp->ms_max_size);
hrtime_t load_end = gethrtime();
- msp->ms_load_time = load_end;
+ msp->ms_load_time = load_end;
if (zfs_flags & ZFS_DEBUG_LOG_SPACEMAP) {
zfs_dbgmsg("loading: txg %llu, spa %s, vdev_id %llu, "
"ms_id %llu, smp_length %llu, "
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index fc08eebbc0..a040fbfea5 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -2408,7 +2408,8 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type)
spa->spa_loaded_ts.tv_nsec = 0;
}
if (error != EBADF) {
- zfs_ereport_post(ereport, spa, NULL, NULL, NULL, 0, 0);
+ (void) zfs_ereport_post(ereport, spa,
+ NULL, NULL, NULL, 0, 0);
}
}
spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c
index 4719696ca4..ae814208fd 100644
--- a/usr/src/uts/common/fs/zfs/spa_config.c
+++ b/usr/src/uts/common/fs/zfs/spa_config.c
@@ -280,7 +280,8 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent)
* resource issues are resolved.
*/
if (target->spa_ccw_fail_time == 0) {
- zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
+ (void) zfs_ereport_post(
+ FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
target, NULL, NULL, NULL, 0, 0);
}
target->spa_ccw_fail_time = gethrtime();
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index e82b309537..254af68099 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -1365,7 +1365,7 @@ vdev_probe_done(zio_t *zio)
} else {
ASSERT(zio->io_error != 0);
vdev_dbgmsg(vd, "failed probe");
- zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
+ (void) zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
spa, vd, NULL, NULL, 0, 0);
zio->io_error = SET_ERROR(ENXIO);
}
@@ -1717,7 +1717,8 @@ vdev_open(vdev_t *vd)
*/
if (ashift > vd->vdev_top->vdev_ashift &&
vd->vdev_ops->vdev_op_leaf) {
- zfs_ereport_post(FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT,
+ (void) zfs_ereport_post(
+ FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT,
spa, vd, NULL, NULL, 0, 0);
}
@@ -4408,7 +4409,7 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
class = FM_EREPORT_ZFS_DEVICE_UNKNOWN;
}
- zfs_ereport_post(class, spa, vd, NULL, NULL,
+ (void) zfs_ereport_post(class, spa, vd, NULL, NULL,
save_state, 0);
}
diff --git a/usr/src/uts/common/fs/zfs/vdev_indirect.c b/usr/src/uts/common/fs/zfs/vdev_indirect.c
index effea61bc6..6c636dd4d2 100644
--- a/usr/src/uts/common/fs/zfs/vdev_indirect.c
+++ b/usr/src/uts/common/fs/zfs/vdev_indirect.c
@@ -1382,8 +1382,8 @@ vdev_indirect_checksum_error(zio_t *zio,
void *bad_buf = abd_borrow_buf_copy(ic->ic_data, is->is_size);
abd_t *good_abd = is->is_good_child->ic_data;
void *good_buf = abd_borrow_buf_copy(good_abd, is->is_size);
- zfs_ereport_post_checksum(zio->io_spa, vd, &zio->io_bookmark, zio,
- is->is_target_offset, is->is_size, good_buf, bad_buf, &zbc);
+ (void) zfs_ereport_post_checksum(zio->io_spa, vd, &zio->io_bookmark,
+ zio, is->is_target_offset, is->is_size, good_buf, bad_buf, &zbc);
abd_return_buf(ic->ic_data, bad_buf, is->is_size);
abd_return_buf(good_abd, good_buf, is->is_size);
}
@@ -1459,7 +1459,7 @@ vdev_indirect_all_checksum_errors(zio_t *zio)
vd->vdev_stat.vs_checksum_errors++;
mutex_exit(&vd->vdev_stat_lock);
- zfs_ereport_post_checksum(zio->io_spa, vd,
+ (void) zfs_ereport_post_checksum(zio->io_spa, vd,
&zio->io_bookmark, zio, is->is_target_offset,
is->is_size, NULL, NULL, NULL);
}
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c
index e4db03ce89..381c2ff84f 100644
--- a/usr/src/uts/common/fs/zfs/vdev_raidz.c
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c
@@ -1968,7 +1968,7 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
zbc.zbc_has_cksum = 0;
zbc.zbc_injected = rm->rm_ecksuminjected;
- zfs_ereport_post_checksum(zio->io_spa, vd,
+ (void) zfs_ereport_post_checksum(zio->io_spa, vd,
&zio->io_bookmark, zio, rc->rc_offset, rc->rc_size,
rc->rc_abd, bad_data, &zbc);
}
diff --git a/usr/src/uts/common/fs/zfs/zfs_fm.c b/usr/src/uts/common/fs/zfs/zfs_fm.c
index dd854c12e1..2118fd549e 100644
--- a/usr/src/uts/common/fs/zfs/zfs_fm.c
+++ b/usr/src/uts/common/fs/zfs/zfs_fm.c
@@ -735,7 +735,7 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
report->zcr_length = length;
#ifdef _KERNEL
- zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector,
+ (void) zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector,
FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio, offset, length);
if (report->zcr_ereport == NULL) {
diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c
index 99011b83b4..c016b5c1ea 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c
@@ -4839,7 +4839,7 @@ zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp,
{
if (vp->v_type == VDIR)
return (0);
- return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
+ return ((*noffp < 0) ? EINVAL : 0);
}
/*
diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c
index 5215a58bf2..9981263343 100644
--- a/usr/src/uts/common/fs/zfs/zio.c
+++ b/usr/src/uts/common/fs/zfs/zio.c
@@ -483,7 +483,7 @@ error:
zio->io_error = SET_ERROR(EIO);
if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(spa, &zio->io_bookmark);
- zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
+ (void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
spa, NULL, &zio->io_bookmark, zio, 0, 0);
}
} else {
@@ -1995,7 +1995,7 @@ zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
"failure and has been suspended; `zpool clear` will be required "
"before the pool can be written to.", spa_name(spa));
- zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL,
+ (void) zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL,
NULL, NULL, 0, 0);
mutex_enter(&spa->spa_suspend_lock);
@@ -4265,7 +4265,7 @@ zio_done(zio_t *zio)
zio->io_vd->vdev_stat.vs_slow_ios++;
mutex_exit(&zio->io_vd->vdev_stat_lock);
- zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
+ (void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
zio->io_spa, zio->io_vd, &zio->io_bookmark,
zio, 0, 0);
}
@@ -4280,7 +4280,7 @@ zio_done(zio_t *zio)
* device is currently unavailable.
*/
if (zio->io_error != ECKSUM && vd != NULL && !vdev_is_dead(vd))
- zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd,
+ (void) zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd,
&zio->io_bookmark, zio, 0, 0);
if ((zio->io_error == EIO || !(zio->io_flags &
@@ -4291,7 +4291,7 @@ zio_done(zio_t *zio)
* error and generate a logical data ereport.
*/
spa_log_error(spa, &zio->io_bookmark);
- zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL,
+ (void) zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL,
&zio->io_bookmark, zio, 0, 0);
}
}
diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c
index 2e684a5ff0..2495fb015d 100644
--- a/usr/src/uts/common/fs/zfs/zvol.c
+++ b/usr/src/uts/common/fs/zfs/zvol.c
@@ -1161,10 +1161,10 @@ zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size,
ASSERT(size <= zv->zv_volblocksize);
/* Locate the extent this belongs to */
- ze = list_head(&zv->zv_extents);
- while (offset >= ze->ze_nblks * zv->zv_volblocksize) {
+ for (ze = list_head(&zv->zv_extents);
+ ze != NULL && offset >= ze->ze_nblks * zv->zv_volblocksize;
+ ze = list_next(&zv->zv_extents, ze)) {
offset -= ze->ze_nblks * zv->zv_volblocksize;
- ze = list_next(&zv->zv_extents, ze);
}
if (ze == NULL)
@@ -1232,7 +1232,7 @@ zvol_strategy(buf_t *bp)
addr = bp->b_un.b_addr;
resid = bp->b_bcount;
- if (resid > 0 && (off < 0 || off >= volsize)) {
+ if (resid > 0 && off >= volsize) {
bioerror(bp, EIO);
biodone(bp);
return (0);
diff --git a/usr/src/uts/common/inet/ip/ipclassifier.c b/usr/src/uts/common/inet/ip/ipclassifier.c
index 4f3ec2d817..69af77db9a 100644
--- a/usr/src/uts/common/inet/ip/ipclassifier.c
+++ b/usr/src/uts/common/inet/ip/ipclassifier.c
@@ -22,6 +22,7 @@
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2016 Joyent, Inc.
* Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2020 Joyent, Inc.
*/
/*
@@ -2772,7 +2773,11 @@ conn_get_socket_info(conn_t *connp, mib2_socketInfoEntry_t *sie)
return (NULL);
}
- mutex_exit(&connp->conn_lock);
+ /*
+ * Continue to hold conn_lock because we don't want to race with an
+ * in-progress close, which will have set-to-NULL (and destroyed
+ * upper_handle, aka sonode (and vnode)) BEFORE setting CONN_CLOSING.
+ */
if (connp->conn_upper_handle != NULL) {
vn = (*connp->conn_upcalls->su_get_vnode)
@@ -2784,6 +2789,8 @@ conn_get_socket_info(conn_t *connp, mib2_socketInfoEntry_t *sie)
flags |= MIB2_SOCKINFO_STREAM;
}
+ mutex_exit(&connp->conn_lock);
+
if (vn == NULL || VOP_GETATTR(vn, &attr, 0, CRED(), NULL) != 0) {
if (vn != NULL)
VN_RELE(vn);
diff --git a/usr/src/uts/common/inet/ip/ipsecesp.c b/usr/src/uts/common/inet/ip/ipsecesp.c
index e0efbbf3ce..4b4e88dcf6 100644
--- a/usr/src/uts/common/inet/ip/ipsecesp.c
+++ b/usr/src/uts/common/inet/ip/ipsecesp.c
@@ -1843,6 +1843,7 @@ esp_submit_req_inbound(mblk_t *esp_mp, ip_recv_attr_t *ira,
ipsec_stack_t *ipss = ns->netstack_ipsec;
ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
+ mp = NULL;
do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
force = (assoc->ipsa_flags & IPSA_F_ASYNC);
@@ -2172,6 +2173,7 @@ esp_submit_req_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa, ipsa_t *assoc,
esp3dbg(espstack, ("esp_submit_req_outbound:%s",
is_natt ? "natt" : "not natt"));
+ mp = NULL;
do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
force = (assoc->ipsa_flags & IPSA_F_ASYNC);
@@ -2441,6 +2443,7 @@ esp_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa)
* Reality check....
*/
ipha = (ipha_t *)data_mp->b_rptr; /* So we can call esp_acquire(). */
+ ip6h = (ip6_t *)ipha;
if (ixa->ixa_flags & IXAF_IS_IPV4) {
ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
@@ -2455,7 +2458,6 @@ esp_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa)
ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
af = AF_INET6;
- ip6h = (ip6_t *)ipha;
bzero(&ipp, sizeof (ipp));
divpoint = ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, NULL);
if (ipp.ipp_dstopts != NULL &&
diff --git a/usr/src/uts/common/inet/ip/sadb.c b/usr/src/uts/common/inet/ip/sadb.c
index 288c0e3e18..5f1d1c96ee 100644
--- a/usr/src/uts/common/inet/ip/sadb.c
+++ b/usr/src/uts/common/inet/ip/sadb.c
@@ -1067,6 +1067,15 @@ sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
int srcidsize, dstidsize, senslen, osenslen;
sa_family_t fam, pfam; /* Address family for SADB_EXT_ADDRESS */
/* src/dst and proxy sockaddrs. */
+
+ authsize = 0;
+ encrsize = 0;
+ pfam = 0;
+ srcidsize = 0;
+ dstidsize = 0;
+ paddrsize = 0;
+ senslen = 0;
+ osenslen = 0;
/*
* The following are pointers into the PF_KEY message this PF_KEY
* message creates.
@@ -1100,6 +1109,7 @@ sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
*/
alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
sizeof (sadb_lifetime_t);
+ otherspi = 0;
fam = ipsa->ipsa_addrfam;
switch (fam) {
@@ -1770,6 +1780,8 @@ sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
(ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
(ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
+ diagnostic = 0;
+
/* Assign both sockaddrs, the compiler will do the right thing. */
sin = (struct sockaddr_in *)(addr + 1);
sin6 = (struct sockaddr_in6 *)(addr + 1);
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index 554fe8b78f..88d558fd10 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -21,10 +21,10 @@
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2019 Joyent, Inc.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, 2017 by Delphix. All rights reserved.
* Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -1018,10 +1018,23 @@ finish:
/* If we have an upper handle (socket), release it */
if (IPCL_IS_NONSTR(connp)) {
- ASSERT(connp->conn_upper_handle != NULL);
- (*connp->conn_upcalls->su_closed)(connp->conn_upper_handle);
+ sock_upcalls_t *upcalls = connp->conn_upcalls;
+ sock_upper_handle_t handle = connp->conn_upper_handle;
+
+ ASSERT(upcalls != NULL);
+ ASSERT(upcalls->su_closed != NULL);
+ ASSERT(handle != NULL);
+ /*
+ * Set these to NULL first because closed() will free upper
+ * structures. Acquire conn_lock because an external caller
+ * like conn_get_socket_info() will upcall if these are
+ * non-NULL.
+ */
+ mutex_enter(&connp->conn_lock);
connp->conn_upper_handle = NULL;
connp->conn_upcalls = NULL;
+ mutex_exit(&connp->conn_lock);
+ upcalls->su_closed(handle);
}
}
@@ -1435,13 +1448,26 @@ tcp_free(tcp_t *tcp)
* nothing to do other than clearing the field.
*/
if (connp->conn_upper_handle != NULL) {
+ sock_upcalls_t *upcalls = connp->conn_upcalls;
+ sock_upper_handle_t handle = connp->conn_upper_handle;
+
+ /*
+ * Set these to NULL first because closed() will free upper
+ * structures. Acquire conn_lock because an external caller
+ * like conn_get_socket_info() will upcall if these are
+ * non-NULL.
+ */
+ mutex_enter(&connp->conn_lock);
+ connp->conn_upper_handle = NULL;
+ connp->conn_upcalls = NULL;
+ mutex_exit(&connp->conn_lock);
if (IPCL_IS_NONSTR(connp)) {
- (*connp->conn_upcalls->su_closed)(
- connp->conn_upper_handle);
+ ASSERT(upcalls != NULL);
+ ASSERT(upcalls->su_closed != NULL);
+ ASSERT(handle != NULL);
+ upcalls->su_closed(handle);
tcp->tcp_detached = B_TRUE;
}
- connp->conn_upper_handle = NULL;
- connp->conn_upcalls = NULL;
}
}
diff --git a/usr/src/uts/common/inet/tcp/tcp_output.c b/usr/src/uts/common/inet/tcp/tcp_output.c
index 7a0472f3dd..086668f435 100644
--- a/usr/src/uts/common/inet/tcp/tcp_output.c
+++ b/usr/src/uts/common/inet/tcp/tcp_output.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2017 by Delphix. All rights reserved.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
*/
/* This file contains all TCP output processing functions. */
@@ -1677,11 +1677,23 @@ finish:
/* non-STREAM socket, release the upper handle */
if (IPCL_IS_NONSTR(connp)) {
- ASSERT(connp->conn_upper_handle != NULL);
- (*connp->conn_upcalls->su_closed)
- (connp->conn_upper_handle);
+ sock_upcalls_t *upcalls = connp->conn_upcalls;
+ sock_upper_handle_t handle = connp->conn_upper_handle;
+
+ ASSERT(upcalls != NULL);
+ ASSERT(upcalls->su_closed != NULL);
+ ASSERT(handle != NULL);
+ /*
+ * Set these to NULL first because closed() will free
+ * upper structures. Acquire conn_lock because an
+ * external caller like conn_get_socket_info() will
+ * upcall if these are non-NULL.
+ */
+ mutex_enter(&connp->conn_lock);
connp->conn_upper_handle = NULL;
connp->conn_upcalls = NULL;
+ mutex_exit(&connp->conn_lock);
+ upcalls->su_closed(handle);
}
}
diff --git a/usr/src/uts/common/io/cxgbe/t4nex/adapter.h b/usr/src/uts/common/io/cxgbe/t4nex/adapter.h
index 48edc44341..1192eeb43e 100644
--- a/usr/src/uts/common/io/cxgbe/t4nex/adapter.h
+++ b/usr/src/uts/common/io/cxgbe/t4nex/adapter.h
@@ -559,6 +559,10 @@ struct adapter {
kmutex_t sfl_lock; /* same cache-line as sc_lock? but that's ok */
TAILQ_HEAD(, sge_fl) sfl;
timeout_id_t sfl_timer;
+
+ /* Sensors */
+ id_t temp_sensor;
+ id_t volt_sensor;
};
enum {
diff --git a/usr/src/uts/common/io/cxgbe/t4nex/t4_nexus.c b/usr/src/uts/common/io/cxgbe/t4nex/t4_nexus.c
index ec590228b6..05732e47a1 100644
--- a/usr/src/uts/common/io/cxgbe/t4nex/t4_nexus.c
+++ b/usr/src/uts/common/io/cxgbe/t4nex/t4_nexus.c
@@ -37,6 +37,7 @@
#include <sys/mkdev.h>
#include <sys/queue.h>
#include <sys/containerof.h>
+#include <sys/sensors.h>
#include "version.h"
#include "common/common.h"
@@ -180,6 +181,18 @@ static kmutex_t t4_uld_list_lock;
static SLIST_HEAD(, uld_info) t4_uld_list;
#endif
+static int t4_temperature_read(void *, sensor_ioctl_scalar_t *);
+static int t4_voltage_read(void *, sensor_ioctl_scalar_t *);
+static const ksensor_ops_t t4_temp_ops = {
+ .kso_kind = ksensor_kind_temperature,
+ .kso_scalar = t4_temperature_read
+};
+
+static const ksensor_ops_t t4_volt_ops = {
+ .kso_kind = ksensor_kind_voltage,
+ .kso_scalar = t4_voltage_read
+};
+
int
_init(void)
{
@@ -758,7 +771,23 @@ ofld_queues:
}
sc->flags |= INTR_ALLOCATED;
- ASSERT(rc == DDI_SUCCESS);
+ if ((rc = ksensor_create_scalar_pcidev(dip, SENSOR_KIND_TEMPERATURE,
+ &t4_temp_ops, sc, "temp", &sc->temp_sensor)) != 0) {
+ cxgb_printf(dip, CE_WARN, "failed to create temperature "
+ "sensor: %d", rc);
+ rc = DDI_FAILURE;
+ goto done;
+ }
+
+ if ((rc = ksensor_create_scalar_pcidev(dip, SENSOR_KIND_VOLTAGE,
+ &t4_volt_ops, sc, "vdd", &sc->volt_sensor)) != 0) {
+ cxgb_printf(dip, CE_WARN, "failed to create voltage "
+ "sensor: %d", rc);
+ rc = DDI_FAILURE;
+ goto done;
+ }
+
+
ddi_report_dev(dip);
/*
@@ -849,6 +878,7 @@ t4_devo_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
}
/* Safe to call no matter what */
+ (void) ksensor_remove(dip, KSENSOR_ALL_IDS);
ddi_prop_remove_all(dip);
ddi_remove_minor_node(dip, NULL);
@@ -2919,3 +2949,76 @@ t4_iterate(void (*func)(int, void *), void *arg)
}
#endif
+
+static int
+t4_sensor_read(struct adapter *sc, uint32_t diag, uint32_t *valp)
+{
+ int rc;
+ struct port_info *pi = sc->port[0];
+ uint32_t param, val;
+
+ rc = begin_synchronized_op(pi, 1, 1);
+ if (rc != 0) {
+ return (rc);
+ }
+ param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
+ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
+ V_FW_PARAMS_PARAM_Y(diag);
+ rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
+ end_synchronized_op(pi, 1);
+
+ if (rc != 0) {
+ return (rc);
+ }
+
+ if (val == 0) {
+ return (EIO);
+ }
+
+ *valp = val;
+ return (0);
+}
+
+static int
+t4_temperature_read(void *arg, sensor_ioctl_scalar_t *scalar)
+{
+ int ret;
+ struct adapter *sc = arg;
+ uint32_t val;
+
+ ret = t4_sensor_read(sc, FW_PARAM_DEV_DIAG_TMP, &val);
+ if (ret != 0) {
+ return (ret);
+ }
+
+ /*
+ * The device measures temperature in units of 1 degree Celsius. We
+ * don't know its precision.
+ */
+ scalar->sis_unit = SENSOR_UNIT_CELSIUS;
+ scalar->sis_gran = 1;
+ scalar->sis_prec = 0;
+ scalar->sis_value = val;
+
+ return (0);
+}
+
+static int
+t4_voltage_read(void *arg, sensor_ioctl_scalar_t *scalar)
+{
+ int ret;
+ struct adapter *sc = arg;
+ uint32_t val;
+
+ ret = t4_sensor_read(sc, FW_PARAM_DEV_DIAG_VDD, &val);
+ if (ret != 0) {
+ return (ret);
+ }
+
+ scalar->sis_unit = SENSOR_UNIT_VOLTS;
+ scalar->sis_gran = 1000;
+ scalar->sis_prec = 0;
+ scalar->sis_value = val;
+
+ return (0);
+}
diff --git a/usr/src/uts/common/io/igb/igb_sensor.c b/usr/src/uts/common/io/igb/igb_sensor.c
index b233af2a92..3b41a853c0 100644
--- a/usr/src/uts/common/io/igb/igb_sensor.c
+++ b/usr/src/uts/common/io/igb/igb_sensor.c
@@ -72,7 +72,7 @@
#define EMC1413_REG_EXT3_DIODE_LO 0x2b
static int
-igb_sensor_reg_temp(void *arg, sensor_ioctl_temperature_t *temp)
+igb_sensor_reg_temperature(void *arg, sensor_ioctl_scalar_t *scalar)
{
igb_t *igb = arg;
uint32_t reg;
@@ -87,17 +87,17 @@ igb_sensor_reg_temp(void *arg, sensor_ioctl_temperature_t *temp)
return (EIO);
}
- temp->sit_unit = SENSOR_UNIT_CELSIUS;
- temp->sit_gran = E1000_THMJT_RESOLUTION;
- temp->sit_prec = E1000_THMJT_PRECISION;
- temp->sit_temp = E1000_THMJT_TEMP(reg);
+ scalar->sis_unit = SENSOR_UNIT_CELSIUS;
+ scalar->sis_gran = E1000_THMJT_RESOLUTION;
+ scalar->sis_prec = E1000_THMJT_PRECISION;
+ scalar->sis_value = E1000_THMJT_TEMP(reg);
return (0);
}
static const ksensor_ops_t igb_sensor_reg_ops = {
.kso_kind = ksensor_kind_temperature,
- .kso_temp = igb_sensor_reg_temp
+ .kso_scalar = igb_sensor_reg_temperature
};
static boolean_t
@@ -106,8 +106,9 @@ igb_sensors_create_minors(igb_t *igb)
int ret;
igb_sensors_t *sp = &igb->igb_sensors;
- if ((ret = ksensor_create_temp_pcidev(igb->dip, &igb_sensor_reg_ops,
- igb, "builtin", &sp->isn_reg_ksensor)) != 0) {
+ if ((ret = ksensor_create_scalar_pcidev(igb->dip,
+ SENSOR_KIND_TEMPERATURE, &igb_sensor_reg_ops, igb, "builtin",
+ &sp->isn_reg_ksensor)) != 0) {
igb_log(igb, IGB_LOG_ERROR, "failed to create main sensor: %d",
ret);
return (B_FALSE);
diff --git a/usr/src/uts/common/io/ksensor/ksensor_drv.c b/usr/src/uts/common/io/ksensor/ksensor_drv.c
index 6810e11758..70e99287a2 100644
--- a/usr/src/uts/common/io/ksensor/ksensor_drv.c
+++ b/usr/src/uts/common/io/ksensor/ksensor_drv.c
@@ -90,15 +90,15 @@ ksensor_ioctl_kind(minor_t min, intptr_t arg, int mode)
}
static int
-ksensor_ioctl_temp(minor_t min, intptr_t arg, int mode)
+ksensor_ioctl_scalar(minor_t min, intptr_t arg, int mode)
{
int ret;
- sensor_ioctl_temperature_t temp;
+ sensor_ioctl_scalar_t scalar;
- bzero(&temp, sizeof (temp));
- ret = ksensor_op_temperature((id_t)min, &temp);
+ bzero(&scalar, sizeof (scalar));
+ ret = ksensor_op_scalar((id_t)min, &scalar);
if (ret == 0) {
- if (ddi_copyout(&temp, (void *)arg, sizeof (temp),
+ if (ddi_copyout(&scalar, (void *)arg, sizeof (scalar),
mode & FKIOCTL) != 0) {
ret = EFAULT;
}
@@ -118,10 +118,10 @@ ksensor_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
m = getminor(dev);
switch (cmd) {
- case SENSOR_IOCTL_TYPE:
+ case SENSOR_IOCTL_KIND:
return (ksensor_ioctl_kind(m, arg, mode));
- case SENSOR_IOCTL_TEMPERATURE:
- return (ksensor_ioctl_temp(m, arg, mode));
+ case SENSOR_IOCTL_SCALAR:
+ return (ksensor_ioctl_scalar(m, arg, mode));
default:
return (ENOTTY);
}
diff --git a/usr/src/uts/common/io/ksensor/ksensor_test.c b/usr/src/uts/common/io/ksensor/ksensor_test.c
index ea71ab5559..a98a8b77eb 100644
--- a/usr/src/uts/common/io/ksensor/ksensor_test.c
+++ b/usr/src/uts/common/io/ksensor/ksensor_test.c
@@ -32,21 +32,53 @@ typedef struct ksensor_test {
id_t kt_sensor3;
id_t kt_sensor4;
id_t kt_sensor5;
+ id_t kt_volt;
+ id_t kt_current;
} ksensor_test_t;
static int
-ksensor_test_temperature(void *arg, sensor_ioctl_temperature_t *temp)
+ksensor_test_temp(void *arg, sensor_ioctl_scalar_t *scalar)
{
- temp->sit_unit = SENSOR_UNIT_CELSIUS;
- temp->sit_gran = 4;
- temp->sit_prec = -2;
- temp->sit_temp = 23;
+ scalar->sis_unit = SENSOR_UNIT_CELSIUS;
+ scalar->sis_gran = 4;
+ scalar->sis_prec = -2;
+ scalar->sis_value = 23;
return (0);
}
static const ksensor_ops_t ksensor_test_temp_ops = {
- ksensor_kind_temperature,
- ksensor_test_temperature
+ .kso_kind = ksensor_kind_temperature,
+ .kso_scalar = ksensor_test_temp
+};
+
+static int
+ksensor_test_volt(void *arg, sensor_ioctl_scalar_t *scalar)
+{
+ scalar->sis_unit = SENSOR_UNIT_VOLTS;
+ scalar->sis_gran = 1000;
+ scalar->sis_prec = 0;
+ scalar->sis_value = 3300;
+ return (0);
+}
+
+static const ksensor_ops_t ksensor_test_volt_ops = {
+ .kso_kind = ksensor_kind_voltage,
+ .kso_scalar = ksensor_test_volt
+};
+
+static int
+ksensor_test_current(void *arg, sensor_ioctl_scalar_t *scalar)
+{
+ scalar->sis_unit = SENSOR_UNIT_AMPS;
+ scalar->sis_gran = 10;
+ scalar->sis_prec = 0;
+ scalar->sis_value = 5;
+ return (0);
+}
+
+static const ksensor_ops_t ksensor_test_current_ops = {
+ .kso_kind = ksensor_kind_current,
+ .kso_scalar = ksensor_test_current
};
static int
@@ -56,14 +88,14 @@ ksensor_test_kind_eio(void *arg, sensor_ioctl_kind_t *kindp)
}
static int
-ksensor_test_temp_eio(void *arg, sensor_ioctl_temperature_t *tempp)
+ksensor_test_temp_eio(void *arg, sensor_ioctl_scalar_t *scalar)
{
return (EIO);
}
static const ksensor_ops_t ksensor_test_eio_ops = {
- ksensor_test_kind_eio,
- ksensor_test_temp_eio
+ .kso_kind = ksensor_test_kind_eio,
+ .kso_scalar = ksensor_test_temp_eio
};
static int
@@ -107,7 +139,7 @@ ksensor_test_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
ddi_get_instance(dip));
if ((ret = ksensor_create(dip, &ksensor_test_temp_ops, NULL, buf,
"ddi_sensor:test", &kt->kt_sensor3)) != 0) {
- dev_err(dip, CE_WARN, "failed to attatch sensor %s: %d", buf,
+ dev_err(dip, CE_WARN, "failed to attach sensor %s: %d", buf,
ret);
goto err;
}
@@ -116,7 +148,7 @@ ksensor_test_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
ddi_get_instance(dip));
if ((ret = ksensor_create(dip, &ksensor_test_temp_ops, NULL, buf,
"ddi_sensor:test", &kt->kt_sensor4)) != 0) {
- dev_err(dip, CE_WARN, "failed to attatch sensor %s: %d", buf,
+ dev_err(dip, CE_WARN, "failed to attach sensor %s: %d", buf,
ret);
goto err;
}
@@ -125,7 +157,25 @@ ksensor_test_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
ddi_get_instance(dip));
if ((ret = ksensor_create(dip, &ksensor_test_eio_ops, NULL, buf,
"ddi_sensor:test", &kt->kt_sensor5)) != 0) {
- dev_err(dip, CE_WARN, "failed to attatch sensor %s: %d", buf,
+ dev_err(dip, CE_WARN, "failed to attach sensor %s: %d", buf,
+ ret);
+ goto err;
+ }
+
+ (void) snprintf(buf, sizeof (buf), "test.volt.%d.1",
+ ddi_get_instance(dip));
+ if ((ret = ksensor_create(dip, &ksensor_test_volt_ops, NULL, buf,
+ "ddi_sensor:test", &kt->kt_volt)) != 0) {
+ dev_err(dip, CE_WARN, "failed to attach sensor %s: %d", buf,
+ ret);
+ goto err;
+ }
+
+ (void) snprintf(buf, sizeof (buf), "test.current.%d.1",
+ ddi_get_instance(dip));
+ if ((ret = ksensor_create(dip, &ksensor_test_current_ops, NULL, buf,
+ "ddi_sensor:test", &kt->kt_current)) != 0) {
+ dev_err(dip, CE_WARN, "failed to attach sensor %s: %d", buf,
ret);
goto err;
}
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.c b/usr/src/uts/common/io/mlxcx/mlxcx.c
index dbad9be958..90964d2fd1 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.c
@@ -1066,6 +1066,11 @@ mlxcx_teardown(mlxcx_t *mlxp)
mlxcx_intr_disable(mlxp);
}
+ if (mlxp->mlx_attach & MLXCX_ATTACH_SENSORS) {
+ mlxcx_teardown_sensors(mlxp);
+ mlxp->mlx_attach &= ~MLXCX_ATTACH_SENSORS;
+ }
+
if (mlxp->mlx_attach & MLXCX_ATTACH_CHKTIMERS) {
mlxcx_teardown_checktimers(mlxp);
mlxp->mlx_attach &= ~MLXCX_ATTACH_CHKTIMERS;
@@ -1800,7 +1805,7 @@ mlxcx_setup_ports(mlxcx_t *mlxp)
p->mlx_port_event.mla_mlx = mlxp;
p->mlx_port_event.mla_port = p;
mutex_init(&p->mlx_port_event.mla_mtx, NULL,
- MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_async_intr_pri));
p->mlp_init |= MLXCX_PORT_INIT;
mutex_init(&p->mlp_mtx, NULL, MUTEX_DRIVER,
DDI_INTR_PRI(mlxp->mlx_intr_pri));
@@ -2716,7 +2721,7 @@ mlxcx_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
for (i = 0; i <= MLXCX_FUNC_ID_MAX; i++) {
mlxp->mlx_npages_req[i].mla_mlx = mlxp;
mutex_init(&mlxp->mlx_npages_req[i].mla_mtx, NULL,
- MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_async_intr_pri));
}
mlxp->mlx_attach |= MLXCX_ATTACH_ASYNC_TQ;
@@ -2869,6 +2874,11 @@ mlxcx_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
}
mlxp->mlx_attach |= MLXCX_ATTACH_CHKTIMERS;
+ if (!mlxcx_setup_sensors(mlxp)) {
+ goto err;
+ }
+ mlxp->mlx_attach |= MLXCX_ATTACH_SENSORS;
+
/*
* Finally, tell MAC that we exist!
*/
@@ -2913,7 +2923,6 @@ static struct dev_ops mlxcx_dev_ops = {
.devo_attach = mlxcx_attach,
.devo_detach = mlxcx_detach,
.devo_reset = nodev,
- .devo_power = ddi_power,
.devo_quiesce = ddi_quiesce_not_supported,
.devo_cb_ops = &mlxcx_cb_ops
};
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.h b/usr/src/uts/common/io/mlxcx/mlxcx.h
index 77d36447c6..e28fe89806 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.h
@@ -1009,6 +1009,15 @@ typedef struct {
uint64_t mldp_wq_check_interval_sec;
} mlxcx_drv_props_t;
+typedef struct {
+ mlxcx_t *mlts_mlx;
+ uint8_t mlts_index;
+ id_t mlts_ksensor;
+ int16_t mlts_value;
+ int16_t mlts_max_value;
+ uint8_t mlts_name[MLXCX_MTMP_NAMELEN];
+} mlxcx_temp_sensor_t;
+
typedef enum {
MLXCX_ATTACH_FM = 1 << 0,
MLXCX_ATTACH_PCI_CONFIG = 1 << 1,
@@ -1028,6 +1037,7 @@ typedef enum {
MLXCX_ATTACH_CAPS = 1 << 15,
MLXCX_ATTACH_CHKTIMERS = 1 << 16,
MLXCX_ATTACH_ASYNC_TQ = 1 << 17,
+ MLXCX_ATTACH_SENSORS = 1 << 18
} mlxcx_attach_progress_t;
struct mlxcx {
@@ -1082,6 +1092,7 @@ struct mlxcx {
* Interrupts
*/
uint_t mlx_intr_pri;
+ uint_t mlx_async_intr_pri;
uint_t mlx_intr_type; /* always MSI-X */
int mlx_intr_count;
size_t mlx_intr_size; /* allocation size */
@@ -1171,6 +1182,12 @@ struct mlxcx {
ddi_periodic_t mlx_eq_checktimer;
ddi_periodic_t mlx_cq_checktimer;
ddi_periodic_t mlx_wq_checktimer;
+
+ /*
+ * Sensors
+ */
+ uint8_t mlx_temp_nsensors;
+ mlxcx_temp_sensor_t *mlx_temp_sensors;
};
/*
@@ -1446,6 +1463,12 @@ extern const char *mlxcx_port_status_string(mlxcx_port_status_t);
extern const char *mlxcx_event_name(mlxcx_event_t);
+/*
+ * Sensor Functions
+ */
+extern boolean_t mlxcx_setup_sensors(mlxcx_t *);
+extern void mlxcx_teardown_sensors(mlxcx_t *);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
index c8eb1335ea..32c40ec3ea 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
@@ -667,7 +667,8 @@ static void
mlxcx_cmd_init(mlxcx_t *mlxp, mlxcx_cmd_t *cmd)
{
bzero(cmd, sizeof (*cmd));
- mutex_init(&cmd->mlcmd_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&cmd->mlcmd_lock, NULL, MUTEX_DRIVER,
+ DDI_INTR_PRI(mlxp->mlx_async_intr_pri));
cv_init(&cmd->mlcmd_cv, NULL, CV_DRIVER, NULL);
cmd->mlcmd_token = id_alloc(mlxp->mlx_cmd.mcmd_tokens);
cmd->mlcmd_poll = mlxp->mlx_cmd.mcmd_polled;
@@ -1687,6 +1688,10 @@ mlxcx_reg_name(mlxcx_register_id_t rid)
return ("PPCNT");
case MLXCX_REG_PPLM:
return ("PPLM");
+ case MLXCX_REG_MTCAP:
+ return ("MTCAP");
+ case MLXCX_REG_MTMP:
+ return ("MTMP");
default:
return ("???");
}
@@ -1736,6 +1741,12 @@ mlxcx_cmd_access_register(mlxcx_t *mlxp, mlxcx_cmd_reg_opmod_t opmod,
case MLXCX_REG_PPLM:
dsize = sizeof (mlxcx_reg_pplm_t);
break;
+ case MLXCX_REG_MTCAP:
+ dsize = sizeof (mlxcx_reg_mtcap_t);
+ break;
+ case MLXCX_REG_MTMP:
+ dsize = sizeof (mlxcx_reg_mtmp_t);
+ break;
default:
dsize = 0;
VERIFY(0);
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
index 89645bb2b1..941eb0f9e7 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
@@ -809,19 +809,32 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
if (wq->mlwq_state & MLXCX_WQ_BUFFERS) {
+ list_t cq_buffers;
+
+ /*
+ * Take the buffers away from the CQ. If the CQ is being
+ * processed and the WQ has been stopped, a completion
+ * which does not match to a buffer will be ignored.
+ */
+ list_create(&cq_buffers, sizeof (mlxcx_buffer_t),
+ offsetof(mlxcx_buffer_t, mlb_cq_entry));
+
+ list_move_tail(&cq_buffers, &cq->mlcq_buffers);
+
+ mutex_enter(&cq->mlcq_bufbmtx);
+ list_move_tail(&cq_buffers, &cq->mlcq_buffers_b);
+ mutex_exit(&cq->mlcq_bufbmtx);
+
+ cq->mlcq_bufcnt = 0;
+
mutex_exit(&wq->mlwq_mtx);
mutex_exit(&cq->mlcq_mtx);
/* Return any outstanding buffers to the free pool. */
- while ((buf = list_remove_head(&cq->mlcq_buffers)) != NULL) {
+ while ((buf = list_remove_head(&cq_buffers)) != NULL) {
mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
}
- mutex_enter(&cq->mlcq_bufbmtx);
- while ((buf = list_remove_head(&cq->mlcq_buffers_b)) != NULL) {
- mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
- }
- mutex_exit(&cq->mlcq_bufbmtx);
- cq->mlcq_bufcnt = 0;
+ list_destroy(&cq_buffers);
s = wq->mlwq_bufs;
mutex_enter(&s->mlbs_mtx);
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
index f79c148d20..53ea4d683e 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
@@ -12,6 +12,7 @@
/*
* Copyright (c) 2020, the University of Queensland
* Copyright 2020 RackTop Systems, Inc.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
/*
@@ -922,6 +923,20 @@ lookagain:
if (added)
goto lookagain;
+ /*
+ * This check could go just after the lookagain
+ * label, but it is a hot code path so we don't
+ * want to unnecessarily grab a lock and check
+ * a flag for a relatively rare event (the ring
+ * being stopped).
+ */
+ mutex_enter(&wq->mlwq_mtx);
+ if ((wq->mlwq_state & MLXCX_WQ_STARTED) == 0) {
+ mutex_exit(&wq->mlwq_mtx);
+ goto nextcq;
+ }
+ mutex_exit(&wq->mlwq_mtx);
+
buf = list_head(&mlcq->mlcq_buffers);
mlxcx_warn(mlxp, "got completion on CQ %x but "
"no buffer matching wqe found: %x (first "
@@ -1165,6 +1180,7 @@ mlxcx_intr_setup(mlxcx_t *mlxp)
ret = ddi_intr_get_supported_types(dip, &types);
if (ret != DDI_SUCCESS) {
+ mlxcx_warn(mlxp, "Failed to get supported interrupt types");
return (B_FALSE);
}
@@ -1176,15 +1192,21 @@ mlxcx_intr_setup(mlxcx_t *mlxp)
ret = ddi_intr_get_nintrs(dip, DDI_INTR_TYPE_MSIX, &nintrs);
if (ret != DDI_SUCCESS) {
+ mlxcx_warn(mlxp, "Failed to get number of interrupts");
return (B_FALSE);
}
if (nintrs < 2) {
- mlxcx_warn(mlxp, "%d MSI-X interrupts available, but mlxcx "
+ mlxcx_warn(mlxp, "%d MSI-X interrupts supported, but mlxcx "
"requires 2", nintrs);
return (B_FALSE);
}
ret = ddi_intr_get_navail(dip, DDI_INTR_TYPE_MSIX, &navail);
+ if (ret != DDI_SUCCESS) {
+ mlxcx_warn(mlxp,
+ "Failed to get number of available interrupts");
+ return (B_FALSE);
+ }
if (navail < 2) {
mlxcx_warn(mlxp, "%d MSI-X interrupts available, but mlxcx "
"requires 2", navail);
@@ -1203,10 +1225,14 @@ mlxcx_intr_setup(mlxcx_t *mlxp)
ret = ddi_intr_alloc(dip, mlxp->mlx_intr_handles, DDI_INTR_TYPE_MSIX,
0, navail, &mlxp->mlx_intr_count, DDI_INTR_ALLOC_NORMAL);
if (ret != DDI_SUCCESS) {
+ mlxcx_warn(mlxp, "Failed to allocate %d interrupts", navail);
mlxcx_intr_teardown(mlxp);
return (B_FALSE);
}
if (mlxp->mlx_intr_count < mlxp->mlx_intr_cq0 + 1) {
+ mlxcx_warn(mlxp, "%d MSI-X interrupts allocated, but mlxcx "
+ "requires %d", mlxp->mlx_intr_count,
+ mlxp->mlx_intr_cq0 + 1);
mlxcx_intr_teardown(mlxp);
return (B_FALSE);
}
@@ -1214,10 +1240,29 @@ mlxcx_intr_setup(mlxcx_t *mlxp)
ret = ddi_intr_get_pri(mlxp->mlx_intr_handles[0], &mlxp->mlx_intr_pri);
if (ret != DDI_SUCCESS) {
+ mlxcx_warn(mlxp, "Failed to get interrupt priority");
mlxcx_intr_teardown(mlxp);
return (B_FALSE);
}
+ /*
+ * Set the interrupt priority for the asynchronous handler higher
+ * than the ring handlers. Some operations which issue commands,
+ * and thus rely on the async interrupt handler for posting
+ * completion, do so with a CQ mutex held. The CQ mutex is also
+ * acquired during ring processing, so if the ring processing vector
+ * happens to be assigned to the same CPU as the async vector
+ * it can hold off the async interrupt thread and lead to a deadlock.
+ * By assigning a higher priority to the async vector, it will
+ * always be dispatched.
+ */
+ mlxp->mlx_async_intr_pri = mlxp->mlx_intr_pri;
+ if (mlxp->mlx_async_intr_pri < LOCK_LEVEL) {
+ mlxp->mlx_async_intr_pri++;
+ } else {
+ mlxp->mlx_intr_pri--;
+ }
+
mlxp->mlx_eqs_size = mlxp->mlx_intr_count *
sizeof (mlxcx_event_queue_t);
mlxp->mlx_eqs = kmem_zalloc(mlxp->mlx_eqs_size, KM_SLEEP);
@@ -1227,8 +1272,11 @@ mlxcx_intr_setup(mlxcx_t *mlxp)
* mutex and avl tree to be init'ed - so do it now.
*/
for (i = 0; i < mlxp->mlx_intr_count; ++i) {
+ uint_t pri = (i == 0) ? mlxp->mlx_async_intr_pri :
+ mlxp->mlx_intr_pri;
+
mutex_init(&mlxp->mlx_eqs[i].mleq_mtx, NULL, MUTEX_DRIVER,
- DDI_INTR_PRI(mlxp->mlx_intr_pri));
+ DDI_INTR_PRI(pri));
cv_init(&mlxp->mlx_eqs[i].mleq_cv, NULL, CV_DRIVER, NULL);
if (i < mlxp->mlx_intr_cq0)
@@ -1239,9 +1287,38 @@ mlxcx_intr_setup(mlxcx_t *mlxp)
offsetof(mlxcx_completion_queue_t, mlcq_eq_entry));
}
+ while (mlxp->mlx_async_intr_pri > DDI_INTR_PRI_MIN) {
+ ret = ddi_intr_set_pri(mlxp->mlx_intr_handles[0],
+ mlxp->mlx_async_intr_pri);
+ if (ret == DDI_SUCCESS)
+ break;
+ mlxcx_note(mlxp,
+ "!Failed to set interrupt priority to %u for "
+ "async interrupt vector", mlxp->mlx_async_intr_pri);
+ /*
+ * If it was not possible to set the IPL for the async
+ * interrupt to the desired value, then try a lower priority.
+ * Some PSMs can only accommodate a limited number of vectors
+ * at eatch priority level (or group of priority levels). Since
+ * the async priority must be set higher than the ring
+ * handlers, lower both. The ring handler priority is set
+ * below.
+ */
+ mlxp->mlx_async_intr_pri--;
+ mlxp->mlx_intr_pri--;
+ }
+
+ if (mlxp->mlx_async_intr_pri == DDI_INTR_PRI_MIN) {
+ mlxcx_warn(mlxp, "Failed to find an interrupt priority for "
+ "async interrupt vector");
+ mlxcx_intr_teardown(mlxp);
+ return (B_FALSE);
+ }
+
ret = ddi_intr_add_handler(mlxp->mlx_intr_handles[0], mlxcx_intr_async,
(caddr_t)mlxp, (caddr_t)&mlxp->mlx_eqs[0]);
if (ret != DDI_SUCCESS) {
+ mlxcx_warn(mlxp, "Failed to add async interrupt handler");
mlxcx_intr_teardown(mlxp);
return (B_FALSE);
}
@@ -1268,9 +1345,29 @@ mlxcx_intr_setup(mlxcx_t *mlxp)
eqt = MLXCX_EQ_TYPE_RX;
}
+ while (mlxp->mlx_intr_pri >= DDI_INTR_PRI_MIN) {
+ ret = ddi_intr_set_pri(mlxp->mlx_intr_handles[i],
+ mlxp->mlx_intr_pri);
+ if (ret == DDI_SUCCESS)
+ break;
+ mlxcx_note(mlxp, "!Failed to set interrupt priority to "
+ "%u for interrupt vector %d",
+ mlxp->mlx_intr_pri, i);
+ mlxp->mlx_intr_pri--;
+ }
+ if (mlxp->mlx_intr_pri < DDI_INTR_PRI_MIN) {
+ mlxcx_warn(mlxp,
+ "Failed to find an interrupt priority for "
+ "interrupt vector %d", i);
+ mlxcx_intr_teardown(mlxp);
+ return (B_FALSE);
+ }
+
ret = ddi_intr_add_handler(mlxp->mlx_intr_handles[i],
mlxcx_intr_n, (caddr_t)mlxp, (caddr_t)&mlxp->mlx_eqs[i]);
if (ret != DDI_SUCCESS) {
+ mlxcx_warn(mlxp, "Failed to add interrupt handler %d",
+ i);
mlxcx_intr_teardown(mlxp);
return (B_FALSE);
}
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
index 1987ae06ea..4b92de92b8 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
@@ -2530,6 +2530,30 @@ typedef struct {
uint16be_t mlrd_pplm_fec_override_admin_fdr10;
} mlxcx_reg_pplm_t;
+typedef struct {
+ uint8_t mlrd_mtcap_rsvd[3];
+ uint8_t mlrd_mtcap_sensor_count;
+ uint8_t mlrd_mtcap_rsvd1[4];
+ uint64be_t mlrd_mtcap_sensor_map;
+} mlxcx_reg_mtcap_t;
+
+#define MLXCX_MTMP_NAMELEN 8
+
+typedef struct {
+ uint8_t mlrd_mtmp_rsvd[2];
+ uint16be_t mlrd_mtmp_sensor_index;
+ uint8_t mlrd_mtmp_rsvd1[2];
+ uint16be_t mlrd_mtmp_temperature;
+ bits16_t mlrd_mtmp_max_flags;
+ uint16be_t mlrd_mtmp_max_temperature;
+ bits16_t mlrd_mtmp_tee;
+ uint16be_t mlrd_mtmp_temp_thresh_hi;
+ uint8_t mlrd_mtmp_rsvd2[2];
+ uint16be_t mlrd_mtmp_temp_thresh_lo;
+ uint8_t mlrd_mtmp_rsvd3[4];
+ uint8_t mlrd_mtmp_name[MLXCX_MTMP_NAMELEN];
+} mlxcx_reg_mtmp_t;
+
typedef enum {
MLXCX_REG_PMTU = 0x5003,
MLXCX_REG_PTYS = 0x5004,
@@ -2540,6 +2564,8 @@ typedef enum {
MLXCX_REG_MCIA = 0x9014,
MLXCX_REG_PPCNT = 0x5008,
MLXCX_REG_PPLM = 0x5023,
+ MLXCX_REG_MTCAP = 0x9009,
+ MLXCX_REG_MTMP = 0x900A
} mlxcx_register_id_t;
typedef union {
@@ -2551,6 +2577,8 @@ typedef union {
mlxcx_reg_mcia_t mlrd_mcia;
mlxcx_reg_ppcnt_t mlrd_ppcnt;
mlxcx_reg_pplm_t mlrd_pplm;
+ mlxcx_reg_mtcap_t mlrd_mtcap;
+ mlxcx_reg_mtmp_t mlrd_mtmp;
} mlxcx_register_data_t;
typedef enum {
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_sensor.c b/usr/src/uts/common/io/mlxcx/mlxcx_sensor.c
new file mode 100644
index 0000000000..6d2c7d0778
--- /dev/null
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_sensor.c
@@ -0,0 +1,126 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Oxide Computer Company
+ */
+
+#include <mlxcx.h>
+#include <sys/sensors.h>
+
+/*
+ * The PRM indicates that the temperature is measured in 1/8th degrees.
+ */
+#define MLXCX_TEMP_GRAN 8
+
+/*
+ * Read a single temperature sensor entry. The ksensor framework guarantees that
+ * it will only call this once for a given sensor at any time, though multiple
+ * sensors can be in parallel.
+ */
+static int
+mlxcx_temperature_read(void *arg, sensor_ioctl_scalar_t *scalar)
+{
+ boolean_t ok;
+ uint16_t tmp;
+ mlxcx_register_data_t data;
+ mlxcx_temp_sensor_t *sensor = arg;
+ mlxcx_t *mlxp = sensor->mlts_mlx;
+
+ bzero(&data, sizeof (data));
+ data.mlrd_mtmp.mlrd_mtmp_sensor_index = to_be16(sensor->mlts_index);
+ ok = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_MTMP, &data);
+ if (!ok) {
+ return (EIO);
+ }
+
+ tmp = from_be16(data.mlrd_mtmp.mlrd_mtmp_temperature);
+ sensor->mlts_value = (int16_t)tmp;
+ tmp = from_be16(data.mlrd_mtmp.mlrd_mtmp_max_temperature);
+ sensor->mlts_max_value = (int16_t)tmp;
+ bcopy(data.mlrd_mtmp.mlrd_mtmp_name, sensor->mlts_name,
+ sizeof (sensor->mlts_name));
+
+ scalar->sis_unit = SENSOR_UNIT_CELSIUS;
+ scalar->sis_gran = MLXCX_TEMP_GRAN;
+ scalar->sis_prec = 0;
+ scalar->sis_value = (int64_t)sensor->mlts_value;
+
+ return (0);
+}
+
+static const ksensor_ops_t mlxcx_temp_ops = {
+ .kso_kind = ksensor_kind_temperature,
+ .kso_scalar = mlxcx_temperature_read
+};
+
+void
+mlxcx_teardown_sensors(mlxcx_t *mlxp)
+{
+ if (mlxp->mlx_temp_nsensors == 0)
+ return;
+ (void) ksensor_remove(mlxp->mlx_dip, KSENSOR_ALL_IDS);
+ kmem_free(mlxp->mlx_temp_sensors, sizeof (mlxcx_temp_sensor_t) *
+ mlxp->mlx_temp_nsensors);
+}
+
+boolean_t
+mlxcx_setup_sensors(mlxcx_t *mlxp)
+{
+ mlxcx_register_data_t data;
+ boolean_t ok;
+
+ mlxp->mlx_temp_nsensors = 0;
+ bzero(&data, sizeof (data));
+ ok = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
+ MLXCX_REG_MTCAP, &data);
+ if (!ok) {
+ return (B_FALSE);
+ }
+
+ if (data.mlrd_mtcap.mlrd_mtcap_sensor_count == 0) {
+ return (B_TRUE);
+ }
+
+ mlxp->mlx_temp_nsensors = data.mlrd_mtcap.mlrd_mtcap_sensor_count;
+ mlxp->mlx_temp_sensors = kmem_zalloc(sizeof (mlxcx_temp_sensor_t) *
+ mlxp->mlx_temp_nsensors, KM_SLEEP);
+
+ for (uint8_t i = 0; i < mlxp->mlx_temp_nsensors; i++) {
+ char buf[32];
+ int ret;
+
+ if (snprintf(buf, sizeof (buf), "temp%u", i) >= sizeof (buf)) {
+			mlxcx_warn(mlxp, "sensor name %u would overflow "
+			    "internal buffer", i);
+ goto err;
+ }
+
+ mlxp->mlx_temp_sensors[i].mlts_mlx = mlxp;
+ mlxp->mlx_temp_sensors[i].mlts_index = i;
+
+ ret = ksensor_create_scalar_pcidev(mlxp->mlx_dip,
+ SENSOR_KIND_TEMPERATURE, &mlxcx_temp_ops,
+ &mlxp->mlx_temp_sensors[i], buf,
+ &mlxp->mlx_temp_sensors[i].mlts_ksensor);
+ if (ret != 0) {
+ mlxcx_warn(mlxp, "failed to create temp sensor %s: %d",
+ buf, ret);
+ goto err;
+ }
+ }
+
+ return (B_TRUE);
+err:
+ mlxcx_teardown_sensors(mlxp);
+ return (B_FALSE);
+}
diff --git a/usr/src/uts/common/io/tem.c b/usr/src/uts/common/io/tem.c
index 573e10cd66..525aa5f585 100644
--- a/usr/src/uts/common/io/tem.c
+++ b/usr/src/uts/common/io/tem.c
@@ -524,10 +524,41 @@ tems_check_videomode(struct vis_devinit *tp)
}
static void
-tems_setup_terminal(struct vis_devinit *tp, size_t height, size_t width)
+tems_setup_font(screen_size_t height, screen_size_t width)
{
bitmap_data_t *font_data;
int i;
+
+ /*
+ * set_font() will select an appropriate sized font for
+ * the number of rows and columns selected. If we don't
+ * have a font that will fit, then it will use the
+ * default builtin font and adjust the rows and columns
+ * to fit on the screen.
+ */
+ font_data = set_font(&tems.ts_c_dimension.height,
+ &tems.ts_c_dimension.width, height, width);
+
+ /*
+ * To use loaded font, we assign the loaded font data to tems.ts_font.
+ * In case of next load, the previously loaded data is freed
+ * when loading the new font.
+ */
+ for (i = 0; i < VFNT_MAPS; i++) {
+ tems.ts_font.vf_map[i] =
+ font_data->font->vf_map[i];
+ tems.ts_font.vf_map_count[i] =
+ font_data->font->vf_map_count[i];
+ }
+
+ tems.ts_font.vf_bytes = font_data->font->vf_bytes;
+ tems.ts_font.vf_width = font_data->font->vf_width;
+ tems.ts_font.vf_height = font_data->font->vf_height;
+}
+
+static void
+tems_setup_terminal(struct vis_devinit *tp, size_t height, size_t width)
+{
int old_blank_buf_size = tems.ts_c_dimension.width *
sizeof (*tems.ts_blank_line);
@@ -546,6 +577,9 @@ tems_setup_terminal(struct vis_devinit *tp, size_t height, size_t width)
tems.ts_c_dimension.height = tp->height;
tems.ts_callbacks = &tem_safe_text_callbacks;
+ tems_setup_font(16 * tp->height + BORDER_PIXELS,
+ 8 * tp->width + BORDER_PIXELS);
+
break;
case VIS_PIXEL:
@@ -559,33 +593,11 @@ tems_setup_terminal(struct vis_devinit *tp, size_t height, size_t width)
}
tems.ts_c_dimension.height = (screen_size_t)height;
tems.ts_c_dimension.width = (screen_size_t)width;
-
tems.ts_p_dimension.height = tp->height;
tems.ts_p_dimension.width = tp->width;
-
tems.ts_callbacks = &tem_safe_pix_callbacks;
- /*
- * set_font() will select a appropriate sized font for
- * the number of rows and columns selected. If we don't
- * have a font that will fit, then it will use the
- * default builtin font. set_font() will adjust the rows
- * and columns to fit on the screen.
- */
- font_data = set_font(&tems.ts_c_dimension.height,
- &tems.ts_c_dimension.width,
- tems.ts_p_dimension.height,
- tems.ts_p_dimension.width);
-
- for (i = 0; i < VFNT_MAPS; i++) {
- tems.ts_font.vf_map[i] =
- font_data->font->vf_map[i];
- tems.ts_font.vf_map_count[i] =
- font_data->font->vf_map_count[i];
- }
- tems.ts_font.vf_bytes = font_data->font->vf_bytes;
- tems.ts_font.vf_width = font_data->font->vf_width;
- tems.ts_font.vf_height = font_data->font->vf_height;
+ tems_setup_font(tp->height, tp->width);
tems.ts_p_offset.y = (tems.ts_p_dimension.height -
(tems.ts_c_dimension.height * tems.ts_font.vf_height)) / 2;
@@ -594,9 +606,7 @@ tems_setup_terminal(struct vis_devinit *tp, size_t height, size_t width)
tems.ts_pix_data_size =
tems.ts_font.vf_width * tems.ts_font.vf_height;
-
tems.ts_pix_data_size *= 4;
-
tems.ts_pdepth = tp->depth;
break;
@@ -963,6 +973,7 @@ tems_get_initial_color(tem_color_t *pcolor)
if (inverse_screen)
flags |= TEM_ATTR_SCREEN_REVERSE;
+#ifdef _HAVE_TEM_FIRMWARE
if (flags != 0) {
/*
* If either reverse flag is set, the screen is in
@@ -980,6 +991,21 @@ tems_get_initial_color(tem_color_t *pcolor)
if (pcolor->bg_color == ANSI_COLOR_WHITE)
flags |= TEM_ATTR_BRIGHT_BG;
}
+#else
+ if (flags != 0) {
+ if (pcolor->fg_color == ANSI_COLOR_WHITE)
+ flags |= TEM_ATTR_BRIGHT_BG;
+
+ if (pcolor->fg_color == ANSI_COLOR_BLACK)
+ flags &= ~TEM_ATTR_BRIGHT_BG;
+ } else {
+ /*
+ * In case of black on white we want bright white for BG.
+ */
+ if (pcolor->bg_color == ANSI_COLOR_WHITE)
+ flags |= TEM_ATTR_BRIGHT_BG;
+ }
+#endif
pcolor->a_flags = flags;
}
diff --git a/usr/src/uts/common/io/tem_safe.c b/usr/src/uts/common/io/tem_safe.c
index 5008d4a4d6..8d47a00d5f 100644
--- a/usr/src/uts/common/io/tem_safe.c
+++ b/usr/src/uts/common/io/tem_safe.c
@@ -129,9 +129,12 @@ static void tem_safe_copy_area(struct tem_vt_state *tem,
screen_pos_t e_col, screen_pos_t e_row,
screen_pos_t t_col, screen_pos_t t_row,
cred_t *credp, enum called_from called_from);
+#if 0
+/* Currently unused */
static void tem_safe_image_display(struct tem_vt_state *, uchar_t *,
int, int, screen_pos_t, screen_pos_t,
cred_t *, enum called_from);
+#endif
static void tem_safe_bell(struct tem_vt_state *tem,
enum called_from called_from);
static void tem_safe_pix_clear_prom_output(struct tem_vt_state *tem,
@@ -1568,6 +1571,7 @@ tem_safe_text_display(struct tem_vt_state *tem, term_char_t *string,
}
}
+#if 0
/*
* This function is used to blit a rectangular color image,
* unperturbed on the underlying framebuffer, to render
@@ -1600,6 +1604,7 @@ tem_safe_image_display(struct tem_vt_state *tem, uchar_t *image,
mutex_exit(&tem->tvs_lock);
mutex_exit(&tems.ts_lock);
}
+#endif
/*ARGSUSED*/
void
@@ -2385,12 +2390,22 @@ tem_safe_get_attr(struct tem_vt_state *tem, text_color_t *fg,
static void
tem_safe_get_color(text_color_t *fg, text_color_t *bg, term_char_t c)
{
+ boolean_t bold_font;
+
*fg = c.tc_fg_color;
*bg = c.tc_bg_color;
+ bold_font = tems.ts_font.vf_map_count[VFNT_MAP_BOLD] != 0;
+
+ /*
+ * If we have both normal and bold font components,
+ * we use bold font for TEM_ATTR_BOLD.
+ * The bright color is traditionally used with TEM_ATTR_BOLD,
+ * in case there is no bold font.
+ */
if (c.tc_fg_color < XLATE_NCOLORS) {
- if (TEM_ATTR_ISSET(c.tc_char,
- TEM_ATTR_BRIGHT_FG | TEM_ATTR_BOLD))
+ if (TEM_ATTR_ISSET(c.tc_char, TEM_ATTR_BRIGHT_FG) ||
+ (TEM_ATTR_ISSET(c.tc_char, TEM_ATTR_BOLD) && !bold_font))
*fg = brt_xlate[c.tc_fg_color];
else
*fg = dim_xlate[c.tc_fg_color];
diff --git a/usr/src/uts/common/io/usb/usba/hubdi.c b/usr/src/uts/common/io/usb/usba/hubdi.c
index 99d75edce3..5207a51490 100644
--- a/usr/src/uts/common/io/usb/usba/hubdi.c
+++ b/usr/src/uts/common/io/usb/usba/hubdi.c
@@ -55,48 +55,45 @@ extern boolean_t consconfig_console_is_ready(void);
/*
* Prototypes for static functions
*/
-static int usba_hubdi_bus_ctl(
- dev_info_t *dip,
- dev_info_t *rdip,
- ddi_ctl_enum_t op,
- void *arg,
- void *result);
-
-static int usba_hubdi_map_fault(
- dev_info_t *dip,
- dev_info_t *rdip,
- struct hat *hat,
- struct seg *seg,
- caddr_t addr,
- struct devpage *dp,
- pfn_t pfn,
- uint_t prot,
- uint_t lock);
+static int usba_hubdi_bus_ctl(dev_info_t *dip,
+ dev_info_t *rdip,
+ ddi_ctl_enum_t op,
+ void *arg,
+ void *result);
+
+static int usba_hubdi_map_fault(dev_info_t *dip,
+ dev_info_t *rdip,
+ struct hat *hat,
+ struct seg *seg,
+ caddr_t addr,
+ struct devpage *dp,
+ pfn_t pfn,
+ uint_t prot,
+ uint_t lock);
static int hubd_busop_get_eventcookie(dev_info_t *dip,
- dev_info_t *rdip,
- char *eventname,
- ddi_eventcookie_t *cookie);
+ dev_info_t *rdip,
+ char *eventname,
+ ddi_eventcookie_t *cookie);
static int hubd_busop_add_eventcall(dev_info_t *dip,
- dev_info_t *rdip,
- ddi_eventcookie_t cookie,
- void (*callback)(dev_info_t *dip,
- ddi_eventcookie_t cookie, void *arg,
- void *bus_impldata),
- void *arg, ddi_callback_id_t *cb_id);
+ dev_info_t *rdip,
+ ddi_eventcookie_t cookie,
+ void (*callback)(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg,
+ void *bus_impldata),
+ void *arg, ddi_callback_id_t *cb_id);
static int hubd_busop_remove_eventcall(dev_info_t *dip,
- ddi_callback_id_t cb_id);
+ ddi_callback_id_t cb_id);
static int hubd_bus_config(dev_info_t *dip,
- uint_t flag,
- ddi_bus_config_op_t op,
- void *arg,
- dev_info_t **child);
+ uint_t flag,
+ ddi_bus_config_op_t op,
+ void *arg,
+ dev_info_t **child);
static int hubd_bus_unconfig(dev_info_t *dip,
- uint_t flag,
- ddi_bus_config_op_t op,
- void *arg);
+ uint_t flag,
+ ddi_bus_config_op_t op,
+ void *arg);
static int hubd_bus_power(dev_info_t *dip, void *impl_arg,
- pm_bus_power_op_t op, void *arg, void *result);
+ pm_bus_power_op_t op, void *arg, void *result);
static usb_port_t hubd_get_port_num(hubd_t *, struct devctl_iocdata *);
static dev_info_t *hubd_get_child_dip(hubd_t *, usb_port_t);
@@ -251,14 +248,14 @@ usba_hubdi_unregister(dev_info_t *dip)
/*ARGSUSED*/
static int
usba_hubdi_map_fault(dev_info_t *dip,
- dev_info_t *rdip,
- struct hat *hat,
- struct seg *seg,
- caddr_t addr,
- struct devpage *dp,
- pfn_t pfn,
- uint_t prot,
- uint_t lock)
+ dev_info_t *rdip,
+ struct hat *hat,
+ struct seg *seg,
+ caddr_t addr,
+ struct devpage *dp,
+ pfn_t pfn,
+ uint_t prot,
+ uint_t lock)
{
return (DDI_FAILURE);
}
@@ -269,9 +266,9 @@ usba_hubdi_map_fault(dev_info_t *dip,
*/
int
usba_hubdi_bind_root_hub(dev_info_t *dip,
- uchar_t *root_hub_config_descriptor,
- size_t config_length,
- usb_dev_descr_t *root_hub_device_descriptor)
+ uchar_t *root_hub_config_descriptor,
+ size_t config_length,
+ usb_dev_descr_t *root_hub_device_descriptor)
{
usba_device_t *usba_device;
usba_hcdi_t *hcdi = usba_hcdi_get_hcdi(dip);
@@ -1145,10 +1142,10 @@ hubd_post_power(hubd_t *hubd, usb_port_t port, pm_bp_child_pwrchg_t *bpc,
*/
static int
usba_hubdi_bus_ctl(dev_info_t *dip,
- dev_info_t *rdip,
- ddi_ctl_enum_t op,
- void *arg,
- void *result)
+ dev_info_t *rdip,
+ ddi_ctl_enum_t op,
+ void *arg,
+ void *result)
{
usba_device_t *hub_usba_device = usba_get_usba_device(rdip);
dev_info_t *root_hub_dip = hub_usba_device->usb_root_hub_dip;
@@ -1294,7 +1291,7 @@ usba_hubdi_bus_ctl(dev_info_t *dip,
/*
* hubd_config_one:
- * enumerate one child according to 'port'
+ * enumerate one child according to 'port'
*/
static boolean_t
@@ -2625,8 +2622,7 @@ hubd_restore_device_state(dev_info_t *dip, hubd_t *hubd)
/*
* wait at least 3 frames before accessing devices
- * (note that delay's minimal time is one clock tick which
- * is 10ms unless hires_tick has been changed)
+ * (note that delay's minimal time is one clock tick).
*/
mutex_exit(HUBD_MUTEX(hubd));
delay(drv_usectohz(10000));
@@ -3331,8 +3327,8 @@ hubd_set_hub_depth(hubd_t *hubd)
int rval;
usb_cr_t completion_reason;
usb_cb_flags_t cb_flags;
- usba_device_t *ud;
- uint16_t depth;
+ usba_device_t *ud;
+ uint16_t depth;
/*
* We only need to set the hub depth devices for hubs that are at least
@@ -6044,7 +6040,7 @@ hubd_ready_device(hubd_t *hubd, dev_info_t *child_dip, usba_device_t *child_ud,
child_ud->usb_active_cfg_ndx = config_index;
child_ud->usb_cfg = child_ud->usb_cfg_array[config_index];
child_ud->usb_cfg_length = config_descriptor.wTotalLength;
- child_ud->usb_cfg_value = config_descriptor.bConfigurationValue;
+ child_ud->usb_cfg_value = config_descriptor.bConfigurationValue;
child_ud->usb_n_ifs = config_descriptor.bNumInterfaces;
child_ud->usb_dip = child_dip;
@@ -6089,11 +6085,11 @@ hubd_ready_device(hubd_t *hubd, dev_info_t *child_dip, usba_device_t *child_ud,
*/
static int
hubd_create_child(dev_info_t *dip,
- hubd_t *hubd,
- usba_device_t *hubd_ud,
- usb_port_status_t port_status,
- usb_port_t port,
- int iteration)
+ hubd_t *hubd,
+ usba_device_t *hubd_ud,
+ usb_port_status_t port_status,
+ usb_port_t port,
+ int iteration)
{
dev_info_t *child_dip = NULL;
usb_dev_descr_t usb_dev_descr;
@@ -6869,9 +6865,9 @@ hubd_free_usba_device(hubd_t *hubd, usba_device_t *usba_device)
*/
static int
hubd_busop_get_eventcookie(dev_info_t *dip,
- dev_info_t *rdip,
- char *eventname,
- ddi_eventcookie_t *cookie)
+ dev_info_t *rdip,
+ char *eventname,
+ ddi_eventcookie_t *cookie)
{
hubd_t *hubd = (hubd_t *)hubd_get_soft_state(dip);
@@ -6891,12 +6887,11 @@ hubd_busop_get_eventcookie(dev_info_t *dip,
static int
hubd_busop_add_eventcall(dev_info_t *dip,
- dev_info_t *rdip,
- ddi_eventcookie_t cookie,
- void (*callback)(dev_info_t *dip,
- ddi_eventcookie_t cookie, void *arg,
- void *bus_impldata),
- void *arg, ddi_callback_id_t *cb_id)
+ dev_info_t *rdip,
+ ddi_eventcookie_t cookie,
+ void (*callback)(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg,
+ void *bus_impldata),
+ void *arg, ddi_callback_id_t *cb_id)
{
hubd_t *hubd = (hubd_t *)hubd_get_soft_state(dip);
usb_port_t port = hubd_child_dip2port(hubd, rdip);
@@ -7671,7 +7666,7 @@ usba_hubdi_open(dev_info_t *dip, dev_t *devp, int flags, int otyp,
/* ARGSUSED */
int
usba_hubdi_close(dev_info_t *dip, dev_t dev, int flag, int otyp,
- cred_t *credp)
+ cred_t *credp)
{
hubd_t *hubd;
diff --git a/usr/src/uts/common/mapfiles/ksensor.mapfile b/usr/src/uts/common/mapfiles/ksensor.mapfile
index 0374c957f7..51b65a2b9d 100644
--- a/usr/src/uts/common/mapfiles/ksensor.mapfile
+++ b/usr/src/uts/common/mapfiles/ksensor.mapfile
@@ -36,8 +36,10 @@ $mapfile_version 2
SYMBOL_SCOPE {
global:
ksensor_create { FLAGS = EXTERN };
- ksensor_create_temp_pcidev { FLAGS = EXTERN };
+ ksensor_create_scalar_pcidev { FLAGS = EXTERN };
ksensor_remove { FLAGS = EXTERN };
+ ksensor_kind_current { FLAGS = EXTERN };
ksensor_kind_temperature { FLAGS = EXTERN };
+ ksensor_kind_voltage { FLAGS = EXTERN };
};
diff --git a/usr/src/uts/common/os/cred.c b/usr/src/uts/common/os/cred.c
index 0bd6cfd44f..5e909667de 100644
--- a/usr/src/uts/common/os/cred.c
+++ b/usr/src/uts/common/os/cred.c
@@ -20,13 +20,14 @@
*/
/*
* Copyright (c) 2013, Ira Cooper. All rights reserved.
+ * Copyright 2020 Nexenta by DDN, Inc. All rights reserved.
*/
/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
+/* All Rights Reserved */
/*
* University Copyright- Copyright (c) 1982, 1986, 1988
@@ -288,7 +289,7 @@ crget(void)
{
cred_t *cr = kmem_cache_alloc(cred_cache, KM_SLEEP);
- bcopy(kcred, cr, crsize);
+ bcopy(zone_kcred(), cr, crsize);
cr->cr_ref = 1;
zone_cred_hold(cr->cr_zone);
if (cr->cr_label)
@@ -377,7 +378,7 @@ crfree(cred_t *cr)
/*
* Copy a cred structure to a new one and free the old one.
* The new cred will have two references. One for the calling process,
- * and one for the thread.
+ * and one for the thread.
*/
cred_t *
crcopy(cred_t *cr)
@@ -404,7 +405,7 @@ crcopy(cred_t *cr)
/*
* Copy a cred structure to a new one and free the old one.
* The new cred will have two references. One for the calling process,
- * and one for the thread.
+ * and one for the thread.
* This variation on crcopy uses a pre-allocated structure for the
* "new" cred.
*/
diff --git a/usr/src/uts/common/os/ksensor.c b/usr/src/uts/common/os/ksensor.c
index c89cad4206..491fbcc7cd 100644
--- a/usr/src/uts/common/os/ksensor.c
+++ b/usr/src/uts/common/os/ksensor.c
@@ -544,14 +544,29 @@ ksensor_create(dev_info_t *dip, const ksensor_ops_t *ops, void *arg,
}
int
-ksensor_create_temp_pcidev(dev_info_t *dip, const ksensor_ops_t *ops,
- void *arg, const char *name, id_t *idp)
+ksensor_create_scalar_pcidev(dev_info_t *dip, uint_t kind,
+ const ksensor_ops_t *ops, void *arg, const char *name, id_t *idp)
{
char *pci_name, *type;
+ const char *class;
int *regs, ret;
uint_t nregs;
uint16_t bus, dev;
+ switch (kind) {
+ case SENSOR_KIND_TEMPERATURE:
+ class = "ddi_sensor:temperature:pci";
+ break;
+ case SENSOR_KIND_VOLTAGE:
+ class = "ddi_sensor:voltage:pci";
+ break;
+ case SENSOR_KIND_CURRENT:
+ class = "ddi_sensor:current:pci";
+ break;
+ default:
+ return (ENOTSUP);
+ }
+
if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, 0, "device_type",
&type) != DDI_PROP_SUCCESS) {
return (EINVAL);
@@ -579,8 +594,7 @@ ksensor_create_temp_pcidev(dev_info_t *dip, const ksensor_ops_t *ops,
pci_name = kmem_asprintf("%x.%x:%s", bus, dev, name);
- ret = ksensor_create(dip, ops, arg, pci_name,
- "ddi_sensor:temperature:pci", idp);
+ ret = ksensor_create(dip, ops, arg, pci_name, class, idp);
strfree(pci_name);
return (ret);
}
@@ -750,7 +764,7 @@ ksensor_op_kind(id_t id, sensor_ioctl_kind_t *kind)
}
int
-ksensor_op_temperature(id_t id, sensor_ioctl_temperature_t *temp)
+ksensor_op_scalar(id_t id, sensor_ioctl_scalar_t *scalar)
{
int ret;
ksensor_t *sensor;
@@ -759,7 +773,7 @@ ksensor_op_temperature(id_t id, sensor_ioctl_temperature_t *temp)
return (ret);
}
- ret = sensor->ksensor_ops->kso_temp(sensor->ksensor_arg, temp);
+ ret = sensor->ksensor_ops->kso_scalar(sensor->ksensor_arg, scalar);
ksensor_release(sensor);
return (ret);
@@ -831,6 +845,20 @@ ksensor_kind_temperature(void *unused, sensor_ioctl_kind_t *k)
return (0);
}
+int
+ksensor_kind_current(void *unused, sensor_ioctl_kind_t *k)
+{
+ k->sik_kind = SENSOR_KIND_CURRENT;
+ return (0);
+}
+
+int
+ksensor_kind_voltage(void *unused, sensor_ioctl_kind_t *k)
+{
+ k->sik_kind = SENSOR_KIND_VOLTAGE;
+ return (0);
+}
+
void
ksensor_init(void)
{
diff --git a/usr/src/uts/common/os/softint.c b/usr/src/uts/common/os/softint.c
index ecdb038c79..8801340cf9 100644
--- a/usr/src/uts/common/os/softint.c
+++ b/usr/src/uts/common/os/softint.c
@@ -58,29 +58,29 @@
*
* Starting state is IDLE.
*
- * softint()
+ * softint()
*
*
* (c)
- * ____________________________________________________
- * | ^ ^
- * v (a) | (b) |
- * IDLE--------------------->PEND--------------------->DRAIN
- * ^ | |
- * | | |
- * | | |
- * | | |
- * | | |
- * | d d
- * | | |
- * | v v
- * | PEND DRAIN
- * | (e) & &
- * |<-----------------------STEAL STEAL
- * ^ |
- * | |
- * | (e) v
- * |_________________________<__________________________|
+ * ____________________________________________________
+ * | ^ ^
+ * v (a) | (b) |
+ * IDLE--------------------->PEND--------------------->DRAIN
+ * ^ | |
+ * | | |
+ * | | |
+ * | | |
+ * | | |
+ * | d d
+ * | | |
+ * | v v
+ * | PEND DRAIN
+ * | (e) & &
+ * |<-----------------------STEAL STEAL
+ * ^ |
+ * | |
+ * | (e) v
+ * |_________________________<__________________________|
*
*
*
@@ -146,9 +146,9 @@ uint_t softcall_pokemax = 10;
/*
* This ensures that softcall entries don't get stuck for long. It's expressed
- * in 10 milliseconds as 1 unit. When hires_tick is set or other clock frequency
- * is used, softcall_init() ensures that it's still expressed as 1 = 10 milli
- * seconds.
+ * in 10 milliseconds as 1 unit. Regardless of the value of hires_tick or
+ * clock frequency, softcall_init() ensures that it's still expressed as 1 =
+ * 10 milliseconds.
*/
unsigned int softcall_delay = 1;
diff --git a/usr/src/uts/common/sys/font.h b/usr/src/uts/common/sys/font.h
index 5733686bf3..f8f154f428 100644
--- a/usr/src/uts/common/sys/font.h
+++ b/usr/src/uts/common/sys/font.h
@@ -84,9 +84,11 @@ typedef struct bitmap_data {
} bitmap_data_t;
typedef enum {
- FONT_AUTO,
- FONT_MANUAL,
- FONT_BOOT
+ FONT_AUTO, /* This font is loaded by software */
+ FONT_MANUAL, /* This font is loaded manually by user */
+ FONT_BOOT, /* This font was passed to kernel by bootloader */
+ FONT_BUILTIN, /* This font was built in at compile time */
+ FONT_RELOAD /* This font is marked to be re-read from file */
} FONT_FLAGS;
struct fontlist {
diff --git a/usr/src/uts/common/sys/ksensor_impl.h b/usr/src/uts/common/sys/ksensor_impl.h
index 8d91973bc3..7407a264a2 100644
--- a/usr/src/uts/common/sys/ksensor_impl.h
+++ b/usr/src/uts/common/sys/ksensor_impl.h
@@ -35,7 +35,7 @@ extern void ksensor_init(void);
* Operations vectors.
*/
extern int ksensor_op_kind(id_t, sensor_ioctl_kind_t *);
-extern int ksensor_op_temperature(id_t, sensor_ioctl_temperature_t *);
+extern int ksensor_op_scalar(id_t, sensor_ioctl_scalar_t *);
/*
* Registration callbacks.
diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h
index a5974f6d7d..bd668cdb6b 100644
--- a/usr/src/uts/common/sys/mac.h
+++ b/usr/src/uts/common/sys/mac.h
@@ -171,6 +171,7 @@ typedef enum {
* Please append properties to the end of this list. Do not reorder the list.
*/
typedef enum {
+ MAC_PROP_PRIVATE = -1,
MAC_PROP_DUPLEX = 0x00000001,
MAC_PROP_SPEED,
MAC_PROP_STATUS,
@@ -248,8 +249,7 @@ typedef enum {
MAC_PROP_ADV_50GFDX_CAP,
MAC_PROP_EN_50GFDX_CAP,
MAC_PROP_EN_FEC_CAP,
- MAC_PROP_ADV_FEC_CAP,
- MAC_PROP_PRIVATE = -1
+ MAC_PROP_ADV_FEC_CAP
} mac_prop_id_t;
/*
diff --git a/usr/src/uts/common/sys/sensors.h b/usr/src/uts/common/sys/sensors.h
index a39dfca239..a5d830a933 100644
--- a/usr/src/uts/common/sys/sensors.h
+++ b/usr/src/uts/common/sys/sensors.h
@@ -33,6 +33,8 @@ extern "C" {
*/
#define SENSOR_KIND_UNKNOWN 0x00
#define SENSOR_KIND_TEMPERATURE 0x01
+#define SENSOR_KIND_VOLTAGE 0x02
+#define SENSOR_KIND_CURRENT 0x03
/*
* Lists of units that senors may have.
@@ -41,52 +43,60 @@ extern "C" {
#define SENSOR_UNIT_CELSIUS 0x01
#define SENSOR_UNIT_FAHRENHEIT 0x02
#define SENSOR_UNIT_KELVIN 0x03
+#define SENSOR_UNIT_VOLTS 0x04
+#define SENSOR_UNIT_AMPS 0x05
#define SENSOR_IOCTL (('s' << 24) | ('e' << 16) | ('n' << 8))
/*
* Ask the sensor what kind of sensor it is.
*/
-#define SENSOR_IOCTL_TYPE (SENSOR_IOCTL | 0x01)
+#define SENSOR_IOCTL_KIND (SENSOR_IOCTL | 0x01)
typedef struct sensor_ioctl_kind {
uint64_t sik_kind;
} sensor_ioctl_kind_t;
/*
- * Ask the sensor for a temperature measurement. The sensor is responsible for
- * returning the units it's in. A temperature measurement is broken down into a
+ * Ask the sensor for a scalar measurement. The sensor is responsible for
+ * returning the units it's in. A scalar measurement is broken down into a
* signed value and a notion of its granularity. The sit_gran member indicates
- * the granularity: the number of increments per degree in the temperature
- * measurement (the sit_temp member). sit_gran is signed and the sign indicates
- * whether one needs to multiply or divide the granularity. For example, a
- * value that set sit_gran to 10 would mean that the value in sit_temp was in
- * 10ths of a degree and that to get the actual value in degrees, one would
- * divide by 10. On the other hand, a negative value means that we effectively
- * have to multiply to get there. For example, a value of -2 would indicate that
- * each value in sit_temp indicated two degrees and to get the temperature in
- * degrees you would multiply sit_temp by two.
+ * the granularity: the number of increments per unit in the measurement (the
+ * sit_value member). sit_gran is signed and the sign indicates whether one
+ * needs to multiply or divide the granularity. The sit_prec member describes a
+ * +/- value (taking sit_gran into account) that describes the precision of the
+ * sensor.
+ *
+ * For example, consider a temperature sensor that set sit_gran to 10. This
+ * would mean that the value in sit_value was in 10ths of a degree and that to
+ * get the actual value in degrees, one would divide by 10. On the other hand, a
+ * negative value means that we effectively have to multiply to get there. For
+ * example, a value of -2 would indicate that each value in sit_value indicated
+ * two degrees and to get the temperature in degrees you would multiply
+ * sit_value by two.
*/
-#define SENSOR_IOCTL_TEMPERATURE (SENSOR_IOCTL | 0x02)
+#define SENSOR_IOCTL_SCALAR (SENSOR_IOCTL | 0x02)
-typedef struct sensor_ioctl_temperature {
- uint32_t sit_unit;
- int32_t sit_gran;
- uint32_t sit_prec;
- uint32_t sit_pad;
- int64_t sit_temp;
-} sensor_ioctl_temperature_t;
+typedef struct sensor_ioctl_scalar {
+ uint32_t sis_unit;
+ int32_t sis_gran;
+ uint32_t sis_prec;
+ uint32_t sis_pad;
+ int64_t sis_value;
+} sensor_ioctl_scalar_t;
#ifdef _KERNEL
typedef int (*ksensor_kind_f)(void *, sensor_ioctl_kind_t *);
-typedef int (*ksensor_temp_f)(void *, sensor_ioctl_temperature_t *);
+typedef int (*ksensor_scalar_f)(void *, sensor_ioctl_scalar_t *);
typedef struct {
- ksensor_kind_f kso_kind;
- ksensor_temp_f kso_temp;
+ ksensor_kind_f kso_kind;
+ ksensor_scalar_f kso_scalar;
} ksensor_ops_t;
extern int ksensor_kind_temperature(void *, sensor_ioctl_kind_t *);
+extern int ksensor_kind_voltage(void *, sensor_ioctl_kind_t *);
+extern int ksensor_kind_current(void *, sensor_ioctl_kind_t *);
/*
* Create a sensor where the class and name is supplied.
@@ -95,11 +105,11 @@ extern int ksensor_create(dev_info_t *, const ksensor_ops_t *, void *,
const char *, const char *, id_t *);
/*
- * Create a temperature sensor for a PCI device. If this is not a device-wide
+ * Create a scalar sensor for a PCI device. If this is not a device-wide
* (e.g. per-function) sensor, this should not be used.
*/
-extern int ksensor_create_temp_pcidev(dev_info_t *, const ksensor_ops_t *,
- void *, const char *, id_t *);
+extern int ksensor_create_scalar_pcidev(dev_info_t *, uint_t,
+ const ksensor_ops_t *, void *, const char *, id_t *);
/*
* Remove a named or all sensors from this driver.
diff --git a/usr/src/uts/common/sys/smbios.h b/usr/src/uts/common/sys/smbios.h
index 55048d549d..b8b470b79a 100644
--- a/usr/src/uts/common/sys/smbios.h
+++ b/usr/src/uts/common/sys/smbios.h
@@ -22,6 +22,7 @@
/*
* Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -527,6 +528,8 @@ typedef struct smbios_processor {
#define SMB_PRU_BGA1392 0x3A /* Socket BGA1392 */
#define SMB_PRU_BGA1510 0x3B /* Socket BGA1510 */
#define SMB_PRU_BGA1528 0x3C /* Socket BGA1528 */
+#define SMB_PRU_LGA4189 0x3D /* Socket LGA4189 */
+#define SMB_PRU_LGA1200 0x3E /* Socket LGA1200 */
#define SMB_PRC_RESERVED 0x0001 /* reserved */
#define SMB_PRC_UNKNOWN 0x0002 /* unknown */
@@ -944,6 +947,9 @@ typedef struct smbios_slot {
uint8_t smbl_df; /* device/function number */
uint8_t smbl_dbw; /* data bus width */
uint8_t smbl_npeers; /* PCIe bifurcation peers */
+ uint8_t smbl_info; /* slot info */
+ uint8_t smbl_pwidth; /* slot physical width */
+ uint32_t smbl_pitch; /* slot pitch in 10um */
} smbios_slot_t;
#define SMB_SLT_OTHER 0x01 /* other */
@@ -976,8 +982,8 @@ typedef struct smbios_slot {
#define SMB_SLT_MXM_V 0x1C /* MXM Type IV */
#define SMB_SLT_MXM3_A 0x1D /* MXM 3.0 Type A */
#define SMB_SLT_MXM3_B 0x1E /* MXM 3.0 Type B */
-#define SMB_SLT_PCIEG2_SFF 0x1F /* PCI Express Gen 2 SFF-8639 */
-#define SMB_SLT_PCIEG3_SFF 0x20 /* PCI Express Gen 3 SFF-8639 */
+#define SMB_SLT_PCIEG2_SFF 0x1F /* PCI Express Gen 2 SFF-8639 (U.2) */
+#define SMB_SLT_PCIEG3_SFF 0x20 /* PCI Express Gen 3 SFF-8639 (U.2) */
/*
* These lines must be on one line for the string generating code.
*/
@@ -986,6 +992,11 @@ typedef struct smbios_slot {
#define SMB_SLT_PCIE_M52_WOBSKO 0x22 /* PCI Express Mini 52-pin without bottom-side keep-outs */
/* END CSTYLED */
+#define	SMB_SLT_PCIE_M76	0x23	/* PCI Express Mini 76-pin */
+#define SMB_SLT_PCIEG4_SFF 0x24 /* PCI Express Gen 4 SFF-8639 (U.2) */
+#define SMB_SLT_PCIEG5_SFF 0x25 /* PCI Express Gen 5 SFF-8639 (U.2) */
+#define SMB_SLT_OCP3_SFF 0x26 /* OCP NIC 3.0 Small Form Factor */
+#define SMB_SLT_OCP3_LFF 0x27 /* OCP NIC 3.0 Large Form Factor */
+#define SMB_SLT_OCP_PRE 0x28 /* OCP NIC prior to 3.0 */
#define SMB_SLT_CXL1 0x30 /* CXL Flexbus 1.0 */
#define SMB_SLT_PC98_C20 0xA0 /* PC-98/C20 */
#define SMB_SLT_PC98_C24 0xA1 /* PC-98/C24 */
@@ -1016,6 +1027,15 @@ typedef struct smbios_slot {
#define SMB_SLT_PCIE4G4 0xBB /* PCI Exp. Gen 4 x4 */
#define SMB_SLT_PCIE4G8 0xBC /* PCI Exp. Gen 4 x8 */
#define SMB_SLT_PCIE4G16 0xBD /* PCI Exp. Gen 4 x16 */
+#define SMB_SLT_PCIE5G 0xBE /* PCI Exp. Gen 5 */
+#define SMB_SLT_PCIE5G1 0xBF /* PCI Exp. Gen 5 x1 */
+#define SMB_SLT_PCIE5G2 0xC0 /* PCI Exp. Gen 5 x2 */
+#define SMB_SLT_PCIE5G4 0xC1 /* PCI Exp. Gen 5 x4 */
+#define SMB_SLT_PCIE5G8 0xC2 /* PCI Exp. Gen 5 x8 */
+#define SMB_SLT_PCIE5G16 0xC3 /* PCI Exp. Gen 5 x16 */
+#define SMB_SLT_PCIEG6P 0xC4 /* PCI Exp. Gen 6+ */
+#define SMB_SLT_EDSFF_E1 0xC5 /* Ent. and DC 1U E1 Form Factor */
+#define SMB_SLT_EDSFF_E3 0xC6 /* Ent. and DC 3" E3 Form Factor */
#define SMB_SLW_OTHER 0x01 /* other */
#define SMB_SLW_UNKNOWN 0x02 /* unknown */
@@ -1041,6 +1061,8 @@ typedef struct smbios_slot {
#define SMB_SLL_UNKNOWN 0x02 /* unknown */
#define SMB_SLL_SHORT 0x03 /* short length */
#define SMB_SLL_LONG 0x04 /* long length */
+#define SMB_SLL_2IN5 0x05 /* 2.5" drive form factor */
+#define SMB_SLL_3IN5 0x06 /* 3.5" drive form factor */
#define SMB_SLCH1_UNKNOWN 0x01 /* characteristics unknown */
#define SMB_SLCH1_5V 0x02 /* provides 5.0V */
@@ -1055,6 +1077,9 @@ typedef struct smbios_slot {
#define SMB_SLCH2_HOTPLUG 0x02 /* slot supports hot-plug devices */
#define SMB_SLCH2_SMBUS 0x04 /* slot supports SMBus signal */
#define SMB_SLCH2_BIFUR 0x08 /* slot supports PCIe bifurcation */
+#define SMB_SLCH2_SURPREM 0x10 /* slot supports surprise removal */
+#define SMB_SLCH2_CXL1 0x20 /* Flexbus slot, CXL 1.0 capable */
+#define SMB_SLCH2_CXL2 0x40 /* Flexbus slot, CXL 2.0 capable */
/*
* SMBIOS 7.10.9 Slot Peer Devices
@@ -1178,7 +1203,7 @@ typedef struct smbios_memarray {
#define SMB_MAL_PC98C24 0xA1 /* PC-98/C24 add-on card */
#define SMB_MAL_PC98E 0xA2 /* PC-98/E add-on card */
#define SMB_MAL_PC98LB 0xA3 /* PC-98/Local bus add-on card */
-#define SMB_MAL_CXL1 0xA4 /* CXL Flexbus 1.0 add-on card */
+#define SMB_MAL_CXL1 0xA4 /* CXL add-on card */
#define SMB_MAU_OTHER 0x01 /* other */
#define SMB_MAU_UNKNOWN 0x02 /* unknown */
@@ -1285,6 +1310,8 @@ typedef struct smbios_memdevice {
#define SMB_MDT_LOGNV 0x1F /* Logical non-volatile device */
#define SMB_MDT_HBM 0x20 /* High Bandwidth Memory */
#define SMB_MDT_HBM2 0x21 /* High Bandwidth Memory 2 */
+#define SMB_MDT_DDR5 0x22 /* DDR5 */
+#define SMB_MDT_LPDDR5 0x23 /* LPDDR5 */
#define SMB_MDF_OTHER 0x0002 /* other */
#define SMB_MDF_UNKNOWN 0x0004 /* unknown */
@@ -1313,7 +1340,7 @@ typedef struct smbios_memdevice {
#define SMB_MTECH_NVDIMM_N 0x04 /* NVDIMM-N */
#define SMB_MTECH_NVDIMM_F 0x05 /* NVDIMM-F */
#define SMB_MTECH_NVDIMM_P 0x06 /* NVDIMM-P */
-#define SMB_MTECH_INTCPM 0x07 /* Intel Optane DC Persistent Memory */
+#define SMB_MTECH_INTCPM 0x07 /* Intel Optane persistent memory */
#define SMB_MOMC_RESERVED 0x01 /* reserved */
#define SMB_MOMC_OTHER 0x02 /* other */
@@ -1838,7 +1865,8 @@ typedef struct smbios_memdevice_ext {
#define SMB_VERSION_31 0x0301 /* SMBIOS encoding for DMTF spec 3.1 */
#define SMB_VERSION_32 0x0302 /* SMBIOS encoding for DMTF spec 3.2 */
#define SMB_VERSION_33 0x0303 /* SMBIOS encoding for DMTF spec 3.3 */
-#define SMB_VERSION SMB_VERSION_33 /* SMBIOS latest version definitions */
+#define SMB_VERSION_34 0x0304 /* SMBIOS encoding for DMTF spec 3.4 */
+#define SMB_VERSION SMB_VERSION_34 /* SMBIOS latest version definitions */
#define SMB_O_NOCKSUM 0x1 /* do not verify header checksums */
#define SMB_O_NOVERS 0x2 /* do not verify header versions */
diff --git a/usr/src/uts/common/sys/smbios_impl.h b/usr/src/uts/common/sys/smbios_impl.h
index 69ca79e94f..4b951b702f 100644
--- a/usr/src/uts/common/sys/smbios_impl.h
+++ b/usr/src/uts/common/sys/smbios_impl.h
@@ -22,6 +22,7 @@
/*
* Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -250,9 +251,26 @@ typedef struct smb_slot {
uint8_t smbsl_dbw; /* Data bus width */
uint8_t smbsl_npeers; /* Peer bdf groups */
smb_slot_peer_t smbsl_peers[]; /* bifurcation peers */
+ /* There are later additions in 3.4+, see smbios_slot_cont_t */
} smb_slot_t;
/*
+ * After the variable number of smbsl_peers, the smbios_slot has continued in
+ * size and has the following members defined as of version 3.4. These occur
+ * starting at byte 14 + 5 * smbsl_npeers.
+ */
+typedef struct smb_slot_cont {
+ uint8_t smbsl_info; /* slot info */
+ uint8_t smbsl_pwidth; /* slot physical width */
+ uint16_t smbsl_pitch; /* slot pitch */
+} smb_slot_cont_t;
+
+/*
+ * The first byte that the smb_slot_cont_t is defined to start at.
+ */
+#define SMB_SLOT_CONT_START 0x14
+
+/*
* SMBIOS implementation structure for SMB_TYPE_OBDEVS.
*/
typedef struct smb_obdev {
diff --git a/usr/src/uts/common/sys/socket_proto.h b/usr/src/uts/common/sys/socket_proto.h
index 4e1a4a0f35..825d0501c7 100644
--- a/usr/src/uts/common/sys/socket_proto.h
+++ b/usr/src/uts/common/sys/socket_proto.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2020 Joyent, Inc.
*/
#ifndef _SYS_SOCKET_PROTO_H_
@@ -202,7 +203,16 @@ struct sock_upcalls_s {
void (*su_signal_oob)(sock_upper_handle_t, ssize_t);
void (*su_zcopy_notify)(sock_upper_handle_t);
void (*su_set_error)(sock_upper_handle_t, int);
+ /*
+ * NOTE: This function frees upper handle items. Caller cannot
+ * rely on them after this upcall.
+ */
void (*su_closed)(sock_upper_handle_t);
+ /*
+ * NOTE: This function MUST be implemented without using lower-level
+ * downcalls or accesses. This allows callers to ensure su_closed()
+	 * upcalls can happen independently or concurrently.
+ */
vnode_t *(*su_get_vnode)(sock_upper_handle_t);
};
diff --git a/usr/src/uts/common/sys/time.h b/usr/src/uts/common/sys/time.h
index a69bf4dd63..f6cfa1a7e5 100644
--- a/usr/src/uts/common/sys/time.h
+++ b/usr/src/uts/common/sys/time.h
@@ -16,6 +16,8 @@
*
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright 2016 Joyent, Inc.
+ *
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
/*
@@ -365,14 +367,14 @@ extern todinfo_t utc_to_tod(time_t);
extern time_t tod_to_utc(todinfo_t);
extern int hr_clock_lock(void);
extern void hr_clock_unlock(int);
-extern hrtime_t gethrtime(void);
-extern hrtime_t gethrtime_unscaled(void);
+extern hrtime_t gethrtime(void);
+extern hrtime_t gethrtime_unscaled(void);
extern hrtime_t gethrtime_max(void);
extern hrtime_t gethrtime_waitfree(void);
extern void scalehrtime(hrtime_t *);
extern uint64_t unscalehrtime(hrtime_t);
-extern void gethrestime(timespec_t *);
-extern time_t gethrestime_sec(void);
+extern void gethrestime(timespec_t *);
+extern time_t gethrestime_sec(void);
extern void gethrestime_lasttick(timespec_t *);
extern void hrt2ts(hrtime_t, timestruc_t *);
extern hrtime_t ts2hrt(const timestruc_t *);
@@ -408,6 +410,7 @@ int futimesat(int, const char *, const struct timeval *);
int getitimer(int, struct itimerval *);
int utimes(const char *, const struct timeval *);
+
#if defined(_XPG4_2)
int setitimer(int, const struct itimerval *_RESTRICT_KYWD,
struct itimerval *_RESTRICT_KYWD);
@@ -418,6 +421,22 @@ int setitimer(int, struct itimerval *_RESTRICT_KYWD,
#endif /* !defined(_KERNEL) ... defined(_XPG4_2) */
+#if !defined(_KERNEL) && !defined(_STRICT_SYMBOLS)
+int futimes(int, const struct timeval *);
+int lutimes(const char *, const struct timeval *);
+
+#define TIMESPEC_TO_TIMEVAL(tv, ts) { \
+ (tv)->tv_sec = (ts)->tv_sec; \
+ (tv)->tv_usec = (ts)->tv_nsec / 1000; \
+}
+
+#define TIMEVAL_TO_TIMESPEC(tv, ts) { \
+ (ts)->tv_sec = (tv)->tv_sec; \
+ (ts)->tv_nsec = (tv)->tv_usec * 1000; \
+}
+
+#endif /* !defined(_KERNEL) && !defined(_STRICT_SYMBOLS) */
+
/*
* gettimeofday() and settimeofday() were included in SVr4 due to their
* common use in BSD based applications. They were to be included exactly
diff --git a/usr/src/uts/i86pc/boot/boot_fb.c b/usr/src/uts/i86pc/boot/boot_fb.c
index 1ac4789af7..e0e79bd14e 100644
--- a/usr/src/uts/i86pc/boot/boot_fb.c
+++ b/usr/src/uts/i86pc/boot/boot_fb.c
@@ -354,28 +354,44 @@ boot_get_color(uint32_t *fg, uint32_t *bg)
/* ansi to solaris colors, see also boot_console.c */
if (fb_info.inverse == B_TRUE ||
fb_info.inverse_screen == B_TRUE) {
- if (fb_info.fg_color < 16)
- *bg = dim_xlate[fb_info.fg_color];
- else
+ if (fb_info.fg_color < XLATE_NCOLORS) {
+ /*
+ * white fg -> bright white bg
+ */
+ if (fb_info.fg_color == pc_white)
+ *bg = brt_xlate[fb_info.fg_color];
+ else
+ *bg = dim_xlate[fb_info.fg_color];
+ } else {
*bg = fb_info.fg_color;
+ }
- if (fb_info.bg_color < 16)
- *fg = brt_xlate[fb_info.bg_color];
- else
+ if (fb_info.bg_color < XLATE_NCOLORS) {
+ if (fb_info.bg_color == pc_white)
+ *fg = brt_xlate[fb_info.bg_color];
+ else
+ *fg = dim_xlate[fb_info.bg_color];
+ } else {
*fg = fb_info.bg_color;
+ }
} else {
- if (fb_info.bg_color < 16) {
- if (fb_info.bg_color == 7)
+ if (fb_info.fg_color < XLATE_NCOLORS) {
+ if (fb_info.fg_color == pc_white)
+ *fg = brt_xlate[fb_info.fg_color];
+ else
+ *fg = dim_xlate[fb_info.fg_color];
+ } else {
+ *fg = fb_info.fg_color;
+ }
+
+ if (fb_info.bg_color < XLATE_NCOLORS) {
+ if (fb_info.bg_color == pc_white)
*bg = brt_xlate[fb_info.bg_color];
else
*bg = dim_xlate[fb_info.bg_color];
} else {
*bg = fb_info.bg_color;
}
- if (fb_info.fg_color < 16)
- *fg = dim_xlate[fb_info.fg_color];
- else
- *fg = fb_info.fg_color;
}
}
diff --git a/usr/src/uts/i86pc/io/apix/apix.c b/usr/src/uts/i86pc/io/apix/apix.c
index 18dee7499a..cedc49147e 100644
--- a/usr/src/uts/i86pc/io/apix/apix.c
+++ b/usr/src/uts/i86pc/io/apix/apix.c
@@ -186,18 +186,6 @@ static void *apix_hdlp;
static int apix_is_enabled = 0;
/*
- * Flag to indicate if APIX is to be enabled only for platforms
- * with specific hw feature(s).
- */
-int apix_hw_chk_enable = 1;
-
-/*
- * Hw features that are checked for enabling APIX support.
- */
-#define APIX_SUPPORT_X2APIC 0x00000001
-uint_t apix_supported_hw = APIX_SUPPORT_X2APIC;
-
-/*
* apix_lock is used for cpu selection and vector re-binding
*/
lock_t apix_lock;
@@ -272,22 +260,10 @@ apix_probe()
if (get_hwenv() & HW_XEN_HVM)
return (PSM_FAILURE);
- /* check for hw features if specified */
- if (apix_hw_chk_enable) {
- /* check if x2APIC mode is supported */
- if ((apix_supported_hw & APIX_SUPPORT_X2APIC) ==
- APIX_SUPPORT_X2APIC) {
- if (apic_local_mode() == LOCAL_X2APIC) {
- /* x2APIC mode activated by BIOS, switch ops */
- apic_mode = LOCAL_X2APIC;
- apic_change_ops();
- } else if (!apic_detect_x2apic()) {
- /* x2APIC mode is not supported in the hw */
- apix_enable = 0;
- }
- }
- if (apix_enable == 0)
- return (PSM_FAILURE);
+ if (apic_local_mode() == LOCAL_X2APIC) {
+ /* x2APIC mode activated by BIOS, switch ops */
+ apic_mode = LOCAL_X2APIC;
+ apic_change_ops();
}
rval = apic_probe_common(apix_psm_info.p_mach_idstring);
diff --git a/usr/src/uts/i86pc/io/gfx_private/gfxp_fb.c b/usr/src/uts/i86pc/io/gfx_private/gfxp_fb.c
index 6d1a99ea05..0d2d1fe1de 100644
--- a/usr/src/uts/i86pc/io/gfx_private/gfxp_fb.c
+++ b/usr/src/uts/i86pc/io/gfx_private/gfxp_fb.c
@@ -11,6 +11,7 @@
/*
* Copyright 2016 Toomas Soome <tsoome@me.com>
+ * Copyright 2020 RackTop Systems, Inc.
*/
/*
@@ -81,6 +82,17 @@ gfxp_check_for_console(dev_info_t *devi, struct gfxp_fb_softc *softc,
uint16_t data16;
/*
+ * fb_info is filled in by data gathered by the bootloader.
+ * In particular we are interested in "paddr" which is the physical
+ * address of the framebuffer. If that is not zero, then we have
+ * a valid framebuffer and we can use this device as a console.
+ */
+ if (fb_info.paddr != 0) {
+ softc->flags |= GFXP_FLAG_CONSOLE;
+ return;
+ }
+
+ /*
* Based on Section 11.3, "PCI Display Subsystem Initialization",
* of the 1.1 PCI-to-PCI Bridge Architecture Specification
* determine if this is the boot console device. First, see
diff --git a/usr/src/uts/i86pc/io/hpet_acpi.c b/usr/src/uts/i86pc/io/hpet_acpi.c
index ac5a885a38..aace99b18b 100644
--- a/usr/src/uts/i86pc/io/hpet_acpi.c
+++ b/usr/src/uts/i86pc/io/hpet_acpi.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2020 Oxide Computer Company
*/
#include <sys/hpet_acpi.h>
@@ -34,6 +35,8 @@
#include <sys/clock.h>
#include <sys/archsystm.h>
#include <sys/cpupart.h>
+#include <sys/x86_archext.h>
+#include <sys/prom_debug.h>
static int hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags);
static boolean_t hpet_install_proxy(void);
@@ -140,17 +143,36 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
(void) memset(&hpet_info, 0, sizeof (hpet_info));
hpet.supported = HPET_NO_SUPPORT;
- if (idle_cpu_no_deep_c)
+ if ((get_hwenv() & HW_XEN_HVM) != 0) {
+ /*
+ * In some AWS EC2 guests, though the HPET is advertised via
+ * ACPI, programming the interrupt on the non-legacy timer can
+ * result in an immediate reset of the instance. It is not
+ * currently possible to tell whether this is an instance with
+ * broken HPET emulation or not, so we simply disable it across
+ * the board.
+ */
+ PRM_POINT("will not program HPET in Xen HVM");
return (DDI_FAILURE);
+ }
- if (!cpuid_deep_cstates_supported())
+ if (idle_cpu_no_deep_c ||
+ !cpuid_deep_cstates_supported()) {
+ /*
+ * If Deep C-States are disabled or not supported, then we do
+ * not need to program the HPET at all as it will not
+ * subsequently be used.
+ */
+ PRM_POINT("no need to program the HPET");
return (DDI_FAILURE);
+ }
hpet_establish_hooks();
/*
* Get HPET ACPI table 1.
*/
+ PRM_POINT("AcpiGetTable() HPET #1");
if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_HPET, HPET_TABLE_1,
(ACPI_TABLE_HEADER **)&hpet_table))) {
cmn_err(CE_NOTE, "!hpet_acpi: unable to get ACPI HPET table");
@@ -162,14 +184,18 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
return (DDI_FAILURE);
}
+ PRM_POINT("hpet_memory_map()");
la = hpet_memory_map(hpet_table);
+ PRM_DEBUG(la);
if (la == NULL) {
cmn_err(CE_NOTE, "!hpet_acpi: memory map HPET failed");
return (DDI_FAILURE);
}
hpet_info.logical_address = la;
+ PRM_POINT("hpet_read_gen_cap()");
ret = hpet_read_gen_cap(&hpet_info);
+ PRM_DEBUG(ret);
hpet_info.gen_cap.counter_clk_period = HPET_GCAP_CNTR_CLK_PERIOD(ret);
hpet_info.gen_cap.vendor_id = HPET_GCAP_VENDOR_ID(ret);
hpet_info.gen_cap.leg_route_cap = HPET_GCAP_LEG_ROUTE_CAP(ret);
@@ -189,6 +215,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
}
num_timers = (uint_t)hpet_info.gen_cap.num_tim_cap;
+ PRM_DEBUG(num_timers);
if ((num_timers < 3) || (num_timers > 32)) {
cmn_err(CE_NOTE, "!hpet_acpi: invalid number of HPET timers "
"%lx", (long)num_timers);
@@ -197,20 +224,23 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
hpet_info.timer_n_config = (hpet_TN_conf_cap_t *)kmem_zalloc(
num_timers * sizeof (uint64_t), KM_SLEEP);
+ PRM_POINT("hpet_read_gen_config()");
ret = hpet_read_gen_config(&hpet_info);
hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
/*
- * Solaris does not use the HPET Legacy Replacement Route capabilities.
+ * illumos does not use the HPET Legacy Replacement Route capabilities.
* This feature has been off by default on test systems.
* The HPET spec does not specify if Legacy Replacement Route is
- * on or off by default, so we explicitely set it off here.
+ * on or off by default, so we explicitly set it off here.
* It should not matter which mode the HPET is in since we use
* the first available non-legacy replacement timer: timer 2.
*/
+ PRM_POINT("hpet_read_gen_config()");
(void) hpet_set_leg_rt_cnf(&hpet_info, 0);
+ PRM_POINT("hpet_read_gen_config() again");
ret = hpet_read_gen_config(&hpet_info);
hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
@@ -218,6 +248,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
hpet_info.gen_intrpt_stat = hpet_read_gen_intrpt_stat(&hpet_info);
hpet_info.main_counter_value = hpet_read_main_counter_value(&hpet_info);
+ PRM_POINT("disable timer loop...");
for (ti = 0; ti < num_timers; ++ti) {
ret = hpet_read_timer_N_config(&hpet_info, ti);
/*
@@ -231,6 +262,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
hpet_info.timer_n_config[ti] = hpet_convert_timer_N_config(ret);
}
+ PRM_POINT("disable timer loop complete");
/*
* Be aware the Main Counter may need to be initialized in the future
@@ -238,6 +270,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
* The HPET's Main Counter does not need to be initialize to a specific
* value before starting it for use to wake up CPUs from Deep C-States.
*/
+ PRM_POINT("hpet_start_main_counter()");
if (hpet_start_main_counter(&hpet_info) != AE_OK) {
cmn_err(CE_NOTE, "!hpet_acpi: hpet_start_main_counter failed");
return (DDI_FAILURE);
@@ -247,6 +280,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
/*
* Read main counter twice to record HPET latency for debugging.
*/
+ PRM_POINT("TSC and HPET reads:");
hpet_info.tsc[0] = tsc_read();
hpet_info.hpet_main_counter_reads[0] =
hpet_read_main_counter_value(&hpet_info);
@@ -255,6 +289,12 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
hpet_read_main_counter_value(&hpet_info);
hpet_info.tsc[2] = tsc_read();
+ PRM_DEBUG(hpet_info.hpet_main_counter_reads[0]);
+ PRM_DEBUG(hpet_info.hpet_main_counter_reads[1]);
+ PRM_DEBUG(hpet_info.tsc[0]);
+ PRM_DEBUG(hpet_info.tsc[1]);
+ PRM_DEBUG(hpet_info.tsc[2]);
+
ret = hpet_read_gen_config(&hpet_info);
hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
@@ -293,6 +333,7 @@ hpet_acpi_fini(void)
static int
hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags)
{
+ PRM_POINT("hpet_get_IOAPIC_intr_capable_timer()");
if (hpet_get_IOAPIC_intr_capable_timer(&hpet_info) == -1) {
cmn_err(CE_WARN, "!hpet_acpi: get ioapic intr failed.");
return (DDI_FAILURE);
@@ -300,6 +341,7 @@ hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags)
hpet_init_proxy_data();
+ PRM_POINT("hpet_install_interrupt_handler()");
if (hpet_install_interrupt_handler(&hpet_isr,
hpet_info.cstate_timer.intr) != AE_OK) {
cmn_err(CE_WARN, "!hpet_acpi: install interrupt failed.");
@@ -314,13 +356,16 @@ hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags)
* Avoid a possibly stuck interrupt by programing the HPET's timer here
* before the I/O APIC is programmed to handle this interrupt.
*/
+ PRM_POINT("hpet_timer_set_up()");
hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
hpet_info.cstate_timer.intr);
+ PRM_POINT("back from hpet_timer_set_up()");
/*
* All HPET functionality is supported.
*/
hpet.supported = HPET_FULL_SUPPORT;
+ PRM_POINT("HPET full support");
return (DDI_SUCCESS);
}
@@ -564,14 +609,25 @@ hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l)
}
static void
-hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l)
+hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t conf)
{
- if (hip->timer_n_config[n].size_cap == 1)
- *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
- hip->logical_address, n) = l;
- else
- *(uint32_t *)HPET_TIMER_N_CONF_ADDRESS(
- hip->logical_address, n) = (uint32_t)(0xFFFFFFFF & l);
+ /*
+ * The configuration register size is not affected by the size
+ * capability; it is always a 64-bit value. The top 32-bit half of
+ * this register is always read-only so we constrain our write to the
+ * bottom half.
+ */
+ uint32_t *confaddr = (uint32_t *)HPET_TIMER_N_CONF_ADDRESS(
+ hip->logical_address, n);
+ uint32_t conf32 = 0xFFFFFFFF & conf;
+
+ PRM_DEBUG(n);
+ PRM_DEBUG(conf);
+ PRM_DEBUG(conf32);
+
+ *confaddr = conf32;
+
+ PRM_POINT("write done");
}
static void
@@ -630,16 +686,19 @@ hpet_install_interrupt_handler(avfunc func, int vector)
static int
hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip)
{
- int timer;
- int intr;
+ int timer;
+ int intr;
for (timer = HPET_FIRST_NON_LEGACY_TIMER;
timer < hip->gen_cap.num_tim_cap; ++timer) {
-
if (!hpet_timer_available(hip->allocated_timers, timer))
continue;
intr = lowbit(hip->timer_n_config[timer].int_route_cap) - 1;
+
+ PRM_DEBUG(timer);
+ PRM_DEBUG(intr);
+
if (intr >= 0) {
hpet_timer_alloc(&hip->allocated_timers, timer);
hip->cstate_timer.timer = timer;
@@ -678,7 +737,12 @@ hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt)
{
uint64_t conf;
+ PRM_DEBUG(timer_n);
+ PRM_DEBUG(interrupt);
+
+ PRM_POINT("hpet_read_timer_N_config()");
conf = hpet_read_timer_N_config(hip, timer_n);
+ PRM_DEBUG(conf);
/*
* Caller is required to verify this interrupt route is supported.
@@ -691,7 +755,10 @@ hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt)
conf &= ~HPET_TIMER_N_INT_ENB_CNF_BIT; /* disabled */
conf |= HPET_TIMER_N_INT_TYPE_CNF_BIT; /* Level Triggered */
+ PRM_POINT("hpet_write_timer_N_config()");
+ PRM_DEBUG(conf);
hpet_write_timer_N_config(hip, timer_n, conf);
+ PRM_POINT("back from hpet_write_timer_N_config()");
}
/*
diff --git a/usr/src/uts/i86pc/io/mp_platform_common.c b/usr/src/uts/i86pc/io/mp_platform_common.c
index aea7f2e856..9b9944fbd0 100644
--- a/usr/src/uts/i86pc/io/mp_platform_common.c
+++ b/usr/src/uts/i86pc/io/mp_platform_common.c
@@ -25,6 +25,7 @@
* Copyright (c) 2017 by Delphix. All rights reserved.
* Copyright (c) 2019, Joyent, Inc.
* Copyright 2020 RackTop Systems, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -72,6 +73,7 @@
#include <sys/note.h>
#include <sys/pci_intr_lib.h>
#include <sys/sunndi.h>
+#include <sys/prom_debug.h>
#if !defined(__xpv)
#include <sys/hpet.h>
#include <sys/clock.h>
@@ -334,7 +336,7 @@ apic_probe_common(char *modname)
uint32_t mpct_addr, ebda_start = 0, base_mem_end;
caddr_t biosdatap;
caddr_t mpct = NULL;
- caddr_t fptr;
+ caddr_t fptr = NULL;
int i, mpct_size = 0, mapsize, retval = PSM_FAILURE;
ushort_t ebda_seg, base_mem_size;
struct apic_mpfps_hdr *fpsp;
@@ -342,6 +344,8 @@ apic_probe_common(char *modname)
int bypass_cpu_and_ioapics_in_mptables;
int acpi_user_options;
+ PRM_POINT("apic_probe_common()");
+
if (apic_forceload < 0)
return (retval);
@@ -359,11 +363,15 @@ apic_probe_common(char *modname)
if (!apic_use_acpi)
apic_use_acpi_madt_only = 0;
+ PRM_POINT("acpi_probe()");
retval = acpi_probe(modname);
+ PRM_DEBUG(retval);
/* in UEFI system, there is no BIOS data */
- if (ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), 0, "efi-systab"))
+ if (ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), 0, "efi-systab")) {
+ PRM_POINT("UEFI system!");
goto apic_ret;
+ }
/*
* mapin the bios data area 40:0
@@ -371,17 +379,21 @@ apic_probe_common(char *modname)
* 40:0Eh - two-byte location for the exact starting address of
* the EBDA segment for EISA
*/
+ PRM_POINT("psm_map_phys()");
biosdatap = psm_map_phys(0x400, 0x20, PROT_READ);
+ PRM_DEBUG(biosdatap);
if (!biosdatap)
goto apic_ret;
fpsp = (struct apic_mpfps_hdr *)NULL;
mapsize = MPFPS_RAM_WIN_LEN;
/*LINTED: pointer cast may result in improper alignment */
ebda_seg = *((ushort_t *)(biosdatap+0xe));
+ PRM_DEBUG(ebda_seg);
/* check the 1k of EBDA */
if (ebda_seg) {
ebda_start = ((uint32_t)ebda_seg) << 4;
fptr = psm_map_phys(ebda_start, MPFPS_RAM_WIN_LEN, PROT_READ);
+ PRM_DEBUG(fptr);
if (fptr) {
if (!(fpsp =
apic_find_fps_sig(fptr, MPFPS_RAM_WIN_LEN)))
@@ -389,6 +401,7 @@ apic_probe_common(char *modname)
}
}
/* If not in EBDA, check the last k of system base memory */
+ PRM_DEBUG(fpsp);
if (!fpsp) {
/*LINTED: pointer cast may result in improper alignment */
base_mem_size = *((ushort_t *)(biosdatap + 0x13));
@@ -402,6 +415,7 @@ apic_probe_common(char *modname)
fptr = psm_map_phys(base_mem_end, MPFPS_RAM_WIN_LEN,
PROT_READ);
+ PRM_DEBUG(fptr);
if (fptr) {
if (!(fpsp = apic_find_fps_sig(fptr,
@@ -410,13 +424,16 @@ apic_probe_common(char *modname)
}
}
}
+ PRM_POINT("psm_unmap_phys()");
psm_unmap_phys(biosdatap, 0x20);
/* If still cannot find it, check the BIOS ROM space */
+ PRM_DEBUG(fpsp);
if (!fpsp) {
mapsize = MPFPS_ROM_WIN_LEN;
fptr = psm_map_phys(MPFPS_ROM_WIN_START,
MPFPS_ROM_WIN_LEN, PROT_READ);
+ PRM_DEBUG(fptr);
if (fptr) {
if (!(fpsp =
apic_find_fps_sig(fptr, MPFPS_ROM_WIN_LEN))) {
@@ -426,13 +443,18 @@ apic_probe_common(char *modname)
}
}
+ PRM_DEBUG(fptr);
+ PRM_DEBUG(fpsp);
+ PRM_POINT("apic_checksum()");
if (apic_checksum((caddr_t)fpsp, fpsp->mpfps_length * 16) != 0) {
+ PRM_POINT("psm_unmap_phys()");
psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
goto apic_ret;
}
apic_spec_rev = fpsp->mpfps_spec_rev;
if ((apic_spec_rev != 04) && (apic_spec_rev != 01)) {
+ PRM_POINT("psm_unmap_phys()");
psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
goto apic_ret;
}
@@ -442,7 +464,9 @@ apic_probe_common(char *modname)
/* check default configuration (dual CPUs) */
if ((apic_defconf = fpsp->mpfps_featinfo1) != 0) {
+ PRM_POINT("psm_unmap_phys()");
psm_unmap_phys(fptr, mapsize);
+ PRM_POINT("apic_handle_defconf()");
if ((retval = apic_handle_defconf()) != PSM_SUCCESS)
return (retval);
@@ -451,6 +475,7 @@ apic_probe_common(char *modname)
/* MP Configuration Table */
mpct_addr = (uint32_t)(fpsp->mpfps_mpct_paddr);
+ PRM_DEBUG(mpct_addr);
psm_unmap_phys(fptr, mapsize); /* unmap floating ptr struct */
@@ -472,6 +497,7 @@ apic_probe_common(char *modname)
}
mpct_size = (int)hdrp->mpcnf_tbl_length;
+ PRM_POINT("apic_set_pwroff_method_from_mpcnfhdr()");
apic_set_pwroff_method_from_mpcnfhdr(hdrp);
psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr));
@@ -497,6 +523,8 @@ apic_probe_common(char *modname)
hdrp = (struct apic_mp_cnf_hdr *)mpct;
apicadr = (uint32_t *)mapin_apic((uint32_t)hdrp->mpcnf_local_apic,
APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
+ PRM_DEBUG(hdrp);
+ PRM_DEBUG(apicadr);
if (!apicadr)
goto apic_fail1;
@@ -509,15 +537,20 @@ apic_probe_common(char *modname)
}
apic_fail1:
+ PRM_POINT("apic_fail1:");
psm_unmap_phys(mpct, mpct_size);
mpct = NULL;
apic_ret:
+ PRM_POINT("apic_ret:");
if (retval == PSM_SUCCESS) {
extern int apic_ioapic_method_probe();
- if ((retval = apic_ioapic_method_probe()) == PSM_SUCCESS)
+ PRM_POINT("apic_ioapic_method_probe()");
+ if ((retval = apic_ioapic_method_probe()) == PSM_SUCCESS) {
+ PRM_POINT("SUCCESS");
return (PSM_SUCCESS);
+ }
}
for (i = 0; i < apic_io_max; i++)
@@ -533,6 +566,7 @@ apic_ret:
if (mpct)
psm_unmap_phys(mpct, mpct_size);
+ PRM_DEBUG(retval);
return (retval);
}
@@ -632,20 +666,24 @@ acpi_probe(char *modname)
if (!apic_use_acpi)
return (PSM_FAILURE);
+ PRM_POINT("AcpiGetTable(MADT)");
if (AcpiGetTable(ACPI_SIG_MADT, 1,
(ACPI_TABLE_HEADER **) &acpi_mapic_dtp) != AE_OK) {
cmn_err(CE_WARN, "!acpi_probe: No MADT found!");
return (PSM_FAILURE);
}
+ PRM_DEBUG((uint32_t)acpi_mapic_dtp->Address);
+ PRM_POINT("mapin_apic()");
apicadr = mapin_apic((uint32_t)acpi_mapic_dtp->Address,
APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
if (!apicadr)
return (PSM_FAILURE);
if ((local_ids = (uint32_t *)kmem_zalloc(NCPU * sizeof (uint32_t),
- KM_NOSLEEP)) == NULL)
+ KM_NOSLEEP)) == NULL) {
return (PSM_FAILURE);
+ }
if ((proc_ids = (uint32_t *)kmem_zalloc(NCPU * sizeof (uint32_t),
KM_NOSLEEP)) == NULL) {
@@ -653,7 +691,9 @@ acpi_probe(char *modname)
return (PSM_FAILURE);
}
+ PRM_POINT("acpi_get_apic_lid()");
local_ids[0] = acpi_get_apic_lid();
+ PRM_DEBUG(local_ids[0]);
apic_nproc = 1;
apic_io_max = 0;
@@ -662,6 +702,7 @@ acpi_probe(char *modname)
madt_size = acpi_mapic_dtp->Header.Length;
madt_seen = sizeof (*acpi_mapic_dtp);
+ PRM_DEBUG(madt_size);
while (madt_seen < madt_size) {
switch (ap->Type) {
case ACPI_MADT_TYPE_LOCAL_APIC:
@@ -812,6 +853,9 @@ acpi_probe(char *modname)
ap = (ACPI_SUBTABLE_HEADER *)(((char *)ap) + ap->Length);
}
+ PRM_DEBUG(apic_nproc);
+ PRM_DEBUG(apic_io_max);
+
/* We found multiple enabled cpus via MADT */
if ((apic_nproc > 1) && (apic_io_max > 0)) {
acpi_found_smp_config = B_TRUE;
@@ -826,6 +870,7 @@ acpi_probe(char *modname)
if (plat_dr_support_cpu()) {
apic_max_nproc = max_ncpus;
}
+ PRM_DEBUG(apic_max_nproc);
apic_cpus_size = max(apic_nproc, max_ncpus) * sizeof (*apic_cpus);
if ((apic_cpus = kmem_zalloc(apic_cpus_size, KM_NOSLEEP)) == NULL)
goto cleanup;
@@ -834,15 +879,21 @@ acpi_probe(char *modname)
* ACPI doesn't provide the local apic ver, get it directly from the
* local apic
*/
+ PRM_POINT("apic_read(APIC_VERS_REG)");
ver = apic_reg_ops->apic_read(APIC_VERS_REG);
+ PRM_DEBUG(ver);
+ PRM_DEBUG(apic_nproc);
+ PRM_DEBUG(boot_ncpus);
for (i = 0; i < apic_nproc; i++) {
apic_cpus[i].aci_local_id = local_ids[i];
apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF);
apic_cpus[i].aci_processor_id = proc_ids[i];
/* Only build mapping info for CPUs present at boot. */
- if (i < boot_ncpus)
+ if (i < boot_ncpus) {
(void) acpica_map_cpu(i, proc_ids[i]);
+ }
}
+ PRM_POINT("acpica_map_cpu loop complete");
/*
* To support CPU dynamic reconfiguration, the apic CPU info structure
@@ -881,8 +932,10 @@ acpi_probe(char *modname)
apic_cpus[i].aci_status = APIC_CPU_FREE;
}
+ PRM_POINT("ioapic reads");
for (i = 0; i < apic_io_max; i++) {
ioapic_ix = i;
+ PRM_DEBUG(ioapic_ix);
/*
* need to check Sitka on the following acpi problem
@@ -892,16 +945,20 @@ acpi_probe(char *modname)
* actual id directly from the ioapic.
*/
id = ioapic_read(ioapic_ix, APIC_ID_CMD);
+ PRM_DEBUG(id);
hid = (uchar_t)(id >> 24);
+ PRM_DEBUG(hid);
if (hid != apic_io_id[i]) {
if (apic_io_id[i] == 0)
apic_io_id[i] = hid;
else { /* set ioapic id to whatever reported by ACPI */
id = ((uint32_t)apic_io_id[i]) << 24;
+ PRM_POINT("ioapic_write(ID)");
ioapic_write(ioapic_ix, APIC_ID_CMD, id);
}
}
+ PRM_POINT("ioapic_read(VERS)");
ver = ioapic_read(ioapic_ix, APIC_VERS_CMD);
apic_io_ver[i] = (uchar_t)(ver & 0xff);
intmax = (ver >> 16) & 0xff;
@@ -917,6 +974,7 @@ acpi_probe(char *modname)
* acpi-user-options specifies legacy mode
* (no SCI, no ACPI mode)
*/
+ PRM_POINT("acpica_get_sci()");
if (acpica_get_sci(&sci, &sci_flags) != AE_OK)
sci = -1;
@@ -925,6 +983,7 @@ acpi_probe(char *modname)
* If this fails, we don't attempt to use ACPI
* even if we were able to get a MADT above
*/
+ PRM_POINT("acpica_init()");
if (acpica_init() != AE_OK) {
cmn_err(CE_WARN, "!apic: Failed to initialize acpica!");
goto cleanup;
@@ -934,6 +993,7 @@ acpi_probe(char *modname)
* Call acpica_build_processor_map() now that we have
* ACPI namesspace access
*/
+ PRM_POINT("acpica_build_processor_map()");
(void) acpica_build_processor_map();
/*
@@ -952,15 +1012,19 @@ acpi_probe(char *modname)
if (apic_verbose & APIC_VERBOSE_POWEROFF_PAUSE_FLAG)
acpi_verboseflags |= PSM_VERBOSE_POWEROFF_PAUSE_FLAG;
+ PRM_POINT("acpi_psm_init()");
if (acpi_psm_init(modname, acpi_verboseflags) == ACPI_PSM_FAILURE)
goto cleanup;
/* Enable ACPI APIC interrupt routing */
+ PRM_POINT("apic_acpi_enter_apicmode()");
if (apic_acpi_enter_apicmode() != PSM_FAILURE) {
cmn_err(CE_NOTE, "!apic: Using APIC interrupt routing mode");
+ PRM_POINT("build_reserved_irqlist()");
build_reserved_irqlist((uchar_t *)apic_reserved_irqlist);
apic_enable_acpi = 1;
if (apic_sci_vect > 0) {
+ PRM_POINT("acpica_set_core_feature()");
acpica_set_core_feature(ACPI_FEATURE_SCI_EVENT);
}
if (apic_use_acpi_madt_only) {
@@ -970,16 +1034,18 @@ acpi_probe(char *modname)
#if !defined(__xpv)
/*
- * probe ACPI for hpet information here which is used later
- * in apic_picinit().
+ * Probe ACPI for HPET information here which is used later in
+ * apic_picinit(). Note that we do not need to use the HPET at
+ * all on most modern systems, but if there is an actionable
+ * failure message it will be logged by the routine itself.
*/
- if (hpet_acpi_init(&apic_hpet_vect, &apic_hpet_flags) < 0) {
- cmn_err(CE_NOTE, "!ACPI HPET table query failed\n");
- }
+ PRM_POINT("hpet_acpi_init()");
+ (void) hpet_acpi_init(&apic_hpet_vect, &apic_hpet_flags);
#endif
kmem_free(local_ids, NCPU * sizeof (uint32_t));
kmem_free(proc_ids, NCPU * sizeof (uint32_t));
+ PRM_POINT("SUCCESS");
return (PSM_SUCCESS);
}
/* if setting APIC mode failed above, we fall through to cleanup */
diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic.c b/usr/src/uts/i86pc/io/pcplusmp/apic.c
index efca63c814..c987391435 100644
--- a/usr/src/uts/i86pc/io/pcplusmp/apic.c
+++ b/usr/src/uts/i86pc/io/pcplusmp/apic.c
@@ -26,6 +26,7 @@
* Copyright (c) 2010, Intel Corporation.
* All rights reserved.
* Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
/*
@@ -58,6 +59,7 @@
#include <sys/ddi_impldefs.h>
#include <sys/pci.h>
#include <sys/promif.h>
+#include <sys/prom_debug.h>
#include <sys/x86_archext.h>
#include <sys/cpc_impl.h>
#include <sys/uadmin.h>
@@ -249,16 +251,23 @@ _info(struct modinfo *modinfop)
static int
apic_probe(void)
{
+ PRM_POINT("apic_probe()");
+
/* check if apix is initialized */
- if (apix_enable && apix_loaded())
+ if (apix_enable && apix_loaded()) {
+ PRM_POINT("apic_probe FAILURE: apix is loaded");
return (PSM_FAILURE);
+ }
/*
* Check whether x2APIC mode was activated by BIOS. We don't support
* that in pcplusmp as apix normally handles that.
*/
- if (apic_local_mode() == LOCAL_X2APIC)
+ PRM_POINT("apic_local_mode()");
+ if (apic_local_mode() == LOCAL_X2APIC) {
+ PRM_POINT("apic_probe FAILURE: in x2apic mode");
return (PSM_FAILURE);
+ }
/* continue using pcplusmp PSM */
apix_enable = 0;
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c
index 1046a54126..e9a34f8630 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c
@@ -203,7 +203,7 @@ check_svm_features(void)
*/
if (nasid == 0 || nasid > regs[1])
nasid = regs[1];
- KASSERT(nasid > 1, ("Insufficient ASIDs for guests: %#x", nasid));
+ KASSERT(nasid > 1, ("Insufficient ASIDs for guests: %x", nasid));
/* bhyve requires the Nested Paging feature */
if (!(svm_feature & AMD_CPUID_SVM_NP)) {
@@ -386,11 +386,11 @@ svm_msr_perm(uint8_t *perm_bitmap, uint64_t msr, bool read, bool write)
int index, bit, error;
error = svm_msr_index(msr, &index, &bit);
- KASSERT(error == 0, ("%s: invalid msr %#lx", __func__, msr));
+ KASSERT(error == 0, ("%s: invalid msr %lx", __func__, msr));
KASSERT(index >= 0 && index < SVM_MSR_BITMAP_SIZE,
- ("%s: invalid index %d for msr %#lx", __func__, index, msr));
+ ("%s: invalid index %d for msr %lx", __func__, index, msr));
KASSERT(bit >= 0 && bit <= 6, ("%s: invalid bit position %d "
- "msr %#lx", __func__, bit, msr));
+ "msr %lx", __func__, bit, msr));
if (read)
perm_bitmap[index] &= ~(1UL << bit);
@@ -444,7 +444,7 @@ svm_set_intercept(struct svm_softc *sc, int vcpu, int idx, uint32_t bitmask,
if (ctrl->intercept[idx] != oldval) {
svm_set_dirty(sc, vcpu, VMCB_CACHE_I);
VCPU_CTR3(sc->vm, vcpu, "intercept[%d] modified "
- "from %#x to %#x", idx, oldval, ctrl->intercept[idx]);
+ "from %x to %x", idx, oldval, ctrl->intercept[idx]);
}
}
@@ -527,11 +527,23 @@ vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa,
svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MONITOR);
svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MWAIT);
+ /* Intercept privileged invalidation instructions. */
+ svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INVD);
+ svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INVLPGA);
+
/*
+ * Intercept all virtualization-related instructions.
+ *
* From section "Canonicalization and Consistency Checks" in APMv2
* the VMRUN intercept bit must be set to pass the consistency check.
*/
svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMRUN);
+ svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMMCALL);
+ svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMLOAD);
+ svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMSAVE);
+ svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_STGI);
+ svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_CLGI);
+ svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_SKINIT);
/*
* The ASID will be set to a non-zero value just before VMRUN.
@@ -672,22 +684,19 @@ svm_cpl(struct vmcb_state *state)
static enum vm_cpu_mode
svm_vcpu_mode(struct vmcb *vmcb)
{
- struct vmcb_segment seg;
struct vmcb_state *state;
- int error;
state = &vmcb->state;
if (state->efer & EFER_LMA) {
- error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg);
- KASSERT(error == 0, ("%s: vmcb_seg(cs) error %d", __func__,
- error));
+ struct vmcb_segment *seg;
/*
* Section 4.8.1 for APM2, check if Code Segment has
* Long attribute set in descriptor.
*/
- if (seg.attrib & VMCB_CS_ATTRIB_L)
+ seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS);
+ if (seg->attrib & VMCB_CS_ATTRIB_L)
return (CPU_MODE_64BIT);
else
return (CPU_MODE_COMPATIBILITY);
@@ -848,10 +857,9 @@ svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit,
struct vmcb *vmcb;
struct vie *vie;
struct vm_guest_paging paging;
- struct vmcb_segment seg;
+ struct vmcb_segment *seg;
char *inst_bytes = NULL;
uint8_t inst_len = 0;
- int error;
vmcb = svm_get_vmcb(svm_sc, vcpu);
ctrl = &vmcb->ctrl;
@@ -861,22 +869,21 @@ svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit,
vmexit->u.mmio_emul.gla = VIE_INVALID_GLA;
svm_paging_info(vmcb, &paging);
- error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg);
- KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error));
-
switch (paging.cpu_mode) {
case CPU_MODE_REAL:
- vmexit->u.mmio_emul.cs_base = seg.base;
+ seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS);
+ vmexit->u.mmio_emul.cs_base = seg->base;
vmexit->u.mmio_emul.cs_d = 0;
break;
case CPU_MODE_PROTECTED:
case CPU_MODE_COMPATIBILITY:
- vmexit->u.mmio_emul.cs_base = seg.base;
+ seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS);
+ vmexit->u.mmio_emul.cs_base = seg->base;
/*
* Section 4.8.1 of APM2, Default Operand Size or D bit.
*/
- vmexit->u.mmio_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ?
+ vmexit->u.mmio_emul.cs_d = (seg->attrib & VMCB_CS_ATTRIB_D) ?
1 : 0;
break;
default:
@@ -927,7 +934,7 @@ svm_eventinject(struct svm_softc *sc, int vcpu, int intr_type, int vector,
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0,
- ("%s: event already pending %#lx", __func__, ctrl->eventinj));
+ ("%s: event already pending %lx", __func__, ctrl->eventinj));
KASSERT(vector >=0 && vector <= 255, ("%s: invalid vector %d",
__func__, vector));
@@ -949,7 +956,7 @@ svm_eventinject(struct svm_softc *sc, int vcpu, int intr_type, int vector,
if (ec_valid) {
ctrl->eventinj |= VMCB_EVENTINJ_EC_VALID;
ctrl->eventinj |= (uint64_t)error << 32;
- VCPU_CTR3(sc->vm, vcpu, "Injecting %s at vector %d errcode %#x",
+ VCPU_CTR3(sc->vm, vcpu, "Injecting %s at vector %d errcode %x",
intrtype_to_str(intr_type), vector, error);
} else {
VCPU_CTR2(sc->vm, vcpu, "Injecting %s at vector %d",
@@ -1050,32 +1057,6 @@ disable_intr_window_exiting(struct svm_softc *sc, int vcpu)
svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR);
}
-static int
-svm_modify_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t val)
-{
- struct vmcb_ctrl *ctrl;
- int oldval, newval;
-
- ctrl = svm_get_vmcb_ctrl(sc, vcpu);
- oldval = ctrl->intr_shadow;
- newval = val ? 1 : 0;
- if (newval != oldval) {
- ctrl->intr_shadow = newval;
- VCPU_CTR1(sc->vm, vcpu, "Setting intr_shadow to %d", newval);
- }
- return (0);
-}
-
-static int
-svm_get_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t *val)
-{
- struct vmcb_ctrl *ctrl;
-
- ctrl = svm_get_vmcb_ctrl(sc, vcpu);
- *val = ctrl->intr_shadow;
- return (0);
-}
-
/*
* Once an NMI is injected it blocks delivery of further NMIs until the handler
* executes an IRET. The IRET intercept is enabled when an NMI is injected to
@@ -1103,7 +1084,7 @@ enable_nmi_blocking(struct svm_softc *sc, int vcpu)
static void
clear_nmi_blocking(struct svm_softc *sc, int vcpu)
{
- int error;
+ struct vmcb_ctrl *ctrl;
KASSERT(nmi_blocked(sc, vcpu), ("vNMI already unblocked"));
VCPU_CTR0(sc->vm, vcpu, "vNMI blocking cleared");
@@ -1124,8 +1105,8 @@ clear_nmi_blocking(struct svm_softc *sc, int vcpu)
* Set 'intr_shadow' to prevent an NMI from being injected on the
* immediate VMRUN.
*/
- error = svm_modify_intr_shadow(sc, vcpu, 1);
- KASSERT(!error, ("%s: error %d setting intr_shadow", __func__, error));
+ ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+ ctrl->intr_shadow = 1;
}
#define EFER_MBZ_BITS 0xFFFFFFFFFFFF0200UL
@@ -1141,7 +1122,7 @@ svm_write_efer(struct svm_softc *sc, int vcpu, uint64_t newval, bool *retu)
state = svm_get_vmcb_state(sc, vcpu);
oldval = state->efer;
- VCPU_CTR2(sc->vm, vcpu, "wrmsr(efer) %#lx/%#lx", oldval, newval);
+ VCPU_CTR2(sc->vm, vcpu, "wrmsr(efer) %lx/%lx", oldval, newval);
newval &= ~0xFE; /* clear the Read-As-Zero (RAZ) bits */
changed = oldval ^ newval;
@@ -1275,7 +1256,7 @@ exit_reason_to_str(uint64_t reason)
case VMCB_EXIT_MWAIT:
return ("mwait");
default:
- snprintf(reasonbuf, sizeof(reasonbuf), "%#lx", reason);
+ snprintf(reasonbuf, sizeof(reasonbuf), "%lx", reason);
return (reasonbuf);
}
}
@@ -1350,10 +1331,10 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
}
KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0, ("%s: event "
- "injection valid bit is set %#lx", __func__, ctrl->eventinj));
+ "injection valid bit is set %lx", __func__, ctrl->eventinj));
KASSERT(vmexit->inst_length >= 0 && vmexit->inst_length <= 15,
- ("invalid inst_length %d: code (%#lx), info1 (%#lx), info2 (%#lx)",
+ ("invalid inst_length %d: code (%lx), info1 (%lx), info2 (%lx)",
vmexit->inst_length, code, info1, info2));
svm_update_virqinfo(svm_sc, vcpu);
@@ -1445,7 +1426,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
if (reflect) {
/* Reflect the exception back into the guest */
VCPU_CTR2(svm_sc->vm, vcpu, "Reflecting exception "
- "%d/%#x into the guest", idtvec, (int)info1);
+ "%d/%x into the guest", idtvec, (int)info1);
error = vm_inject_exception(svm_sc->vm, vcpu, idtvec,
errcode_valid, info1, 0);
KASSERT(error == 0, ("%s: vm_inject_exception error %d",
@@ -1462,7 +1443,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
if (info1) {
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1);
val = (uint64_t)edx << 32 | eax;
- VCPU_CTR2(svm_sc->vm, vcpu, "wrmsr %#x val %#lx",
+ VCPU_CTR2(svm_sc->vm, vcpu, "wrmsr %x val %lx",
ecx, val);
if (emulate_wrmsr(svm_sc, vcpu, ecx, val, &retu)) {
vmexit->exitcode = VM_EXITCODE_WRMSR;
@@ -1475,7 +1456,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
("emulate_wrmsr retu with bogus exitcode"));
}
} else {
- VCPU_CTR1(svm_sc->vm, vcpu, "rdmsr %#x", ecx);
+ VCPU_CTR1(svm_sc->vm, vcpu, "rdmsr %x", ecx);
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1);
if (emulate_rdmsr(svm_sc, vcpu, ecx, &retu)) {
vmexit->exitcode = VM_EXITCODE_RDMSR;
@@ -1492,6 +1473,31 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
handled = svm_handle_inout(svm_sc, vcpu, vmexit);
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1);
break;
+ case VMCB_EXIT_SHUTDOWN:
+ vm_suspend(svm_sc->vm, VM_SUSPEND_TRIPLEFAULT);
+ handled = 1;
+ break;
+ case VMCB_EXIT_INVD:
+ case VMCB_EXIT_INVLPGA:
+ /* privileged invalidation instructions */
+ vm_inject_ud(svm_sc->vm, vcpu);
+ handled = 1;
+ break;
+ case VMCB_EXIT_VMRUN:
+ case VMCB_EXIT_VMLOAD:
+ case VMCB_EXIT_VMSAVE:
+ case VMCB_EXIT_STGI:
+ case VMCB_EXIT_CLGI:
+ case VMCB_EXIT_SKINIT:
+ /* privileged vmm instructions */
+ vm_inject_ud(svm_sc->vm, vcpu);
+ handled = 1;
+ break;
+ case VMCB_EXIT_VMMCALL:
+ /* No handlers make use of VMMCALL for now */
+ vm_inject_ud(svm_sc->vm, vcpu);
+ handled = 1;
+ break;
case VMCB_EXIT_CPUID:
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_CPUID, 1);
handled = x86_emulate_cpuid(svm_sc->vm, vcpu, &state->rax,
@@ -1510,7 +1516,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
/* EXITINFO2 contains the faulting guest physical address */
if (info1 & VMCB_NPF_INFO1_RSV) {
VCPU_CTR2(svm_sc->vm, vcpu, "nested page fault with "
- "reserved bits set: info1(%#lx) info2(%#lx)",
+ "reserved bits set: info1(%lx) info2(%lx)",
info1, info2);
} else if (vm_mem_allocated(svm_sc->vm, vcpu, info2)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
@@ -1518,13 +1524,13 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
vmexit->u.paging.fault_type = npf_fault_type(info1);
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_NESTED_FAULT, 1);
VCPU_CTR3(svm_sc->vm, vcpu, "nested page fault "
- "on gpa %#lx/%#lx at rip %#lx",
+ "on gpa %lx/%lx at rip %lx",
info2, info1, state->rip);
} else if (svm_npf_emul_fault(info1)) {
svm_handle_mmio_emul(svm_sc, vcpu, vmexit, info2);
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MMIO_EMUL, 1);
VCPU_CTR3(svm_sc->vm, vcpu, "mmio_emul fault "
- "for gpa %#lx/%#lx at rip %#lx",
+ "for gpa %lx/%lx at rip %lx",
info2, info1, state->rip);
}
break;
@@ -1539,7 +1545,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
break;
}
- VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %#lx/%d",
+ VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %lx/%d",
handled ? "handled" : "unhandled", exit_reason_to_str(code),
vmexit->rip, vmexit->inst_length);
@@ -1576,14 +1582,14 @@ svm_inj_intinfo(struct svm_softc *svm_sc, int vcpu)
return;
KASSERT(VMCB_EXITINTINFO_VALID(intinfo), ("%s: entry intinfo is not "
- "valid: %#lx", __func__, intinfo));
+ "valid: %lx", __func__, intinfo));
svm_eventinject(svm_sc, vcpu, VMCB_EXITINTINFO_TYPE(intinfo),
VMCB_EXITINTINFO_VECTOR(intinfo),
VMCB_EXITINTINFO_EC(intinfo),
VMCB_EXITINTINFO_EC_VALID(intinfo));
vmm_stat_incr(svm_sc->vm, vcpu, VCPU_INTINFO_INJECTED, 1);
- VCPU_CTR1(svm_sc->vm, vcpu, "Injected entry intinfo: %#lx", intinfo);
+ VCPU_CTR1(svm_sc->vm, vcpu, "Injected entry intinfo: %lx", intinfo);
}
/*
@@ -1610,7 +1616,7 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic)
if (vcpustate->nextrip != state->rip) {
ctrl->intr_shadow = 0;
VCPU_CTR2(sc->vm, vcpu, "Guest interrupt blocking "
- "cleared due to rip change: %#lx/%#lx",
+ "cleared due to rip change: %lx/%lx",
vcpustate->nextrip, state->rip);
}
@@ -1648,7 +1654,7 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic)
* then defer the NMI until after that.
*/
VCPU_CTR1(sc->vm, vcpu, "Cannot inject NMI due to "
- "eventinj %#lx", ctrl->eventinj);
+ "eventinj %lx", ctrl->eventinj);
/*
* Use self-IPI to trigger a VM-exit as soon as
@@ -1694,7 +1700,7 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic)
*/
if ((state->rflags & PSL_I) == 0) {
VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to "
- "rflags %#lx", vector, state->rflags);
+ "rflags %lx", vector, state->rflags);
need_intr_window = 1;
goto done;
}
@@ -1708,7 +1714,7 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic)
if (ctrl->eventinj & VMCB_EVENTINJ_VALID) {
VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to "
- "eventinj %#lx", vector, ctrl->eventinj);
+ "eventinj %lx", vector, ctrl->eventinj);
need_intr_window = 1;
goto done;
}
@@ -1742,9 +1748,9 @@ done:
* VMRUN.
*/
v_tpr = vlapic_get_cr8(vlapic);
- KASSERT(v_tpr <= 15, ("invalid v_tpr %#x", v_tpr));
+ KASSERT(v_tpr <= 15, ("invalid v_tpr %x", v_tpr));
if (ctrl->v_tpr != v_tpr) {
- VCPU_CTR2(sc->vm, vcpu, "VMCB V_TPR changed from %#x to %#x",
+ VCPU_CTR2(sc->vm, vcpu, "VMCB V_TPR changed from %x to %x",
ctrl->v_tpr, v_tpr);
ctrl->v_tpr = v_tpr;
svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR);
@@ -1762,8 +1768,8 @@ done:
*/
KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) != 0 ||
(state->rflags & PSL_I) == 0 || ctrl->intr_shadow,
- ("Bogus intr_window_exiting: eventinj (%#lx), "
- "intr_shadow (%u), rflags (%#lx)",
+ ("Bogus intr_window_exiting: eventinj (%lx), "
+ "intr_shadow (%lu), rflags (%lx)",
ctrl->eventinj, ctrl->intr_shadow, state->rflags));
enable_intr_window_exiting(sc, vcpu);
} else {
@@ -1838,7 +1844,7 @@ check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, u_int thiscpu)
*/
KASSERT(!alloc_asid, ("ASID allocation not necessary"));
KASSERT(ctrl->tlb_ctrl == VMCB_TLB_FLUSH_NOTHING,
- ("Invalid VMCB tlb_ctrl: %#x", ctrl->tlb_ctrl));
+ ("Invalid VMCB tlb_ctrl: %x", ctrl->tlb_ctrl));
}
if (alloc_asid) {
@@ -1968,7 +1974,7 @@ svm_dr_leave_guest(struct svm_regctx *gctx)
* Start vcpu with specified RIP.
*/
static int
-svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
+svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
struct vm_eventinfo *evinfo)
{
struct svm_regctx *gctx;
@@ -2109,10 +2115,10 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
ctrl->vmcb_clean = vmcb_clean & ~vcpustate->dirty;
vcpustate->dirty = 0;
- VCPU_CTR1(vm, vcpu, "vmcb clean %#x", ctrl->vmcb_clean);
+ VCPU_CTR1(vm, vcpu, "vmcb clean %x", ctrl->vmcb_clean);
/* Launch Virtual Machine. */
- VCPU_CTR1(vm, vcpu, "Resume execution at %#lx", state->rip);
+ VCPU_CTR1(vm, vcpu, "Resume execution at %lx", state->rip);
svm_dr_enter_guest(gctx);
svm_launch(vmcb_pa, gctx, get_pcpu());
svm_dr_leave_guest(gctx);
@@ -2152,10 +2158,9 @@ svm_vmcleanup(void *arg)
free(sc, M_SVM);
}
-static register_t *
+static uint64_t *
swctx_regptr(struct svm_regctx *regctx, int reg)
{
-
switch (reg) {
case VM_REG_GUEST_RBX:
return (&regctx->sctx_rbx);
@@ -2201,56 +2206,135 @@ swctx_regptr(struct svm_regctx *regctx, int reg)
static int
svm_getreg(void *arg, int vcpu, int ident, uint64_t *val)
{
- struct svm_softc *svm_sc;
- register_t *reg;
-
- svm_sc = arg;
+ struct svm_softc *sc;
+ struct vmcb *vmcb;
+ uint64_t *regp;
+ uint64_t *fieldp;
+ struct vmcb_segment *seg;
- if (ident == VM_REG_GUEST_INTR_SHADOW) {
- return (svm_get_intr_shadow(svm_sc, vcpu, val));
- }
+ sc = arg;
+ vmcb = svm_get_vmcb(sc, vcpu);
- if (vmcb_read(svm_sc, vcpu, ident, val) == 0) {
+ regp = swctx_regptr(svm_get_guest_regctx(sc, vcpu), ident);
+ if (regp != NULL) {
+ *val = *regp;
return (0);
}
- reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident);
+ switch (ident) {
+ case VM_REG_GUEST_INTR_SHADOW:
+ *val = (vmcb->ctrl.intr_shadow != 0) ? 1 : 0;
+ break;
- if (reg != NULL) {
- *val = *reg;
- return (0);
+ case VM_REG_GUEST_CR0:
+ case VM_REG_GUEST_CR2:
+ case VM_REG_GUEST_CR3:
+ case VM_REG_GUEST_CR4:
+ case VM_REG_GUEST_DR6:
+ case VM_REG_GUEST_DR7:
+ case VM_REG_GUEST_EFER:
+ case VM_REG_GUEST_RAX:
+ case VM_REG_GUEST_RFLAGS:
+ case VM_REG_GUEST_RIP:
+ case VM_REG_GUEST_RSP:
+ fieldp = vmcb_regptr(vmcb, ident, NULL);
+ *val = *fieldp;
+ break;
+
+ case VM_REG_GUEST_CS:
+ case VM_REG_GUEST_DS:
+ case VM_REG_GUEST_ES:
+ case VM_REG_GUEST_FS:
+ case VM_REG_GUEST_GS:
+ case VM_REG_GUEST_SS:
+ case VM_REG_GUEST_LDTR:
+ case VM_REG_GUEST_TR:
+ seg = vmcb_segptr(vmcb, ident);
+ *val = seg->selector;
+ break;
+
+ case VM_REG_GUEST_GDTR:
+ case VM_REG_GUEST_IDTR:
+ /* GDTR and IDTR don't have segment selectors */
+ return (EINVAL);
+
+ default:
+ return (EINVAL);
}
- VCPU_CTR1(svm_sc->vm, vcpu, "svm_getreg: unknown register %#x", ident);
- return (EINVAL);
+ return (0);
}
static int
svm_setreg(void *arg, int vcpu, int ident, uint64_t val)
{
- struct svm_softc *svm_sc;
- register_t *reg;
-
- svm_sc = arg;
+ struct svm_softc *sc;
+ struct vmcb *vmcb;
+ uint64_t *regp;
+ uint64_t *fieldp;
+ uint32_t dirty;
+ struct vmcb_segment *seg;
- if (ident == VM_REG_GUEST_INTR_SHADOW) {
- return (svm_modify_intr_shadow(svm_sc, vcpu, val));
- }
+ sc = arg;
+ vmcb = svm_get_vmcb(sc, vcpu);
- if (vmcb_write(svm_sc, vcpu, ident, val) == 0) {
+ regp = swctx_regptr(svm_get_guest_regctx(sc, vcpu), ident);
+ if (regp != NULL) {
+ *regp = val;
return (0);
}
- reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident);
+ dirty = VMCB_CACHE_NONE;
+ switch (ident) {
+ case VM_REG_GUEST_INTR_SHADOW:
+ vmcb->ctrl.intr_shadow = (val != 0) ? 1 : 0;
+ break;
- if (reg != NULL) {
- *reg = val;
- return (0);
+ case VM_REG_GUEST_EFER:
+ fieldp = vmcb_regptr(vmcb, ident, &dirty);
+ /* EFER_SVM must always be set when the guest is executing */
+ *fieldp = val | EFER_SVM;
+ dirty |= VMCB_CACHE_CR;
+ break;
+
+ case VM_REG_GUEST_CR0:
+ case VM_REG_GUEST_CR2:
+ case VM_REG_GUEST_CR3:
+ case VM_REG_GUEST_CR4:
+ case VM_REG_GUEST_DR6:
+ case VM_REG_GUEST_DR7:
+ case VM_REG_GUEST_RAX:
+ case VM_REG_GUEST_RFLAGS:
+ case VM_REG_GUEST_RIP:
+ case VM_REG_GUEST_RSP:
+ fieldp = vmcb_regptr(vmcb, ident, &dirty);
+ *fieldp = val;
+ break;
+
+ case VM_REG_GUEST_CS:
+ case VM_REG_GUEST_DS:
+ case VM_REG_GUEST_ES:
+ case VM_REG_GUEST_SS:
+ case VM_REG_GUEST_FS:
+ case VM_REG_GUEST_GS:
+ case VM_REG_GUEST_LDTR:
+ case VM_REG_GUEST_TR:
+ dirty |= VMCB_CACHE_SEG;
+ seg = vmcb_segptr(vmcb, ident);
+ seg->selector = (uint16_t)val;
+ break;
+
+ case VM_REG_GUEST_GDTR:
+ case VM_REG_GUEST_IDTR:
+ /* GDTR and IDTR don't have segment selectors */
+ return (EINVAL);
+
+ default:
+ return (EINVAL);
}
- if (ident == VM_REG_GUEST_ENTRY_INST_LENGTH) {
- /* Ignore. */
- return (0);
+ if (dirty != VMCB_CACHE_NONE) {
+ svm_set_dirty(sc, vcpu, dirty);
}
/*
@@ -2259,8 +2343,119 @@ svm_setreg(void *arg, int vcpu, int ident, uint64_t val)
* whether 'running' is true/false.
*/
- VCPU_CTR1(svm_sc->vm, vcpu, "svm_setreg: unknown register %#x", ident);
- return (EINVAL);
+ return (0);
+}
+
+static int
+svm_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
+{
+ struct vmcb *vmcb;
+ struct svm_softc *sc;
+ struct vmcb_segment *seg;
+
+ sc = arg;
+ vmcb = svm_get_vmcb(sc, vcpu);
+
+ switch (reg) {
+ case VM_REG_GUEST_CS:
+ case VM_REG_GUEST_DS:
+ case VM_REG_GUEST_ES:
+ case VM_REG_GUEST_SS:
+ case VM_REG_GUEST_FS:
+ case VM_REG_GUEST_GS:
+ case VM_REG_GUEST_LDTR:
+ case VM_REG_GUEST_TR:
+ svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG);
+ seg = vmcb_segptr(vmcb, reg);
+ /*
+ * Map seg_desc access to VMCB attribute format.
+ *
+ * SVM uses the 'P' bit in the segment attributes to indicate a
+ * NULL segment so clear it if the segment is marked unusable.
+ */
+ seg->attrib = VMCB_ACCESS2ATTR(desc->access);
+ if (SEG_DESC_UNUSABLE(desc->access)) {
+ seg->attrib &= ~0x80;
+ }
+ break;
+
+ case VM_REG_GUEST_GDTR:
+ case VM_REG_GUEST_IDTR:
+ svm_set_dirty(sc, vcpu, VMCB_CACHE_DT);
+ seg = vmcb_segptr(vmcb, reg);
+ break;
+
+ default:
+ return (EINVAL);
+ }
+
+ ASSERT(seg != NULL);
+ seg->base = desc->base;
+ seg->limit = desc->limit;
+
+ return (0);
+}
+
+static int
+svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
+{
+ struct vmcb *vmcb;
+ struct svm_softc *sc;
+ struct vmcb_segment *seg;
+
+ sc = arg;
+ vmcb = svm_get_vmcb(sc, vcpu);
+
+ switch (reg) {
+ case VM_REG_GUEST_DS:
+ case VM_REG_GUEST_ES:
+ case VM_REG_GUEST_FS:
+ case VM_REG_GUEST_GS:
+ case VM_REG_GUEST_SS:
+ case VM_REG_GUEST_LDTR:
+ seg = vmcb_segptr(vmcb, reg);
+ desc->access = VMCB_ATTR2ACCESS(seg->attrib);
+ /*
+ * VT-x uses bit 16 to indicate a segment that has been loaded
+ * with a NULL selector (aka unusable). The 'desc->access'
+ * field is interpreted in the VT-x format by the
+ * processor-independent code.
+ *
+ * SVM uses the 'P' bit to convey the same information so
+ * convert it into the VT-x format. For more details refer to
+ * section "Segment State in the VMCB" in APMv2.
+ */
+ if ((desc->access & 0x80) == 0) {
+ /* Unusable segment */
+ desc->access |= 0x10000;
+ }
+ break;
+
+ case VM_REG_GUEST_CS:
+ case VM_REG_GUEST_TR:
+ seg = vmcb_segptr(vmcb, reg);
+ desc->access = VMCB_ATTR2ACCESS(seg->attrib);
+ break;
+
+ case VM_REG_GUEST_GDTR:
+ case VM_REG_GUEST_IDTR:
+ seg = vmcb_segptr(vmcb, reg);
+ /*
+ * Since there are no access bits associated with the GDTR or
+ * the IDTR, zero out the field to ensure it does not contain
+ * garbage which might confuse the consumer.
+ */
+ desc->access = 0;
+ break;
+
+ default:
+ return (EINVAL);
+ }
+
+ ASSERT(seg != NULL);
+ desc->base = seg->base;
+ desc->limit = seg->limit;
+ return (0);
}
static int
@@ -2368,8 +2563,8 @@ struct vmm_ops vmm_ops_amd = {
.vmcleanup = svm_vmcleanup,
.vmgetreg = svm_getreg,
.vmsetreg = svm_setreg,
- .vmgetdesc = vmcb_getdesc,
- .vmsetdesc = vmcb_setdesc,
+ .vmgetdesc = svm_getdesc,
+ .vmsetdesc = svm_setdesc,
.vmgetcap = svm_getcap,
.vmsetcap = svm_setcap,
.vmspace_alloc = svm_npt_alloc,
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.h b/usr/src/uts/i86pc/io/vmm/amd/svm.h
index c78f7eb067..19739884c2 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.h
@@ -35,31 +35,31 @@
* Guest register state that is saved outside the VMCB.
*/
struct svm_regctx {
- register_t sctx_rbp;
- register_t sctx_rbx;
- register_t sctx_rcx;
- register_t sctx_rdx;
- register_t sctx_rdi;
- register_t sctx_rsi;
- register_t sctx_r8;
- register_t sctx_r9;
- register_t sctx_r10;
- register_t sctx_r11;
- register_t sctx_r12;
- register_t sctx_r13;
- register_t sctx_r14;
- register_t sctx_r15;
- register_t sctx_dr0;
- register_t sctx_dr1;
- register_t sctx_dr2;
- register_t sctx_dr3;
+ uint64_t sctx_rbp;
+ uint64_t sctx_rbx;
+ uint64_t sctx_rcx;
+ uint64_t sctx_rdx;
+ uint64_t sctx_rdi;
+ uint64_t sctx_rsi;
+ uint64_t sctx_r8;
+ uint64_t sctx_r9;
+ uint64_t sctx_r10;
+ uint64_t sctx_r11;
+ uint64_t sctx_r12;
+ uint64_t sctx_r13;
+ uint64_t sctx_r14;
+ uint64_t sctx_r15;
+ uint64_t sctx_dr0;
+ uint64_t sctx_dr1;
+ uint64_t sctx_dr2;
+ uint64_t sctx_dr3;
- register_t host_dr0;
- register_t host_dr1;
- register_t host_dr2;
- register_t host_dr3;
- register_t host_dr6;
- register_t host_dr7;
+ uint64_t host_dr0;
+ uint64_t host_dr1;
+ uint64_t host_dr2;
+ uint64_t host_dr3;
+ uint64_t host_dr6;
+ uint64_t host_dr7;
uint64_t host_debugctl;
};
diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.c b/usr/src/uts/i86pc/io/vmm/amd/vmcb.c
index 5075b69867..b00f974c23 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.c
@@ -26,429 +26,130 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2020 Oxide Computer Company
+ */
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
-#include <machine/segments.h>
-#include <machine/specialreg.h>
#include <machine/vmm.h>
-#include "vmm_ktr.h"
-
#include "vmcb.h"
#include "svm.h"
-#include "svm_softc.h"
-/*
- * The VMCB aka Virtual Machine Control Block is a 4KB aligned page
- * in memory that describes the virtual machine.
- *
- * The VMCB contains:
- * - instructions or events in the guest to intercept
- * - control bits that modify execution environment of the guest
- * - guest processor state (e.g. general purpose registers)
- */
-
-/*
- * Return VMCB segment area.
- */
-static struct vmcb_segment *
+struct vmcb_segment *
vmcb_segptr(struct vmcb *vmcb, int type)
{
- struct vmcb_state *state;
- struct vmcb_segment *seg;
-
- state = &vmcb->state;
+ struct vmcb_state *state = &vmcb->state;
switch (type) {
case VM_REG_GUEST_CS:
- seg = &state->cs;
- break;
-
+ return (&state->cs);
case VM_REG_GUEST_DS:
- seg = &state->ds;
- break;
-
+ return (&state->ds);
case VM_REG_GUEST_ES:
- seg = &state->es;
- break;
-
+ return (&state->es);
case VM_REG_GUEST_FS:
- seg = &state->fs;
- break;
-
+ return (&state->fs);
case VM_REG_GUEST_GS:
- seg = &state->gs;
- break;
-
+ return (&state->gs);
case VM_REG_GUEST_SS:
- seg = &state->ss;
- break;
-
+ return (&state->ss);
case VM_REG_GUEST_GDTR:
- seg = &state->gdt;
- break;
-
+ return (&state->gdt);
case VM_REG_GUEST_IDTR:
- seg = &state->idt;
- break;
-
+ return (&state->idt);
case VM_REG_GUEST_LDTR:
- seg = &state->ldt;
- break;
-
+ return (&state->ldt);
case VM_REG_GUEST_TR:
- seg = &state->tr;
- break;
-
+ return (&state->tr);
default:
- seg = NULL;
- break;
+ panic("unexpected seg %d", type);
}
-
- return (seg);
}
-static int
-vmcb_access(struct svm_softc *softc, int vcpu, int write, int ident,
- uint64_t *val)
+uint64_t *
+vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp)
{
- struct vmcb *vmcb;
- int off, bytes;
- char *ptr;
-
- vmcb = svm_get_vmcb(softc, vcpu);
- off = VMCB_ACCESS_OFFSET(ident);
- bytes = VMCB_ACCESS_BYTES(ident);
-
- if ((off + bytes) >= sizeof (struct vmcb))
- return (EINVAL);
-
- ptr = (char *)vmcb;
-
- if (!write)
- *val = 0;
-
- switch (bytes) {
- case 8:
- case 4:
- case 2:
- if (write)
- memcpy(ptr + off, val, bytes);
- else
- memcpy(val, ptr + off, bytes);
- break;
- default:
- VCPU_CTR1(softc->vm, vcpu,
- "Invalid size %d for VMCB access: %d", bytes);
- return (EINVAL);
- }
-
- /* Invalidate all VMCB state cached by h/w. */
- if (write)
- svm_set_dirty(softc, vcpu, 0xffffffff);
-
- return (0);
-}
-
-/*
- * Read from segment selector, control and general purpose register of VMCB.
- */
-int
-vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval)
-{
- struct vmcb *vmcb;
struct vmcb_state *state;
- struct vmcb_segment *seg;
- int err;
+ uint64_t *res = NULL;
+ uint32_t dirty = VMCB_CACHE_NONE;
- vmcb = svm_get_vmcb(sc, vcpu);
state = &vmcb->state;
- err = 0;
-
- if (VMCB_ACCESS_OK(ident))
- return (vmcb_access(sc, vcpu, 0, ident, retval));
switch (ident) {
case VM_REG_GUEST_CR0:
- *retval = state->cr0;
+ res = &state->cr0;
+ dirty = VMCB_CACHE_CR;
break;
case VM_REG_GUEST_CR2:
- *retval = state->cr2;
+ res = &state->cr2;
+ dirty = VMCB_CACHE_CR2;
break;
case VM_REG_GUEST_CR3:
- *retval = state->cr3;
+ res = &state->cr3;
+ dirty = VMCB_CACHE_CR;
break;
case VM_REG_GUEST_CR4:
- *retval = state->cr4;
+ res = &state->cr4;
+ dirty = VMCB_CACHE_CR;
break;
case VM_REG_GUEST_DR6:
- *retval = state->dr6;
+ res = &state->dr6;
+ dirty = VMCB_CACHE_DR;
break;
case VM_REG_GUEST_DR7:
- *retval = state->dr7;
+ res = &state->dr7;
+ dirty = VMCB_CACHE_DR;
break;
case VM_REG_GUEST_EFER:
- *retval = state->efer;
+ res = &state->efer;
+ dirty = VMCB_CACHE_CR;
break;
case VM_REG_GUEST_RAX:
- *retval = state->rax;
+ res = &state->rax;
break;
case VM_REG_GUEST_RFLAGS:
- *retval = state->rflags;
+ res = &state->rflags;
break;
case VM_REG_GUEST_RIP:
- *retval = state->rip;
+ res = &state->rip;
break;
case VM_REG_GUEST_RSP:
- *retval = state->rsp;
- break;
-
- case VM_REG_GUEST_CS:
- case VM_REG_GUEST_DS:
- case VM_REG_GUEST_ES:
- case VM_REG_GUEST_FS:
- case VM_REG_GUEST_GS:
- case VM_REG_GUEST_SS:
- case VM_REG_GUEST_LDTR:
- case VM_REG_GUEST_TR:
- seg = vmcb_segptr(vmcb, ident);
- KASSERT(seg != NULL, ("%s: unable to get segment %d from VMCB",
- __func__, ident));
- *retval = seg->selector;
+ res = &state->rsp;
break;
- case VM_REG_GUEST_GDTR:
- case VM_REG_GUEST_IDTR:
- /* GDTR and IDTR don't have segment selectors */
- err = EINVAL;
- break;
default:
- err = EINVAL;
+ panic("unexpected register %d", ident);
break;
}
- return (err);
-}
-
-/*
- * Write to segment selector, control and general purpose register of VMCB.
- */
-int
-vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val)
-{
- struct vmcb *vmcb;
- struct vmcb_state *state;
- struct vmcb_segment *seg;
- int err, dirtyseg;
-
- vmcb = svm_get_vmcb(sc, vcpu);
- state = &vmcb->state;
- dirtyseg = 0;
- err = 0;
-
- if (VMCB_ACCESS_OK(ident))
- return (vmcb_access(sc, vcpu, 1, ident, &val));
-
- switch (ident) {
- case VM_REG_GUEST_CR0:
- state->cr0 = val;
- svm_set_dirty(sc, vcpu, VMCB_CACHE_CR);
- break;
-
- case VM_REG_GUEST_CR2:
- state->cr2 = val;
- svm_set_dirty(sc, vcpu, VMCB_CACHE_CR2);
- break;
-
- case VM_REG_GUEST_CR3:
- state->cr3 = val;
- svm_set_dirty(sc, vcpu, VMCB_CACHE_CR);
- break;
-
- case VM_REG_GUEST_CR4:
- state->cr4 = val;
- svm_set_dirty(sc, vcpu, VMCB_CACHE_CR);
- break;
-
- case VM_REG_GUEST_DR6:
- state->dr6 = val;
- svm_set_dirty(sc, vcpu, VMCB_CACHE_DR);
- break;
-
- case VM_REG_GUEST_DR7:
- state->dr7 = val;
- svm_set_dirty(sc, vcpu, VMCB_CACHE_DR);
- break;
-
- case VM_REG_GUEST_EFER:
- /* EFER_SVM must always be set when the guest is executing */
- state->efer = val | EFER_SVM;
- svm_set_dirty(sc, vcpu, VMCB_CACHE_CR);
- break;
-
- case VM_REG_GUEST_RAX:
- state->rax = val;
- break;
-
- case VM_REG_GUEST_RFLAGS:
- state->rflags = val;
- break;
-
- case VM_REG_GUEST_RIP:
- state->rip = val;
- break;
-
- case VM_REG_GUEST_RSP:
- state->rsp = val;
- break;
-
- case VM_REG_GUEST_CS:
- case VM_REG_GUEST_DS:
- case VM_REG_GUEST_ES:
- case VM_REG_GUEST_SS:
- dirtyseg = 1; /* FALLTHROUGH */
- case VM_REG_GUEST_FS:
- case VM_REG_GUEST_GS:
- case VM_REG_GUEST_LDTR:
- case VM_REG_GUEST_TR:
- seg = vmcb_segptr(vmcb, ident);
- KASSERT(seg != NULL, ("%s: unable to get segment %d from VMCB",
- __func__, ident));
- seg->selector = val;
- if (dirtyseg)
- svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG);
- break;
-
- case VM_REG_GUEST_GDTR:
- case VM_REG_GUEST_IDTR:
- /* GDTR and IDTR don't have segment selectors */
- err = EINVAL;
- break;
- default:
- err = EINVAL;
- break;
- }
-
- return (err);
-}
-
-int
-vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg2)
-{
- struct vmcb_segment *seg;
-
- seg = vmcb_segptr(vmcb, ident);
- if (seg != NULL) {
- bcopy(seg, seg2, sizeof(struct vmcb_segment));
- return (0);
- } else {
- return (EINVAL);
- }
-}
-
-int
-vmcb_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
-{
- struct vmcb *vmcb;
- struct svm_softc *sc;
- struct vmcb_segment *seg;
- uint16_t attrib;
-
- sc = arg;
- vmcb = svm_get_vmcb(sc, vcpu);
-
- seg = vmcb_segptr(vmcb, reg);
- KASSERT(seg != NULL, ("%s: invalid segment descriptor %d",
- __func__, reg));
-
- seg->base = desc->base;
- seg->limit = desc->limit;
- if (reg != VM_REG_GUEST_GDTR && reg != VM_REG_GUEST_IDTR) {
- /*
- * Map seg_desc access to VMCB attribute format.
- *
- * SVM uses the 'P' bit in the segment attributes to indicate a
- * NULL segment so clear it if the segment is marked unusable.
- */
- attrib = ((desc->access & 0xF000) >> 4) | (desc->access & 0xFF);
- if (SEG_DESC_UNUSABLE(desc->access)) {
- attrib &= ~0x80;
- }
- seg->attrib = attrib;
- }
-
- VCPU_CTR4(sc->vm, vcpu, "Setting desc %d: base (%#lx), limit (%#x), "
- "attrib (%#x)", reg, seg->base, seg->limit, seg->attrib);
-
- switch (reg) {
- case VM_REG_GUEST_CS:
- case VM_REG_GUEST_DS:
- case VM_REG_GUEST_ES:
- case VM_REG_GUEST_SS:
- svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG);
- break;
- case VM_REG_GUEST_GDTR:
- case VM_REG_GUEST_IDTR:
- svm_set_dirty(sc, vcpu, VMCB_CACHE_DT);
- break;
- default:
- break;
+ ASSERT(res != NULL);
+ if (dirtyp != NULL) {
+ *dirtyp |= dirty;
}
-
- return (0);
-}
-
-int
-vmcb_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
-{
- struct vmcb *vmcb;
- struct svm_softc *sc;
- struct vmcb_segment *seg;
-
- sc = arg;
- vmcb = svm_get_vmcb(sc, vcpu);
- seg = vmcb_segptr(vmcb, reg);
- KASSERT(seg != NULL, ("%s: invalid segment descriptor %d",
- __func__, reg));
-
- desc->base = seg->base;
- desc->limit = seg->limit;
- desc->access = 0;
-
- if (reg != VM_REG_GUEST_GDTR && reg != VM_REG_GUEST_IDTR) {
- /* Map seg_desc access to VMCB attribute format */
- desc->access = ((seg->attrib & 0xF00) << 4) |
- (seg->attrib & 0xFF);
-
- /*
- * VT-x uses bit 16 to indicate a segment that has been loaded
- * with a NULL selector (aka unusable). The 'desc->access'
- * field is interpreted in the VT-x format by the
- * processor-independent code.
- *
- * SVM uses the 'P' bit to convey the same information so
- * convert it into the VT-x format. For more details refer to
- * section "Segment State in the VMCB" in APMv2.
- */
- if (reg != VM_REG_GUEST_CS && reg != VM_REG_GUEST_TR) {
- if ((desc->access & 0x80) == 0)
- desc->access |= 0x10000; /* Unusable segment */
- }
- }
-
- return (0);
+ return (res);
}
diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
index 88f65df66a..63b088253d 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
@@ -86,8 +86,8 @@ struct svm_softc;
#define VMCB_INTCPT_INVD BIT(22)
#define VMCB_INTCPT_PAUSE BIT(23)
#define VMCB_INTCPT_HLT BIT(24)
-#define VMCB_INTCPT_INVPG BIT(25)
-#define VMCB_INTCPT_INVPGA BIT(26)
+#define VMCB_INTCPT_INVLPG BIT(25)
+#define VMCB_INTCPT_INVLPGA BIT(26)
#define VMCB_INTCPT_IO BIT(27)
#define VMCB_INTCPT_MSR BIT(28)
#define VMCB_INTCPT_TASK_SWITCH BIT(29)
@@ -149,12 +149,21 @@ struct svm_softc;
#define VMCB_EXIT_POPF 0x71
#define VMCB_EXIT_CPUID 0x72
#define VMCB_EXIT_IRET 0x74
+#define VMCB_EXIT_INVD 0x76
#define VMCB_EXIT_PAUSE 0x77
#define VMCB_EXIT_HLT 0x78
+#define VMCB_EXIT_INVLPG 0x79
+#define VMCB_EXIT_INVLPGA 0x7A
#define VMCB_EXIT_IO 0x7B
#define VMCB_EXIT_MSR 0x7C
#define VMCB_EXIT_SHUTDOWN 0x7F
+#define VMCB_EXIT_VMRUN 0x80
+#define VMCB_EXIT_VMMCALL 0x81
+#define VMCB_EXIT_VMLOAD 0x82
#define VMCB_EXIT_VMSAVE 0x83
+#define VMCB_EXIT_STGI 0x84
+#define VMCB_EXIT_CLGI 0x85
+#define VMCB_EXIT_SKINIT 0x86
#define VMCB_EXIT_MONITOR 0x8A
#define VMCB_EXIT_MWAIT 0x8B
#define VMCB_EXIT_NPF 0x400
@@ -212,15 +221,6 @@ struct svm_softc;
#define VMCB_OFF_SYSENTER_EIP VMCB_OFF_STATE(0x238)
#define VMCB_OFF_GUEST_PAT VMCB_OFF_STATE(0x268)
-/*
- * Encode the VMCB offset and bytes that we want to read from VMCB.
- */
-#define VMCB_ACCESS(o, w) (0x80000000 | (((w) & 0xF) << 16) | \
- ((o) & 0xFFF))
-#define VMCB_ACCESS_OK(v) ((v) & 0x80000000 )
-#define VMCB_ACCESS_BYTES(v) (((v) >> 16) & 0xF)
-#define VMCB_ACCESS_OFFSET(v) ((v) & 0xFFF)
-
#ifdef _KERNEL
/* VMCB save state area segment format */
struct vmcb_segment {
@@ -231,6 +231,10 @@ struct vmcb_segment {
};
CTASSERT(sizeof(struct vmcb_segment) == 16);
+/* Convert to/from vmcb segment access to generic (VMX) access */
+#define VMCB_ATTR2ACCESS(attr) ((((attr) & 0xf00) << 4) | ((attr) & 0xff))
+#define VMCB_ACCESS2ATTR(acc) ((((acc) & 0xf000) >> 4) | ((acc) & 0xff))
+
/* Code segment descriptor attribute in 12 bit format as saved by VMCB. */
#define VMCB_CS_ATTRIB_L BIT(9) /* Long mode. */
#define VMCB_CS_ATTRIB_D BIT(10) /* OPerand size bit. */
@@ -360,6 +364,15 @@ struct vmcb_state {
CTASSERT(sizeof(struct vmcb_state) == 0xC00);
CTASSERT(offsetof(struct vmcb_state, int_to) == 0x290);
+/*
+ * The VMCB aka Virtual Machine Control Block is a 4KB aligned page
+ * in memory that describes the virtual machine.
+ *
+ * The VMCB contains:
+ * - instructions or events in the guest to intercept
+ * - control bits that modify execution environment of the guest
+ * - guest processor state (e.g. general purpose registers)
+ */
struct vmcb {
struct vmcb_ctrl ctrl;
struct vmcb_state state;
@@ -367,11 +380,8 @@ struct vmcb {
CTASSERT(sizeof(struct vmcb) == PAGE_SIZE);
CTASSERT(offsetof(struct vmcb, state) == 0x400);
-int vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval);
-int vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val);
-int vmcb_setdesc(void *arg, int vcpu, int ident, struct seg_desc *desc);
-int vmcb_getdesc(void *arg, int vcpu, int ident, struct seg_desc *desc);
-int vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg);
+struct vmcb_segment *vmcb_segptr(struct vmcb *vmcb, int type);
+uint64_t *vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp);
#endif /* _KERNEL */
#endif /* _VMCB_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/intel/ept.c b/usr/src/uts/i86pc/io/vmm/intel/ept.c
index 5e5253780e..5e3bd6d309 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/ept.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/ept.c
@@ -59,7 +59,6 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
-#include "vmx_cpufunc.h"
#include "ept.h"
#define EPT_SUPPORTS_EXEC_ONLY(cap) ((cap) & (1UL << 0))
@@ -171,31 +170,12 @@ ept_dump(uint64_t *ptp, int nlevels)
}
#endif
-#ifdef __FreeBSD__
-static void
-invept_single_context(void *arg)
-{
- struct invept_desc desc = *(struct invept_desc *)arg;
-
- invept(INVEPT_TYPE_SINGLE_CONTEXT, desc);
-}
-
-void
-ept_invalidate_mappings(u_long eptp)
-{
- struct invept_desc invept_desc = { 0 };
- invept_desc.eptp = eptp;
-
- smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc);
-}
-#else /* __FreeBSD__ */
void
ept_invalidate_mappings(u_long eptp)
{
hma_vmx_invept_allcpus((uintptr_t)eptp);
}
-#endif /* __FreeBSD__ */
static int
ept_pinit(pmap_t pmap)
diff --git a/usr/src/uts/i86pc/io/vmm/intel/offsets.in b/usr/src/uts/i86pc/io/vmm/intel/offsets.in
index cc041eaefc..ca7f967f3b 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/offsets.in
+++ b/usr/src/uts/i86pc/io/vmm/intel/offsets.in
@@ -22,7 +22,6 @@
#include <machine/pmap.h>
#include <machine/vmm.h>
-#include "intel/vmx_cpufunc.h"
#include "intel/vmx.h"
#include "vm/vm_glue.h"
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c
index f1a08cc57d..36318b1b49 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c
@@ -39,59 +39,24 @@
*
* Copyright 2014 Pluribus Networks Inc.
* Copyright 2017 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
-#ifdef __FreeBSD__
-#include "opt_ddb.h"
-#endif
-
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
-#include <sys/sysctl.h>
#include <sys/systm.h>
-#include <sys/pcpu.h>
#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <machine/segments.h>
#include <machine/vmm.h>
-#include "vmm_host.h"
-#include "vmx_cpufunc.h"
-#include "vmcs.h"
-#include "ept.h"
#include "vmx.h"
-#ifdef DDB
-#include <ddb/ddb.h>
-#endif
-
-SYSCTL_DECL(_hw_vmm_vmx);
-
-static int no_flush_rsb;
-SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW,
- &no_flush_rsb, 0, "Do not flush RSB upon vmexit");
-
-static uint64_t
-vmcs_fix_regval(uint32_t encoding, uint64_t val)
-{
-
- switch (encoding) {
- case VMCS_GUEST_CR0:
- val = vmx_fix_cr0(val);
- break;
- case VMCS_GUEST_CR4:
- val = vmx_fix_cr4(val);
- break;
- default:
- break;
- }
- return (val);
-}
+/* Bits 0-30 of VMX_BASIC MSR contain VMCS revision identifier */
+#define VMX_BASIC_REVISION(v) ((v) & 0x7fffffff)
-static uint32_t
+uint32_t
vmcs_field_encoding(int ident)
{
switch (ident) {
@@ -138,15 +103,13 @@ vmcs_field_encoding(int ident)
case VM_REG_GUEST_ENTRY_INST_LENGTH:
return (VMCS_ENTRY_INST_LENGTH);
default:
- return (-1);
+ return (VMCS_INVALID_ENCODING);
}
-
}
-static int
+void
vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
{
-
switch (seg) {
case VM_REG_GUEST_ES:
*base = VMCS_GUEST_ES_BASE;
@@ -199,364 +162,111 @@ vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
*acc = VMCS_INVALID_ENCODING;
break;
default:
- return (EINVAL);
+ panic("invalid segment register %d", seg);
}
-
- return (0);
}
-int
-vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
+void
+vmcs_clear(uintptr_t vmcs_pa)
{
- int error;
- uint32_t encoding;
-
- /*
- * If we need to get at vmx-specific state in the VMCS we can bypass
- * the translation of 'ident' to 'encoding' by simply setting the
- * sign bit. As it so happens the upper 16 bits are reserved (i.e
- * set to 0) in the encodings for the VMCS so we are free to use the
- * sign bit.
- */
- if (ident < 0)
- encoding = ident & 0x7fffffff;
- else
- encoding = vmcs_field_encoding(ident);
-
- if (encoding == (uint32_t)-1)
- return (EINVAL);
+ int err;
- if (!running)
- VMPTRLD(vmcs);
+ __asm __volatile("vmclear %[addr];"
+ VMX_SET_ERROR_CODE_ASM
+ : [error] "=r" (err)
+ : [addr] "m" (vmcs_pa)
+ : "memory");
- error = vmread(encoding, retval);
-
- if (!running)
- VMCLEAR(vmcs);
-
- return (error);
-}
-
-int
-vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
-{
- int error;
- uint32_t encoding;
-
- if (ident < 0)
- encoding = ident & 0x7fffffff;
- else
- encoding = vmcs_field_encoding(ident);
-
- if (encoding == (uint32_t)-1)
- return (EINVAL);
-
- val = vmcs_fix_regval(encoding, val);
-
- if (!running)
- VMPTRLD(vmcs);
-
- error = vmwrite(encoding, val);
-
- if (!running)
- VMCLEAR(vmcs);
-
- return (error);
-}
-
-int
-vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
-{
- int error;
- uint32_t base, limit, access;
-
- error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
- if (error != 0)
- panic("vmcs_setdesc: invalid segment register %d", seg);
-
- if (!running)
- VMPTRLD(vmcs);
- if ((error = vmwrite(base, desc->base)) != 0)
- goto done;
-
- if ((error = vmwrite(limit, desc->limit)) != 0)
- goto done;
-
- if (access != VMCS_INVALID_ENCODING) {
- if ((error = vmwrite(access, desc->access)) != 0)
- goto done;
+ if (err != 0) {
+ panic("vmclear(%p) error %d", (void *)vmcs_pa, err);
}
-done:
- if (!running)
- VMCLEAR(vmcs);
- return (error);
-}
-
-int
-vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
-{
- int error;
- uint32_t base, limit, access;
- uint64_t u64;
-
- error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
- if (error != 0)
- panic("vmcs_getdesc: invalid segment register %d", seg);
- if (!running)
- VMPTRLD(vmcs);
- if ((error = vmread(base, &u64)) != 0)
- goto done;
- desc->base = u64;
-
- if ((error = vmread(limit, &u64)) != 0)
- goto done;
- desc->limit = u64;
-
- if (access != VMCS_INVALID_ENCODING) {
- if ((error = vmread(access, &u64)) != 0)
- goto done;
- desc->access = u64;
- }
-done:
- if (!running)
- VMCLEAR(vmcs);
- return (error);
+ /*
+ * A call to critical_enter() was made in vmcs_load() to prevent
+ * preemption. Now that the VMCS is unloaded, it is safe to relax that
+ * restriction.
+ */
+ critical_exit();
}
-int
-vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
+void
+vmcs_initialize(struct vmcs *vmcs, uintptr_t vmcs_pa)
{
- int error;
+ int err;
- VMPTRLD(vmcs);
+ /* set to VMCS revision */
+ vmcs->identifier = VMX_BASIC_REVISION(rdmsr(MSR_VMX_BASIC));
/*
- * Guest MSRs are saved in the VM-exit MSR-store area.
- * Guest MSRs are loaded from the VM-entry MSR-load area.
- * Both areas point to the same location in memory.
+ * Perform a vmclear on the VMCS, but without the critical section
+ * manipulation as done by vmcs_clear() above.
*/
- if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
- goto done;
- if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
- goto done;
-
- if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
- goto done;
- if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
- goto done;
-
- error = 0;
-done:
- VMCLEAR(vmcs);
- return (error);
+ __asm __volatile("vmclear %[addr];"
+ VMX_SET_ERROR_CODE_ASM
+ : [error] "=r" (err)
+ : [addr] "m" (vmcs_pa)
+ : "memory");
+
+ if (err != 0) {
+ panic("vmclear(%p) error %d", (void *)vmcs_pa, err);
+ }
}
-int
-vmcs_init(struct vmcs *vmcs)
+void
+vmcs_load(uintptr_t vmcs_pa)
{
- int error, codesel, datasel, tsssel;
- u_long cr0, cr4, efer;
- uint64_t pat;
-#ifdef __FreeBSD__
- uint64_t fsbase, idtrbase;
-#endif
-
- codesel = vmm_get_host_codesel();
- datasel = vmm_get_host_datasel();
- tsssel = vmm_get_host_tsssel();
+ int err;
/*
- * Make sure we have a "current" VMCS to work with.
+ * While the VMCS is loaded on the CPU for subsequent operations, it is
+ * important that the thread not be preempted. That is ensured with
+ * critical_enter() here, with a matching critical_exit() call in
+ * vmcs_clear() once the VMCS is unloaded.
*/
- VMPTRLD(vmcs);
-
- /* Host state */
-
- /* Initialize host IA32_PAT MSR */
- pat = vmm_get_host_pat();
- if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
- goto done;
+ critical_enter();
- /* Load the IA32_EFER MSR */
- efer = vmm_get_host_efer();
- if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
- goto done;
+ __asm __volatile("vmptrld %[addr];"
+ VMX_SET_ERROR_CODE_ASM
+ : [error] "=r" (err)
+ : [addr] "m" (vmcs_pa)
+ : "memory");
- /* Load the control registers */
-
- cr0 = vmm_get_host_cr0();
- if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
- goto done;
-
- cr4 = vmm_get_host_cr4() | CR4_VMXE;
- if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
- goto done;
-
- /* Load the segment selectors */
- if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
- goto done;
-
- if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
- goto done;
-
- if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
- goto done;
-
- if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
- goto done;
-
-#ifdef __FreeBSD__
- if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
- goto done;
-
- if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
- goto done;
-#else
- if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, vmm_get_host_fssel())) != 0)
- goto done;
-
- if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, vmm_get_host_gssel())) != 0)
- goto done;
-#endif
-
- if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
- goto done;
-
-#ifdef __FreeBSD__
- /*
- * Load the Base-Address for %fs and idtr.
- *
- * Note that we exclude %gs, tss and gdtr here because their base
- * address is pcpu specific.
- */
- fsbase = vmm_get_host_fsbase();
- if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
- goto done;
-
- idtrbase = vmm_get_host_idtrbase();
- if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
- goto done;
+ if (err != 0) {
+ panic("vmptrld(%p) error %d", (void *)vmcs_pa, err);
+ }
+}
-#else /* __FreeBSD__ */
- /*
- * Configure host sysenter MSRs to be restored on VM exit.
- * The thread-specific MSR_INTC_SEP_ESP value is loaded in vmx_run.
- */
- if ((error = vmwrite(VMCS_HOST_IA32_SYSENTER_CS, KCS_SEL)) != 0)
- goto done;
- /* Natively defined as MSR_INTC_SEP_EIP */
- if ((error = vmwrite(VMCS_HOST_IA32_SYSENTER_EIP,
- rdmsr(MSR_SYSENTER_EIP_MSR))) != 0)
- goto done;
+uint64_t
+vmcs_read(uint32_t encoding)
+{
+ int error;
+ uint64_t val;
-#endif /* __FreeBSD__ */
+ __asm __volatile("vmread %[enc], %[val];"
+ VMX_SET_ERROR_CODE_ASM
+ : [error] "=r" (error), [val] "=r" (val)
+ : [enc] "r" ((uint64_t)encoding)
+ : "memory");
- /* instruction pointer */
- if (no_flush_rsb) {
- if ((error = vmwrite(VMCS_HOST_RIP,
- (u_long)vmx_exit_guest)) != 0)
- goto done;
- } else {
- if ((error = vmwrite(VMCS_HOST_RIP,
- (u_long)vmx_exit_guest_flush_rsb)) != 0)
- goto done;
+ if (error != 0) {
+ panic("vmread(%x) error %d", encoding, error);
}
- /* link pointer */
- if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
- goto done;
-done:
- VMCLEAR(vmcs);
- return (error);
+ return (val);
}
-#ifdef DDB
-extern int vmxon_enabled[];
-
-DB_SHOW_COMMAND(vmcs, db_show_vmcs)
+void
+vmcs_write(uint32_t encoding, uint64_t val)
{
- uint64_t cur_vmcs, val;
- uint32_t exit;
-
- if (!vmxon_enabled[curcpu]) {
- db_printf("VMX not enabled\n");
- return;
- }
+ int error;
- if (have_addr) {
- db_printf("Only current VMCS supported\n");
- return;
- }
+ __asm __volatile("vmwrite %[val], %[enc];"
+ VMX_SET_ERROR_CODE_ASM
+ : [error] "=r" (error)
+ : [val] "r" (val), [enc] "r" ((uint64_t)encoding)
+ : "memory");
- vmptrst(&cur_vmcs);
- if (cur_vmcs == VMCS_INITIAL) {
- db_printf("No current VM context\n");
- return;
- }
- db_printf("VMCS: %jx\n", cur_vmcs);
- db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
- db_printf("Activity: ");
- val = vmcs_read(VMCS_GUEST_ACTIVITY);
- switch (val) {
- case 0:
- db_printf("Active");
- break;
- case 1:
- db_printf("HLT");
- break;
- case 2:
- db_printf("Shutdown");
- break;
- case 3:
- db_printf("Wait for SIPI");
- break;
- default:
- db_printf("Unknown: %#lx", val);
- }
- db_printf("\n");
- exit = vmcs_read(VMCS_EXIT_REASON);
- if (exit & 0x80000000)
- db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
- else
- db_printf("Exit Reason: %u\n", exit & 0xffff);
- db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
- db_printf("Guest Linear Address: %#lx\n",
- vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
- switch (exit & 0x8000ffff) {
- case EXIT_REASON_EXCEPTION:
- case EXIT_REASON_EXT_INTR:
- val = vmcs_read(VMCS_EXIT_INTR_INFO);
- db_printf("Interrupt Type: ");
- switch (val >> 8 & 0x7) {
- case 0:
- db_printf("external");
- break;
- case 2:
- db_printf("NMI");
- break;
- case 3:
- db_printf("HW exception");
- break;
- case 4:
- db_printf("SW exception");
- break;
- default:
- db_printf("?? %lu", val >> 8 & 0x7);
- break;
- }
- db_printf(" Vector: %lu", val & 0xff);
- if (val & 0x800)
- db_printf(" Error Code: %lx",
- vmcs_read(VMCS_EXIT_INTR_ERRCODE));
- db_printf("\n");
- break;
- case EXIT_REASON_EPT_FAULT:
- case EXIT_REASON_EPT_MISCONFIG:
- db_printf("Guest Physical Address: %#lx\n",
- vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
- break;
+ if (error != 0) {
+ panic("vmwrite(%x, %lx) error %d", encoding, val, error);
}
- db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
}
-#endif
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h
index edde5c6dd5..1713872556 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h
@@ -30,6 +30,7 @@
/*
* Copyright 2017 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
#ifndef _VMCS_H_
@@ -41,125 +42,20 @@ struct vmcs {
uint32_t identifier;
uint32_t abort_code;
char _impl_specific[PAGE_SIZE - sizeof(uint32_t) * 2];
-#ifndef __FreeBSD__
- /*
- * Keep the physical address of the VMCS cached adjacent for the
- * structure so it can be referenced in contexts which are too delicate
- * for a call into the HAT. For the moment it means wasting a whole
- * page on padding for the PA value to maintain alignment, but it
- * allows the consumers of 'struct vmcs *' to easily access the value
- * without a significant change to the interface.
- */
- uint64_t vmcs_pa;
- char _pa_pad[PAGE_SIZE - sizeof (vm_paddr_t)];
-#endif
};
-#ifdef __FreeBSD__
-CTASSERT(sizeof(struct vmcs) == PAGE_SIZE);
-#else
-CTASSERT(sizeof(struct vmcs) == (2*PAGE_SIZE));
-#endif
+CTASSERT(sizeof (struct vmcs) == PAGE_SIZE);
-/* MSR save region is composed of an array of 'struct msr_entry' */
-struct msr_entry {
- uint32_t index;
- uint32_t reserved;
- uint64_t val;
+uint32_t vmcs_field_encoding(int ident);
+void vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim,
+ uint32_t *acc);
-};
+void vmcs_initialize(struct vmcs *vmcs, uintptr_t vmcs_pa);
-int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count);
-int vmcs_init(struct vmcs *vmcs);
-int vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *rv);
-int vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val);
-int vmcs_getdesc(struct vmcs *vmcs, int running, int ident,
- struct seg_desc *desc);
-int vmcs_setdesc(struct vmcs *vmcs, int running, int ident,
- struct seg_desc *desc);
+void vmcs_load(uintptr_t vmcs_pa);
+void vmcs_clear(uintptr_t vmcs_pa);
-/*
- * Avoid header pollution caused by inline use of 'vtophys()' in vmx_cpufunc.h
- */
-#ifdef _VMX_CPUFUNC_H_
-static __inline uint64_t
-vmcs_read(uint32_t encoding)
-{
- int error;
- uint64_t val;
-
- error = vmread(encoding, &val);
- KASSERT(error == 0, ("vmcs_read(%u) error %d", encoding, error));
- return (val);
-}
-
-static __inline void
-vmcs_write(uint32_t encoding, uint64_t val)
-{
- int error;
-
- error = vmwrite(encoding, val);
- KASSERT(error == 0, ("vmcs_write(%u) error %d", encoding, error));
-}
-
-#ifndef __FreeBSD__
-/*
- * Due to header complexity combined with the need to cache the physical
- * address for the VMCS, these must be defined here rather than vmx_cpufunc.h.
- */
-static __inline int
-vmclear(struct vmcs *vmcs)
-{
- int error;
- uint64_t addr = vmcs->vmcs_pa;
-
- __asm __volatile("vmclear %[addr];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [addr] "m" (*(uint64_t *)&addr)
- : "memory");
- return (error);
-}
-
-static __inline int
-vmptrld(struct vmcs *vmcs)
-{
- int error;
- uint64_t addr = vmcs->vmcs_pa;
-
- __asm __volatile("vmptrld %[addr];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [addr] "m" (*(uint64_t *)&addr)
- : "memory");
- return (error);
-}
-
-static __inline void
-VMCLEAR(struct vmcs *vmcs)
-{
- int err;
-
- err = vmclear(vmcs);
- if (err != 0)
- panic("%s: vmclear(%p) error %d", __func__, vmcs, err);
-
- critical_exit();
-}
-
-static __inline void
-VMPTRLD(struct vmcs *vmcs)
-{
- int err;
-
- critical_enter();
-
- err = vmptrld(vmcs);
- if (err != 0)
- panic("%s: vmptrld(%p) error %d", __func__, vmcs, err);
-}
-#endif /* __FreeBSD__ */
-
-#endif /* _VMX_CPUFUNC_H_ */
+uint64_t vmcs_read(uint32_t encoding);
+void vmcs_write(uint32_t encoding, uint64_t val);
#define vmexit_instruction_length() vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH)
#define vmcs_guest_rip() vmcs_read(VMCS_GUEST_RIP)
@@ -177,7 +73,6 @@ VMPTRLD(struct vmcs *vmcs)
#define VMCS_INITIAL 0xffffffffffffffff
-#define VMCS_IDENT(encoding) ((encoding) | 0x80000000)
/*
* VMCS field encodings from Appendix H, Intel Architecture Manual Vol3B.
*/
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index c46560948e..3e511b9f66 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -88,7 +88,6 @@ __FBSDID("$FreeBSD$");
#include "vlapic_priv.h"
#include "ept.h"
-#include "vmx_cpufunc.h"
#include "vmcs.h"
#include "vmx.h"
#include "vmx_msr.h"
@@ -172,11 +171,6 @@ SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
NULL);
-#ifdef __FreeBSD__
-int vmxon_enabled[MAXCPU];
-static char vmxon_region[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE);
-#endif /*__FreeBSD__ */
-
static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2;
static uint32_t exit_ctls, entry_ctls;
@@ -196,10 +190,15 @@ static int vmx_initialized;
SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, initialized, CTLFLAG_RD,
&vmx_initialized, 0, "Intel VMX initialized");
+static int no_flush_rsb;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW,
+ &no_flush_rsb, 0, "Do not flush RSB upon vmexit");
+
/*
* Optional capabilities
*/
#ifdef __FreeBSD__
+SYSCTL_DECL(_hw_vmm_vmx);
static SYSCTL_NODE(_hw_vmm_vmx, OID_AUTO, cap,
CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
NULL);
@@ -228,7 +227,9 @@ static int pirvec = -1;
SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, posted_interrupt_vector, CTLFLAG_RD,
&pirvec, 0, "APICv posted interrupt vector");
+#ifdef __FreeBSD__
static struct unrhdr *vpid_unr;
+#endif /* __FreeBSD__*/
static u_int vpid_alloc_failed;
SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD,
&vpid_alloc_failed, 0, NULL);
@@ -240,6 +241,13 @@ int guest_l1d_flush_sw;
SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RD,
&guest_l1d_flush_sw, 0, NULL);
+/* MSR save region is composed of an array of 'struct msr_entry' */
+struct msr_entry {
+ uint32_t index;
+ uint32_t reserved;
+ uint64_t val;
+};
+
static struct msr_entry msr_load_list[1] __aligned(16);
/*
@@ -330,11 +338,8 @@ SDT_PROBE_DEFINE4(vmm, vmx, exit, return,
static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc);
static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
-static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val);
static void vmx_inject_pir(struct vlapic *vlapic);
-#ifndef __FreeBSD__
-static int vmx_apply_tsc_adjust(struct vmx *, int);
-#endif /* __FreeBSD__ */
+static void vmx_apply_tsc_adjust(struct vmx *, int);
#ifdef KTR
static const char *
@@ -504,17 +509,15 @@ vmx_allow_x2apic_msrs(struct vmx *vmx)
return (error);
}
-u_long
+static u_long
vmx_fix_cr0(u_long cr0)
{
-
return ((cr0 | cr0_ones_mask) & ~cr0_zeros_mask);
}
-u_long
+static u_long
vmx_fix_cr4(u_long cr4)
{
-
return ((cr4 | cr4_ones_mask) & ~cr4_zeros_mask);
}
@@ -845,45 +848,12 @@ vmx_trigger_hostintr(int vector)
#endif /* __FreeBSD__ */
}
-static int
-vmx_setup_cr_shadow(int which, struct vmcs *vmcs, uint32_t initial)
-{
- int error, mask_ident, shadow_ident;
- uint64_t mask_value;
-
- if (which != 0 && which != 4)
- panic("vmx_setup_cr_shadow: unknown cr%d", which);
-
- if (which == 0) {
- mask_ident = VMCS_CR0_MASK;
- mask_value = cr0_ones_mask | cr0_zeros_mask;
- shadow_ident = VMCS_CR0_SHADOW;
- } else {
- mask_ident = VMCS_CR4_MASK;
- mask_value = cr4_ones_mask | cr4_zeros_mask;
- shadow_ident = VMCS_CR4_SHADOW;
- }
-
- error = vmcs_setreg(vmcs, 0, VMCS_IDENT(mask_ident), mask_value);
- if (error)
- return (error);
-
- error = vmcs_setreg(vmcs, 0, VMCS_IDENT(shadow_ident), initial);
- if (error)
- return (error);
-
- return (0);
-}
-#define vmx_setup_cr0_shadow(vmcs,init) vmx_setup_cr_shadow(0, (vmcs), (init))
-#define vmx_setup_cr4_shadow(vmcs,init) vmx_setup_cr_shadow(4, (vmcs), (init))
-
static void *
vmx_vminit(struct vm *vm, pmap_t pmap)
{
uint16_t vpid[VM_MAXCPU];
- int i, error;
+ int i, error, datasel;
struct vmx *vmx;
- struct vmcs *vmcs;
uint32_t exc_bitmap;
uint16_t maxcpus;
uint32_t proc_ctls, proc2_ctls, pin_ctls;
@@ -972,6 +942,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
}
maxcpus = vm_get_maxcpus(vm);
+ datasel = vmm_get_host_datasel();
for (i = 0; i < maxcpus; i++) {
/*
* Cache physical address lookups for various components which
@@ -982,31 +953,58 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
vm_paddr_t apic_page_pa = vtophys(&vmx->apic_page[i]);
vm_paddr_t pir_desc_pa = vtophys(&vmx->pir_desc[i]);
- vmcs = &vmx->vmcs[i];
- vmcs->identifier = vmx_revision();
- vmcs->vmcs_pa = (uint64_t)vtophys(vmcs);
- error = vmclear(vmcs);
- if (error != 0) {
- panic("vmx_vminit: vmclear error %d on vcpu %d\n",
- error, i);
- }
+ vmx->vmcs_pa[i] = (uintptr_t)vtophys(&vmx->vmcs[i]);
+ vmcs_initialize(&vmx->vmcs[i], vmx->vmcs_pa[i]);
vmx_msr_guest_init(vmx, i);
- error = vmcs_init(vmcs);
- KASSERT(error == 0, ("vmcs_init error %d", error));
+ vmcs_load(vmx->vmcs_pa[i]);
- VMPTRLD(vmcs);
- error = 0;
+ vmcs_write(VMCS_HOST_IA32_PAT, vmm_get_host_pat());
+ vmcs_write(VMCS_HOST_IA32_EFER, vmm_get_host_efer());
+
+ /* Load the control registers */
+ vmcs_write(VMCS_HOST_CR0, vmm_get_host_cr0());
+ vmcs_write(VMCS_HOST_CR4, vmm_get_host_cr4() | CR4_VMXE);
+
+ /* Load the segment selectors */
+ vmcs_write(VMCS_HOST_CS_SELECTOR, vmm_get_host_codesel());
+
+ vmcs_write(VMCS_HOST_ES_SELECTOR, datasel);
+ vmcs_write(VMCS_HOST_SS_SELECTOR, datasel);
+ vmcs_write(VMCS_HOST_DS_SELECTOR, datasel);
- error += vmwrite(VMCS_EPTP, vmx->eptp);
- error += vmwrite(VMCS_PIN_BASED_CTLS, pin_ctls);
- error += vmwrite(VMCS_PRI_PROC_BASED_CTLS, proc_ctls);
- error += vmwrite(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls);
- error += vmwrite(VMCS_EXIT_CTLS, exit_ctls);
- error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls);
- error += vmwrite(VMCS_MSR_BITMAP, msr_bitmap_pa);
- error += vmwrite(VMCS_VPID, vpid[i]);
+ vmcs_write(VMCS_HOST_FS_SELECTOR, vmm_get_host_fssel());
+ vmcs_write(VMCS_HOST_GS_SELECTOR, vmm_get_host_gssel());
+ vmcs_write(VMCS_HOST_TR_SELECTOR, vmm_get_host_tsssel());
+
+ /*
+ * Configure host sysenter MSRs to be restored on VM exit.
+ * The thread-specific MSR_INTC_SEP_ESP value is loaded in vmx_run.
+ */
+ vmcs_write(VMCS_HOST_IA32_SYSENTER_CS, KCS_SEL);
+ vmcs_write(VMCS_HOST_IA32_SYSENTER_EIP,
+ rdmsr(MSR_SYSENTER_EIP_MSR));
+
+ /* instruction pointer */
+ if (no_flush_rsb) {
+ vmcs_write(VMCS_HOST_RIP, (uint64_t)vmx_exit_guest);
+ } else {
+ vmcs_write(VMCS_HOST_RIP,
+ (uint64_t)vmx_exit_guest_flush_rsb);
+ }
+
+ /* link pointer */
+ vmcs_write(VMCS_LINK_POINTER, ~0);
+
+ vmcs_write(VMCS_EPTP, vmx->eptp);
+ vmcs_write(VMCS_PIN_BASED_CTLS, pin_ctls);
+ vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls);
+ vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls);
+ vmcs_write(VMCS_EXIT_CTLS, exit_ctls);
+ vmcs_write(VMCS_ENTRY_CTLS, entry_ctls);
+ vmcs_write(VMCS_MSR_BITMAP, msr_bitmap_pa);
+ vmcs_write(VMCS_VPID, vpid[i]);
if (guest_l1d_flush && !guest_l1d_flush_sw) {
vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract(
@@ -1022,28 +1020,39 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
exc_bitmap = 0xffffffff;
else
exc_bitmap = 1 << IDT_MC;
- error += vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap);
+ vmcs_write(VMCS_EXCEPTION_BITMAP, exc_bitmap);
vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1;
- error += vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1);
+ vmcs_write(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1);
if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) {
- error += vmwrite(VMCS_VIRTUAL_APIC, apic_page_pa);
+ vmcs_write(VMCS_VIRTUAL_APIC, apic_page_pa);
}
if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
- error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
- error += vmwrite(VMCS_EOI_EXIT0, 0);
- error += vmwrite(VMCS_EOI_EXIT1, 0);
- error += vmwrite(VMCS_EOI_EXIT2, 0);
- error += vmwrite(VMCS_EOI_EXIT3, 0);
+ vmcs_write(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
+ vmcs_write(VMCS_EOI_EXIT0, 0);
+ vmcs_write(VMCS_EOI_EXIT1, 0);
+ vmcs_write(VMCS_EOI_EXIT2, 0);
+ vmcs_write(VMCS_EOI_EXIT3, 0);
}
if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) {
- error += vmwrite(VMCS_PIR_VECTOR, pirvec);
- error += vmwrite(VMCS_PIR_DESC, pir_desc_pa);
+ vmcs_write(VMCS_PIR_VECTOR, pirvec);
+ vmcs_write(VMCS_PIR_DESC, pir_desc_pa);
}
- VMCLEAR(vmcs);
- KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs"));
+
+ /*
+ * Set up the CR0/4 masks and configure the read shadow state
+ * to the power-on register value from the Intel Sys Arch.
+ * CR0 - 0x60000010
+ * CR4 - 0
+ */
+ vmcs_write(VMCS_CR0_MASK, cr0_ones_mask | cr0_zeros_mask);
+ vmcs_write(VMCS_CR0_SHADOW, 0x60000010);
+ vmcs_write(VMCS_CR4_MASK, cr4_ones_mask | cr4_zeros_mask);
+ vmcs_write(VMCS_CR4_SHADOW, 0);
+
+ vmcs_clear(vmx->vmcs_pa[i]);
vmx->cap[i].set = 0;
vmx->cap[i].proc_ctls = proc_ctls;
@@ -1054,19 +1063,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
vmx->state[i].lastcpu = NOCPU;
vmx->state[i].vpid = vpid[i];
- /*
- * Set up the CR0/4 shadows, and init the read shadow
- * to the power-on register value from the Intel Sys Arch.
- * CR0 - 0x60000010
- * CR4 - 0
- */
- error = vmx_setup_cr0_shadow(vmcs, 0x60000010);
- if (error != 0)
- panic("vmx_setup_cr0_shadow %d", error);
-
- error = vmx_setup_cr4_shadow(vmcs, 0);
- if (error != 0)
- panic("vmx_setup_cr4_shadow %d", error);
vmx->ctx[i].pmap = pmap;
}
@@ -1095,7 +1091,7 @@ static __inline void
vmx_run_trace(struct vmx *vmx, int vcpu)
{
#ifdef KTR
- VCPU_CTR1(vmx->vm, vcpu, "Resume execution at %#lx", vmcs_guest_rip());
+ VCPU_CTR1(vmx->vm, vcpu, "Resume execution at %lx", vmcs_guest_rip());
#endif
}
@@ -1123,6 +1119,33 @@ vmx_astpending_trace(struct vmx *vmx, int vcpu, uint64_t rip)
static VMM_STAT_INTEL(VCPU_INVVPID_SAVED, "Number of vpid invalidations saved");
static VMM_STAT_INTEL(VCPU_INVVPID_DONE, "Number of vpid invalidations done");
+#define INVVPID_TYPE_ADDRESS 0UL
+#define INVVPID_TYPE_SINGLE_CONTEXT 1UL
+#define INVVPID_TYPE_ALL_CONTEXTS 2UL
+
+struct invvpid_desc {
+ uint16_t vpid;
+ uint16_t _res1;
+ uint32_t _res2;
+ uint64_t linear_addr;
+};
+CTASSERT(sizeof(struct invvpid_desc) == 16);
+
+static __inline void
+invvpid(uint64_t type, struct invvpid_desc desc)
+{
+ int error;
+
+ __asm __volatile("invvpid %[desc], %[type];"
+ VMX_SET_ERROR_CODE_ASM
+ : [error] "=r" (error)
+ : [desc] "m" (desc), [type] "r" (type)
+ : "memory");
+
+ if (error)
+ panic("invvpid error %d", error);
+}
+
/*
* Invalidate guest mappings identified by its vpid from the TLB.
*/
@@ -1190,7 +1213,6 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap)
{
struct vmxstate *vmxstate;
-#ifndef __FreeBSD__
/*
* Regardless of whether the VM appears to have migrated between CPUs,
* save the host sysenter stack pointer. As it points to the kernel
@@ -1203,8 +1225,7 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap)
* Perform any needed TSC_OFFSET adjustment based on TSC_MSR writes or
* migration between host CPUs with differing TSC values.
*/
- VERIFY0(vmx_apply_tsc_adjust(vmx, vcpu));
-#endif
+ vmx_apply_tsc_adjust(vmx, vcpu);
vmxstate = &vmx->state[vcpu];
if (vmxstate->lastcpu == curcpu)
@@ -1214,10 +1235,8 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap)
vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1);
-#ifndef __FreeBSD__
/* Load the per-CPU IDT address */
vmcs_write(VMCS_HOST_IDTR_BASE, vmm_get_host_idtrbase());
-#endif
vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase());
vmcs_write(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase());
vmcs_write(VMCS_HOST_GS_BASE, vmm_get_host_gsbase());
@@ -1245,7 +1264,7 @@ vmx_clear_int_window_exiting(struct vmx *vmx, int vcpu)
{
KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0,
- ("intr_window_exiting not set: %#x", vmx->cap[vcpu].proc_ctls));
+ ("intr_window_exiting not set: %x", vmx->cap[vcpu].proc_ctls));
vmx->cap[vcpu].proc_ctls &= ~PROCBASED_INT_WINDOW_EXITING;
vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
VCPU_CTR0(vmx->vm, vcpu, "Disabling interrupt window exiting");
@@ -1267,29 +1286,12 @@ vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu)
{
KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0,
- ("nmi_window_exiting not set %#x", vmx->cap[vcpu].proc_ctls));
+ ("nmi_window_exiting not set %x", vmx->cap[vcpu].proc_ctls));
vmx->cap[vcpu].proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING;
vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting");
}
-#ifdef __FreeBSD__
-int
-vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset)
-{
- int error;
-
- if ((vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET) == 0) {
- vmx->cap[vcpu].proc_ctls |= PROCBASED_TSC_OFFSET;
- vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
- VCPU_CTR0(vmx->vm, vcpu, "Enabling TSC offsetting");
- }
-
- error = vmwrite(VMCS_TSC_OFFSET, offset);
-
- return (error);
-}
-#else /* __FreeBSD__ */
/*
* Set the TSC adjustment, taking into account the offsets measured between
* host physical CPUs. This is required even if the guest has not set a TSC
@@ -1297,24 +1299,20 @@ vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset)
* migrated onto. Without this mitigation, un-synched host TSCs will convey
* the appearance of TSC time-travel to the guest as its vCPUs migrate.
*/
-static int
+static void
vmx_apply_tsc_adjust(struct vmx *vmx, int vcpu)
{
extern hrtime_t tsc_gethrtime_tick_delta(void);
const uint64_t target_offset = (vcpu_tsc_offset(vmx->vm, vcpu) +
(uint64_t)tsc_gethrtime_tick_delta());
- int error = 0;
ASSERT(vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET);
if (vmx->tsc_offset_active[vcpu] != target_offset) {
- error = vmwrite(VMCS_TSC_OFFSET, target_offset);
+ vmcs_write(VMCS_TSC_OFFSET, target_offset);
vmx->tsc_offset_active[vcpu] = target_offset;
}
-
- return (error);
}
-#endif /* __FreeBSD__ */
#define NMI_BLOCKING (VMCS_INTERRUPTIBILITY_NMI_BLOCKING | \
VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)
@@ -1333,11 +1331,11 @@ vmx_inject_nmi(struct vmx *vmx, int vcpu)
gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
KASSERT((gi & NMI_BLOCKING) == 0, ("vmx_inject_nmi: invalid guest "
- "interruptibility-state %#x", gi));
+ "interruptibility-state %x", gi));
info = vmcs_read(VMCS_ENTRY_INTR_INFO);
KASSERT((info & VMCS_INTR_VALID) == 0, ("vmx_inject_nmi: invalid "
- "VM-entry interruption information %#x", info));
+ "VM-entry interruption information %x", info));
/*
* Inject the virtual NMI. The vector must be the NMI IDT entry
@@ -1373,7 +1371,7 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
if (vmx->state[vcpu].nextrip != guestrip &&
(gi & HWINTR_BLOCKING) != 0) {
VCPU_CTR2(vmx->vm, vcpu, "Guest interrupt blocking "
- "cleared due to rip change: %#lx/%#lx",
+ "cleared due to rip change: %lx/%lx",
vmx->state[vcpu].nextrip, guestrip);
gi &= ~HWINTR_BLOCKING;
vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
@@ -1390,10 +1388,10 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) {
KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry "
- "intinfo is not valid: %#lx", __func__, entryinfo));
+ "intinfo is not valid: %lx", __func__, entryinfo));
KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject "
- "pending exception: %#lx/%#x", __func__, entryinfo, info));
+ "pending exception: %lx/%x", __func__, entryinfo, info));
info = entryinfo;
vector = info & 0xff;
@@ -1432,11 +1430,11 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
need_nmi_exiting = 0;
} else {
VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI "
- "due to VM-entry intr info %#x", info);
+ "due to VM-entry intr info %x", info);
}
} else {
VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI due to "
- "Guest Interruptibility-state %#x", gi);
+ "Guest Interruptibility-state %x", gi);
}
if (need_nmi_exiting) {
@@ -1483,18 +1481,18 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
*/
if ((gi & HWINTR_BLOCKING) != 0) {
VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
- "Guest Interruptibility-state %#x", vector, gi);
+ "Guest Interruptibility-state %x", vector, gi);
goto cantinject;
}
if ((info & VMCS_INTR_VALID) != 0) {
VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
- "VM-entry intr info %#x", vector, info);
+ "VM-entry intr info %x", vector, info);
goto cantinject;
}
rflags = vmcs_read(VMCS_GUEST_RFLAGS);
if ((rflags & PSL_I) == 0) {
VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
- "rflags %#lx", vector, rflags);
+ "rflags %lx", vector, rflags);
goto cantinject;
}
@@ -1573,7 +1571,7 @@ vmx_assert_nmi_blocking(struct vmx *vmx, int vcpuid)
gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
KASSERT(gi & VMCS_INTERRUPTIBILITY_NMI_BLOCKING,
- ("NMI blocking is not in effect %#x", gi));
+ ("NMI blocking is not in effect %x", gi));
}
static int
@@ -1949,10 +1947,11 @@ vmexit_inout(struct vm_exit *vmexit, struct vie *vie, uint64_t qual,
inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO);
/*
- * Bits 7-9 encode the address size of ins/outs operations where
- * the 0/1/2 values correspond to 16/32/64 bit sizes.
+ * According to the SDM, bits 9:7 encode the address size of the
+ * ins/outs operation, but only values 0/1/2 are expected,
+ * corresponding to 16/32/64 bit sizes.
*/
- inout->addrsize = 2 << (1 + ((inst_info >> 7) & 0x3));
+ inout->addrsize = 2 << BITX(inst_info, 9, 7);
VERIFY(inout->addrsize == 2 || inout->addrsize == 4 ||
inout->addrsize == 8);
@@ -2224,9 +2223,7 @@ emulate_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
static int
emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu)
{
- struct vmxctx *vmxctx;
uint64_t result;
- uint32_t eax, edx;
int error;
if (lapic_msr(num))
@@ -2235,14 +2232,8 @@ emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu)
error = vmx_rdmsr(vmx, vcpuid, num, &result, retu);
if (error == 0) {
- eax = result;
- vmxctx = &vmx->ctx[vcpuid];
- error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RAX, eax);
- KASSERT(error == 0, ("vmxctx_setreg(rax) error %d", error));
-
- edx = result >> 32;
- error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RDX, edx);
- KASSERT(error == 0, ("vmxctx_setreg(rdx) error %d", error));
+ vmx->ctx[vcpuid].guest_rax = (uint32_t)result;
+ vmx->ctx[vcpuid].guest_rdx = result >> 32;
}
return (error);
@@ -2260,7 +2251,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
struct vie *vie;
struct vlapic *vlapic;
struct vm_task_switch *ts;
- uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info;
+ uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info;
uint32_t intr_type, intr_vec, reason;
uint64_t exitintinfo, qual, gpa;
bool retu;
@@ -2367,7 +2358,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
*/
if (ts->reason == TSR_IDT_GATE) {
KASSERT(idtvec_info & VMCS_IDT_VEC_VALID,
- ("invalid idtvec_info %#x for IDT task switch",
+ ("invalid idtvec_info %x for IDT task switch",
idtvec_info));
intr_type = idtvec_info & VMCS_INTR_T_MASK;
if (intr_type != VMCS_INTR_T_SWINTR &&
@@ -2496,7 +2487,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
return (1);
KASSERT((intr_info & VMCS_INTR_VALID) != 0 &&
(intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_HWINTR,
- ("VM exit interruption info invalid: %#x", intr_info));
+ ("VM exit interruption info invalid: %x", intr_info));
vmx_trigger_hostintr(intr_info & 0xff);
/*
@@ -2528,7 +2519,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXCEPTION, 1);
intr_info = vmcs_read(VMCS_EXIT_INTR_INFO);
KASSERT((intr_info & VMCS_INTR_VALID) != 0,
- ("VM exit interruption info invalid: %#x", intr_info));
+ ("VM exit interruption info invalid: %x", intr_info));
intr_vec = intr_info & 0xff;
intr_type = intr_info & VMCS_INTR_T_MASK;
@@ -2580,9 +2571,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
}
if (intr_vec == IDT_PF) {
- error = vmxctx_setreg(vmxctx, VM_REG_GUEST_CR2, qual);
- KASSERT(error == 0, ("%s: vmxctx_setreg(cr2) error %d",
- __func__, error));
+ vmxctx->guest_cr2 = qual;
}
/*
@@ -2600,7 +2589,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
errcode_valid = 1;
errcode = vmcs_read(VMCS_EXIT_INTR_ERRCODE);
}
- VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%#x into "
+ VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%x into "
"the guest", intr_vec, errcode);
SDT_PROBE5(vmm, vmx, exit, exception,
vmx, vcpu, vmexit, intr_vec, errcode);
@@ -2790,11 +2779,11 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
intr_info = vmcs_read(VMCS_EXIT_INTR_INFO);
KASSERT((intr_info & VMCS_INTR_VALID) != 0,
- ("VM exit interruption info invalid: %#x", intr_info));
+ ("VM exit interruption info invalid: %x", intr_info));
if ((intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_NMI) {
KASSERT((intr_info & 0xff) == IDT_NMI, ("VM exit due "
- "to NMI has invalid vector: %#x", intr_info));
+ "to NMI has invalid vector: %x", intr_info));
VCPU_CTR0(vmx->vm, vcpuid, "Vectoring to NMI handler");
#ifdef __FreeBSD__
__asm __volatile("int $2");
@@ -2807,7 +2796,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
static __inline void
vmx_dr_enter_guest(struct vmxctx *vmxctx)
{
- register_t rflags;
+ uint64_t rflags;
/* Save host control debug registers. */
vmxctx->host_dr7 = rdr7();
@@ -2872,14 +2861,14 @@ vmx_dr_leave_guest(struct vmxctx *vmxctx)
}
static int
-vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
+vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
struct vm_eventinfo *evinfo)
{
int rc, handled, launched;
struct vmx *vmx;
struct vm *vm;
struct vmxctx *vmxctx;
- struct vmcs *vmcs;
+ uintptr_t vmcs_pa;
struct vm_exit *vmexit;
struct vlapic *vlapic;
uint32_t exit_reason;
@@ -2890,7 +2879,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
vmx = arg;
vm = vmx->vm;
- vmcs = &vmx->vmcs[vcpu];
+ vmcs_pa = vmx->vmcs_pa[vcpu];
vmxctx = &vmx->ctx[vcpu];
vlapic = vm_lapic(vm, vcpu);
vmexit = vm_exitinfo(vm, vcpu);
@@ -2901,7 +2890,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
vmx_msr_guest_enter(vmx, vcpu);
- VMPTRLD(vmcs);
+ vmcs_load(vmcs_pa);
#ifndef __FreeBSD__
VERIFY(vmx->vmcs_state[vcpu] == VS_NONE && curthread->t_preempt != 0);
@@ -2922,7 +2911,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
vmx_set_pcpu_defaults(vmx, vcpu, pmap);
do {
KASSERT(vmcs_guest_rip() == rip, ("%s: vmcs guest rip mismatch "
- "%#lx/%#lx", __func__, vmcs_guest_rip(), rip));
+ "%lx/%lx", __func__, vmcs_guest_rip(), rip));
handled = UNHANDLED;
/*
@@ -3115,7 +3104,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
VCPU_CTR1(vm, vcpu, "returning from vmx_run: exitcode %d",
vmexit->exitcode);
- VMCLEAR(vmcs);
+ vmcs_clear(vmcs_pa);
vmx_msr_guest_exit(vmx, vcpu);
#ifndef __FreeBSD__
@@ -3145,10 +3134,9 @@ vmx_vmcleanup(void *arg)
return;
}
-static register_t *
+static uint64_t *
vmxctx_regptr(struct vmxctx *vmxctx, int reg)
{
-
switch (reg) {
case VM_REG_GUEST_RAX:
return (&vmxctx->guest_rax);
@@ -3199,157 +3187,129 @@ vmxctx_regptr(struct vmxctx *vmxctx, int reg)
}
static int
-vmxctx_getreg(struct vmxctx *vmxctx, int reg, uint64_t *retval)
+vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
{
- register_t *regp;
-
- if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) {
- *retval = *regp;
- return (0);
- } else
- return (EINVAL);
-}
+ int running, hostcpu, err;
+ struct vmx *vmx = arg;
+ uint64_t *regp;
-static int
-vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val)
-{
- register_t *regp;
+ running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu);
- if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) {
- *regp = val;
+ /* VMCS access not required for ctx reads */
+ if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) {
+ *retval = *regp;
return (0);
- } else
- return (EINVAL);
-}
-
-static int
-vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval)
-{
- uint64_t gi;
- int error;
-
- error = vmcs_getreg(&vmx->vmcs[vcpu], running,
- VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi);
- *retval = (gi & HWINTR_BLOCKING) ? 1 : 0;
- return (error);
-}
-
-static int
-vmx_modify_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t val)
-{
- struct vmcs *vmcs;
- uint64_t gi;
- int error, ident;
-
- /*
- * Forcing the vcpu into an interrupt shadow is not supported.
- */
- if (val) {
- error = EINVAL;
- goto done;
}
- vmcs = &vmx->vmcs[vcpu];
- ident = VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY);
- error = vmcs_getreg(vmcs, running, ident, &gi);
- if (error == 0) {
- gi &= ~HWINTR_BLOCKING;
- error = vmcs_setreg(vmcs, running, ident, gi);
+ if (!running) {
+ vmcs_load(vmx->vmcs_pa[vcpu]);
}
-done:
- VCPU_CTR2(vmx->vm, vcpu, "Setting intr_shadow to %#lx %s", val,
- error ? "failed" : "succeeded");
- return (error);
-}
-
-static int
-vmx_shadow_reg(int reg)
-{
- int shreg;
- shreg = -1;
+ err = EINVAL;
+ if (reg == VM_REG_GUEST_INTR_SHADOW) {
+ uint64_t gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
+ *retval = (gi & HWINTR_BLOCKING) ? 1 : 0;
+ err = 0;
+ } else {
+ uint32_t encoding;
- switch (reg) {
- case VM_REG_GUEST_CR0:
- shreg = VMCS_CR0_SHADOW;
- break;
- case VM_REG_GUEST_CR4:
- shreg = VMCS_CR4_SHADOW;
- break;
- default:
- break;
+ encoding = vmcs_field_encoding(reg);
+ if (encoding != VMCS_INVALID_ENCODING) {
+ *retval = vmcs_read(encoding);
+ err = 0;
+ }
}
- return (shreg);
-}
-
-static int
-vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
-{
- int running, hostcpu;
- struct vmx *vmx = arg;
-
- running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
- if (running && hostcpu != curcpu)
- panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu);
-
- if (reg == VM_REG_GUEST_INTR_SHADOW)
- return (vmx_get_intr_shadow(vmx, vcpu, running, retval));
-
- if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) == 0)
- return (0);
+ if (!running) {
+ vmcs_clear(vmx->vmcs_pa[vcpu]);
+ }
- return (vmcs_getreg(&vmx->vmcs[vcpu], running, reg, retval));
+ return (err);
}
static int
vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
{
- int error, hostcpu, running, shadow;
- uint64_t ctls;
- pmap_t pmap;
+ int running, hostcpu, error;
struct vmx *vmx = arg;
+ uint64_t *regp;
running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
if (running && hostcpu != curcpu)
panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu);
- if (reg == VM_REG_GUEST_INTR_SHADOW)
- return (vmx_modify_intr_shadow(vmx, vcpu, running, val));
-
- if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0)
+ /* VMCS access not required for ctx writes */
+ if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) {
+ *regp = val;
return (0);
+ }
- error = vmcs_setreg(&vmx->vmcs[vcpu], running, reg, val);
-
- if (error == 0) {
- /*
- * If the "load EFER" VM-entry control is 1 then the
- * value of EFER.LMA must be identical to "IA-32e mode guest"
- * bit in the VM-entry control.
- */
- if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0 &&
- (reg == VM_REG_GUEST_EFER)) {
- vmcs_getreg(&vmx->vmcs[vcpu], running,
- VMCS_IDENT(VMCS_ENTRY_CTLS), &ctls);
- if (val & EFER_LMA)
- ctls |= VM_ENTRY_GUEST_LMA;
- else
- ctls &= ~VM_ENTRY_GUEST_LMA;
- vmcs_setreg(&vmx->vmcs[vcpu], running,
- VMCS_IDENT(VMCS_ENTRY_CTLS), ctls);
- }
+ if (!running) {
+ vmcs_load(vmx->vmcs_pa[vcpu]);
+ }
- shadow = vmx_shadow_reg(reg);
- if (shadow > 0) {
+ if (reg == VM_REG_GUEST_INTR_SHADOW) {
+ if (val != 0) {
/*
- * Store the unmodified value in the shadow
+ * Forcing the vcpu into an interrupt shadow is not
+ * presently supported.
*/
- error = vmcs_setreg(&vmx->vmcs[vcpu], running,
- VMCS_IDENT(shadow), val);
+ error = EINVAL;
+ } else {
+ uint64_t gi;
+
+ gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
+ gi &= ~HWINTR_BLOCKING;
+ vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
+ error = 0;
}
+ } else {
+ uint32_t encoding;
- if (reg == VM_REG_GUEST_CR3) {
+ error = 0;
+ encoding = vmcs_field_encoding(reg);
+ switch (encoding) {
+ case VMCS_GUEST_IA32_EFER:
+ /*
+ * If the "load EFER" VM-entry control is 1 then the
+ * value of EFER.LMA must be identical to "IA-32e mode
+ * guest" bit in the VM-entry control.
+ */
+ if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0) {
+ uint64_t ctls;
+
+ ctls = vmcs_read(VMCS_ENTRY_CTLS);
+ if (val & EFER_LMA) {
+ ctls |= VM_ENTRY_GUEST_LMA;
+ } else {
+ ctls &= ~VM_ENTRY_GUEST_LMA;
+ }
+ vmcs_write(VMCS_ENTRY_CTLS, ctls);
+ }
+ vmcs_write(encoding, val);
+ break;
+ case VMCS_GUEST_CR0:
+ /*
+ * The guest is not allowed to modify certain bits in
+ * %cr0 and %cr4. To maintain the illusion of full
+ * control, they have shadow versions which contain the
+ * guest-perceived (via reads from the register) values
+ * as opposed to the guest-effective values.
+ *
+ * This is detailed in the SDM: Vol. 3 Ch. 24.6.6.
+ */
+ vmcs_write(VMCS_CR0_SHADOW, val);
+ vmcs_write(encoding, vmx_fix_cr0(val));
+ break;
+ case VMCS_GUEST_CR4:
+ /* See above for detail on %cr4 shadowing */
+ vmcs_write(VMCS_CR4_SHADOW, val);
+ vmcs_write(encoding, vmx_fix_cr4(val));
+ break;
+ case VMCS_GUEST_CR3:
+ vmcs_write(encoding, val);
/*
* Invalidate the guest vcpu's TLB mappings to emulate
* the behavior of updating %cr3.
@@ -3357,38 +3317,80 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
* XXX the processor retains global mappings when %cr3
* is updated but vmx_invvpid() does not.
*/
- pmap = vmx->ctx[vcpu].pmap;
- vmx_invvpid(vmx, vcpu, pmap, running);
+ vmx_invvpid(vmx, vcpu, vmx->ctx[vcpu].pmap, running);
+ break;
+ case VMCS_INVALID_ENCODING:
+ error = EINVAL;
+ break;
+ default:
+ vmcs_write(encoding, val);
+ break;
}
}
+ if (!running) {
+ vmcs_clear(vmx->vmcs_pa[vcpu]);
+ }
+
return (error);
}
static int
-vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
+vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc)
{
int hostcpu, running;
struct vmx *vmx = arg;
+ uint32_t base, limit, access;
running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
if (running && hostcpu != curcpu)
panic("vmx_getdesc: %s%d is running", vm_name(vmx->vm), vcpu);
- return (vmcs_getdesc(&vmx->vmcs[vcpu], running, reg, desc));
+ if (!running) {
+ vmcs_load(vmx->vmcs_pa[vcpu]);
+ }
+
+ vmcs_seg_desc_encoding(seg, &base, &limit, &access);
+ desc->base = vmcs_read(base);
+ desc->limit = vmcs_read(limit);
+ if (access != VMCS_INVALID_ENCODING) {
+ desc->access = vmcs_read(access);
+ } else {
+ desc->access = 0;
+ }
+
+ if (!running) {
+ vmcs_clear(vmx->vmcs_pa[vcpu]);
+ }
+ return (0);
}
static int
-vmx_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
+vmx_setdesc(void *arg, int vcpu, int seg, struct seg_desc *desc)
{
int hostcpu, running;
struct vmx *vmx = arg;
+ uint32_t base, limit, access;
running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
if (running && hostcpu != curcpu)
panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), vcpu);
- return (vmcs_setdesc(&vmx->vmcs[vcpu], running, reg, desc));
+ if (!running) {
+ vmcs_load(vmx->vmcs_pa[vcpu]);
+ }
+
+ vmcs_seg_desc_encoding(seg, &base, &limit, &access);
+ vmcs_write(base, desc->base);
+ vmcs_write(limit, desc->limit);
+ if (access != VMCS_INVALID_ENCODING) {
+ vmcs_write(access, desc->access);
+ }
+
+ if (!running) {
+ vmcs_clear(vmx->vmcs_pa[vcpu]);
+ }
+ return (0);
}
static int
@@ -3436,21 +3438,17 @@ static int
vmx_setcap(void *arg, int vcpu, int type, int val)
{
struct vmx *vmx = arg;
- struct vmcs *vmcs = &vmx->vmcs[vcpu];
- uint32_t baseval;
+ uint32_t baseval, reg, flag;
uint32_t *pptr;
int error;
- int flag;
- int reg;
- int retval;
- retval = ENOENT;
+ error = ENOENT;
pptr = NULL;
switch (type) {
case VM_CAP_HALT_EXIT:
if (cap_halt_exit) {
- retval = 0;
+ error = 0;
pptr = &vmx->cap[vcpu].proc_ctls;
baseval = *pptr;
flag = PROCBASED_HLT_EXITING;
@@ -3459,7 +3457,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
break;
case VM_CAP_MTRAP_EXIT:
if (cap_monitor_trap) {
- retval = 0;
+ error = 0;
pptr = &vmx->cap[vcpu].proc_ctls;
baseval = *pptr;
flag = PROCBASED_MTF;
@@ -3468,7 +3466,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
break;
case VM_CAP_PAUSE_EXIT:
if (cap_pause_exit) {
- retval = 0;
+ error = 0;
pptr = &vmx->cap[vcpu].proc_ctls;
baseval = *pptr;
flag = PROCBASED_PAUSE_EXITING;
@@ -3477,7 +3475,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
break;
case VM_CAP_ENABLE_INVPCID:
if (cap_invpcid) {
- retval = 0;
+ error = 0;
pptr = &vmx->cap[vcpu].proc_ctls2;
baseval = *pptr;
flag = PROCBASED2_ENABLE_INVPCID;
@@ -3485,7 +3483,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
}
break;
case VM_CAP_BPT_EXIT:
- retval = 0;
+ error = 0;
/* Don't change the bitmap if we are tracing all exceptions. */
if (vmx->cap[vcpu].exc_bitmap != 0xffffffff) {
@@ -3499,8 +3497,9 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
break;
}
- if (retval)
- return (retval);
+ if (error != 0) {
+ return (error);
+ }
if (pptr != NULL) {
if (val) {
@@ -3508,12 +3507,9 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
} else {
baseval &= ~flag;
}
- VMPTRLD(vmcs);
- error = vmwrite(reg, baseval);
- VMCLEAR(vmcs);
-
- if (error)
- return (error);
+ vmcs_load(vmx->vmcs_pa[vcpu]);
+ vmcs_write(reg, baseval);
+ vmcs_clear(vmx->vmcs_pa[vcpu]);
/*
* Update optional stored flags, and record
@@ -3715,13 +3711,11 @@ static void
vmx_enable_x2apic_mode_ts(struct vlapic *vlapic)
{
struct vmx *vmx;
- struct vmcs *vmcs;
uint32_t proc_ctls;
int vcpuid;
vcpuid = vlapic->vcpuid;
vmx = ((struct vlapic_vtx *)vlapic)->vmx;
- vmcs = &vmx->vmcs[vcpuid];
proc_ctls = vmx->cap[vcpuid].proc_ctls;
proc_ctls &= ~PROCBASED_USE_TPR_SHADOW;
@@ -3729,34 +3723,32 @@ vmx_enable_x2apic_mode_ts(struct vlapic *vlapic)
proc_ctls |= PROCBASED_CR8_STORE_EXITING;
vmx->cap[vcpuid].proc_ctls = proc_ctls;
- VMPTRLD(vmcs);
+ vmcs_load(vmx->vmcs_pa[vcpuid]);
vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls);
- VMCLEAR(vmcs);
+ vmcs_clear(vmx->vmcs_pa[vcpuid]);
}
static void
vmx_enable_x2apic_mode_vid(struct vlapic *vlapic)
{
struct vmx *vmx;
- struct vmcs *vmcs;
uint32_t proc_ctls2;
int vcpuid, error;
vcpuid = vlapic->vcpuid;
vmx = ((struct vlapic_vtx *)vlapic)->vmx;
- vmcs = &vmx->vmcs[vcpuid];
proc_ctls2 = vmx->cap[vcpuid].proc_ctls2;
KASSERT((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) != 0,
- ("%s: invalid proc_ctls2 %#x", __func__, proc_ctls2));
+ ("%s: invalid proc_ctls2 %x", __func__, proc_ctls2));
proc_ctls2 &= ~PROCBASED2_VIRTUALIZE_APIC_ACCESSES;
proc_ctls2 |= PROCBASED2_VIRTUALIZE_X2APIC_MODE;
vmx->cap[vcpuid].proc_ctls2 = proc_ctls2;
- VMPTRLD(vmcs);
+ vmcs_load(vmx->vmcs_pa[vcpuid]);
vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc_ctls2);
- VMCLEAR(vmcs);
+ vmcs_clear(vmx->vmcs_pa[vcpuid]);
if (vlapic->vcpuid == 0) {
/*
@@ -3932,10 +3924,9 @@ static void
vmx_savectx(void *arg, int vcpu)
{
struct vmx *vmx = arg;
- struct vmcs *vmcs = &vmx->vmcs[vcpu];
if ((vmx->vmcs_state[vcpu] & VS_LOADED) != 0) {
- VERIFY3U(vmclear(vmcs), ==, 0);
+ vmcs_clear(vmx->vmcs_pa[vcpu]);
vmx_msr_guest_exit(vmx, vcpu);
/*
* Having VMCLEARed the VMCS, it can no longer be re-entered
@@ -3951,13 +3942,12 @@ static void
vmx_restorectx(void *arg, int vcpu)
{
struct vmx *vmx = arg;
- struct vmcs *vmcs = &vmx->vmcs[vcpu];
ASSERT0(vmx->vmcs_state[vcpu] & VS_LAUNCHED);
if ((vmx->vmcs_state[vcpu] & VS_LOADED) != 0) {
vmx_msr_guest_enter(vmx, vcpu);
- VERIFY3U(vmptrld(vmcs), ==, 0);
+ vmcs_load(vmx->vmcs_pa[vcpu]);
}
}
#endif /* __FreeBSD__ */
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.h b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
index 0fd723f9c9..7943c1fd0e 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.h
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
@@ -50,44 +50,34 @@
struct pmap;
struct vmxctx {
- register_t guest_rdi; /* Guest state */
- register_t guest_rsi;
- register_t guest_rdx;
- register_t guest_rcx;
- register_t guest_r8;
- register_t guest_r9;
- register_t guest_rax;
- register_t guest_rbx;
- register_t guest_rbp;
- register_t guest_r10;
- register_t guest_r11;
- register_t guest_r12;
- register_t guest_r13;
- register_t guest_r14;
- register_t guest_r15;
- register_t guest_cr2;
- register_t guest_dr0;
- register_t guest_dr1;
- register_t guest_dr2;
- register_t guest_dr3;
- register_t guest_dr6;
-
-#ifdef __FreeBSD__
- register_t host_r15; /* Host state */
- register_t host_r14;
- register_t host_r13;
- register_t host_r12;
- register_t host_rbp;
- register_t host_rsp;
- register_t host_rbx;
-#endif /* __FreeBSD__ */
-
- register_t host_dr0;
- register_t host_dr1;
- register_t host_dr2;
- register_t host_dr3;
- register_t host_dr6;
- register_t host_dr7;
+ uint64_t guest_rdi; /* Guest state */
+ uint64_t guest_rsi;
+ uint64_t guest_rdx;
+ uint64_t guest_rcx;
+ uint64_t guest_r8;
+ uint64_t guest_r9;
+ uint64_t guest_rax;
+ uint64_t guest_rbx;
+ uint64_t guest_rbp;
+ uint64_t guest_r10;
+ uint64_t guest_r11;
+ uint64_t guest_r12;
+ uint64_t guest_r13;
+ uint64_t guest_r14;
+ uint64_t guest_r15;
+ uint64_t guest_cr2;
+ uint64_t guest_dr0;
+ uint64_t guest_dr1;
+ uint64_t guest_dr2;
+ uint64_t guest_dr3;
+ uint64_t guest_dr6;
+
+ uint64_t host_dr0;
+ uint64_t host_dr1;
+ uint64_t host_dr2;
+ uint64_t host_dr3;
+ uint64_t host_dr6;
+ uint64_t host_dr7;
uint64_t host_debugctl;
int host_tf;
@@ -156,6 +146,7 @@ struct vmx {
uint64_t host_msrs[VM_MAXCPU][GUEST_MSR_NUM];
uint64_t tsc_offset_active[VM_MAXCPU];
vmcs_state_t vmcs_state[VM_MAXCPU];
+ uintptr_t vmcs_pa[VM_MAXCPU];
#endif
struct vmxctx ctx[VM_MAXCPU];
struct vmxcap cap[VM_MAXCPU];
@@ -175,17 +166,38 @@ vmx_cap_en(const struct vmx *vmx, enum vmx_caps cap)
return ((vmx->vmx_caps & cap) == cap);
}
+
+/*
+ * Section 5.2 "Conventions" from Intel Architecture Manual 2B.
+ *
+ * error
+ * VMsucceed 0
+ * VMFailInvalid 1
+ * VMFailValid 2 see also VMCS VM-Instruction Error Field
+ */
+#define VM_SUCCESS 0
+#define VM_FAIL_INVALID 1
+#define VM_FAIL_VALID 2
+#define VMX_SET_ERROR_CODE_ASM \
+ " jnc 1f;" \
+ " mov $1, %[error];" /* CF: error = 1 */ \
+ " jmp 3f;" \
+ "1: jnz 2f;" \
+ " mov $2, %[error];" /* ZF: error = 2 */ \
+ " jmp 3f;" \
+ "2: mov $0, %[error];" \
+ "3:"
+
+
#define VMX_GUEST_VMEXIT 0
#define VMX_VMRESUME_ERROR 1
#define VMX_VMLAUNCH_ERROR 2
#define VMX_INVEPT_ERROR 3
#define VMX_VMWRITE_ERROR 4
+
int vmx_enter_guest(struct vmxctx *ctx, struct vmx *vmx, int launched);
void vmx_call_isr(uintptr_t entry);
-u_long vmx_fix_cr0(u_long cr0);
-u_long vmx_fix_cr4(u_long cr4);
-
int vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset);
extern char vmx_exit_guest[];
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h
deleted file mode 100644
index f0c5ba7691..0000000000
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h
+++ /dev/null
@@ -1,244 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2011 NetApp, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-/*
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source. A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- *
- * Copyright 2014 Pluribus Networks Inc.
- * Copyright 2017 Joyent, Inc.
- */
-
-#ifndef _VMX_CPUFUNC_H_
-#define _VMX_CPUFUNC_H_
-
-struct vmcs;
-
-/*
- * Section 5.2 "Conventions" from Intel Architecture Manual 2B.
- *
- * error
- * VMsucceed 0
- * VMFailInvalid 1
- * VMFailValid 2 see also VMCS VM-Instruction Error Field
- */
-#define VM_SUCCESS 0
-#define VM_FAIL_INVALID 1
-#define VM_FAIL_VALID 2
-#define VMX_SET_ERROR_CODE \
- " jnc 1f;" \
- " mov $1, %[error];" /* CF: error = 1 */ \
- " jmp 3f;" \
- "1: jnz 2f;" \
- " mov $2, %[error];" /* ZF: error = 2 */ \
- " jmp 3f;" \
- "2: mov $0, %[error];" \
- "3:"
-
-/* returns 0 on success and non-zero on failure */
-static __inline int
-vmxon(char *region)
-{
- int error;
- uint64_t addr;
-
-#ifdef __FreeBSD__
- addr = vtophys(region);
-#else
- /* This is pre-translated in illumos */
- addr = (uint64_t)region;
-#endif
- __asm __volatile("vmxon %[addr];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [addr] "m" (*(uint64_t *)&addr)
- : "memory");
-
- return (error);
-}
-
-#ifdef __FreeBSD__
-/* returns 0 on success and non-zero on failure */
-static __inline int
-vmclear(struct vmcs *vmcs)
-{
- int error;
- uint64_t addr;
-
- addr = vtophys(vmcs);
- __asm __volatile("vmclear %[addr];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [addr] "m" (*(uint64_t *)&addr)
- : "memory");
- return (error);
-}
-#endif /* __FreeBSD__ */
-
-static __inline void
-vmxoff(void)
-{
-
- __asm __volatile("vmxoff");
-}
-
-static __inline void
-vmptrst(uint64_t *addr)
-{
-
- __asm __volatile("vmptrst %[addr]" :: [addr]"m" (*addr) : "memory");
-}
-
-#ifdef __FreeBSD__
-static __inline int
-vmptrld(struct vmcs *vmcs)
-{
- int error;
- uint64_t addr;
-
- addr = vtophys(vmcs);
- __asm __volatile("vmptrld %[addr];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [addr] "m" (*(uint64_t *)&addr)
- : "memory");
- return (error);
-}
-#endif /* __FreeBSD__ */
-
-static __inline int
-vmwrite(uint64_t reg, uint64_t val)
-{
- int error;
-
- __asm __volatile("vmwrite %[val], %[reg];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [val] "r" (val), [reg] "r" (reg)
- : "memory");
-
- return (error);
-}
-
-static __inline int
-vmread(uint64_t r, uint64_t *addr)
-{
- int error;
-
- __asm __volatile("vmread %[r], %[addr];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [r] "r" (r), [addr] "m" (*addr)
- : "memory");
-
- return (error);
-}
-
-#ifdef __FreeBSD__
-static __inline void
-VMCLEAR(struct vmcs *vmcs)
-{
- int err;
-
- err = vmclear(vmcs);
- if (err != 0)
- panic("%s: vmclear(%p) error %d", __func__, vmcs, err);
-
- critical_exit();
-}
-
-static __inline void
-VMPTRLD(struct vmcs *vmcs)
-{
- int err;
-
- critical_enter();
-
- err = vmptrld(vmcs);
- if (err != 0)
- panic("%s: vmptrld(%p) error %d", __func__, vmcs, err);
-}
-#endif /* __FreeBSD__ */
-
-#define INVVPID_TYPE_ADDRESS 0UL
-#define INVVPID_TYPE_SINGLE_CONTEXT 1UL
-#define INVVPID_TYPE_ALL_CONTEXTS 2UL
-
-struct invvpid_desc {
- uint16_t vpid;
- uint16_t _res1;
- uint32_t _res2;
- uint64_t linear_addr;
-};
-CTASSERT(sizeof(struct invvpid_desc) == 16);
-
-static __inline void
-invvpid(uint64_t type, struct invvpid_desc desc)
-{
- int error;
-
- __asm __volatile("invvpid %[desc], %[type];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [desc] "m" (desc), [type] "r" (type)
- : "memory");
-
- if (error)
- panic("invvpid error %d", error);
-}
-
-#define INVEPT_TYPE_SINGLE_CONTEXT 1UL
-#define INVEPT_TYPE_ALL_CONTEXTS 2UL
-struct invept_desc {
- uint64_t eptp;
- uint64_t _res;
-};
-CTASSERT(sizeof(struct invept_desc) == 16);
-
-static __inline void
-invept(uint64_t type, struct invept_desc desc)
-{
- int error;
-
- __asm __volatile("invept %[desc], %[type];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [desc] "m" (desc), [type] "r" (type)
- : "memory");
-
- if (error)
- panic("invept error %d", error);
-}
-#endif
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
index 6c37c9c234..cfdf2bfe05 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
@@ -62,13 +62,6 @@ vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
return ((msr_val & (1UL << bitpos)) == 0);
}
-uint32_t
-vmx_revision(void)
-{
-
- return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
-}
-
/*
* Generate a bitmask to be used for the VMCS execution control fields.
*
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h
index ac2adb0dd1..848cdea26b 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h
@@ -40,8 +40,6 @@ void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid);
int vmx_rdmsr(struct vmx *, int vcpuid, u_int num, uint64_t *val, bool *retu);
int vmx_wrmsr(struct vmx *, int vcpuid, u_int num, uint64_t val, bool *retu);
-uint32_t vmx_revision(void);
-
int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
uint32_t zeros_mask, uint32_t *retval);
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vtd.c b/usr/src/uts/i86pc/io/vmm/intel/vtd.c
index 50c0934ace..79524220b5 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vtd.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vtd.c
@@ -611,10 +611,10 @@ vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
ptpindex = 0;
ptpshift = 0;
- KASSERT(gpa + len > gpa, ("%s: invalid gpa range %#lx/%#lx", __func__,
+ KASSERT(gpa + len > gpa, ("%s: invalid gpa range %lx/%lx", __func__,
gpa, len));
- KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %#lx/%#lx beyond "
- "domain maxaddr %#lx", __func__, gpa, len, dom->maxaddr));
+ KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %lx/%lx beyond "
+ "domain maxaddr %lx", __func__, gpa, len, dom->maxaddr));
if (gpa & PAGE_MASK)
panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);
diff --git a/usr/src/uts/i86pc/io/vmm/io/iommu.c b/usr/src/uts/i86pc/io/vmm/io/iommu.c
index 918a9ec3e4..2e5fc9df32 100644
--- a/usr/src/uts/i86pc/io/vmm/io/iommu.c
+++ b/usr/src/uts/i86pc/io/vmm/io/iommu.c
@@ -204,12 +204,8 @@ iommu_find_device(dev_info_t *dip, void *arg)
static void
iommu_init(void)
{
- int error, bus, slot, func;
+ int error;
vm_paddr_t maxaddr;
-#ifdef __FreeBSD__
- devclass_t dc;
-#endif
- device_t dev;
if (!iommu_enable)
return;
@@ -246,35 +242,7 @@ iommu_init(void)
*/
iommu_create_mapping(host_domain, 0, 0, maxaddr);
-#ifdef __FreeBSD__
- add_tag = EVENTHANDLER_REGISTER(pci_add_device, iommu_pci_add, NULL, 0);
- delete_tag = EVENTHANDLER_REGISTER(pci_delete_device, iommu_pci_delete,
- NULL, 0);
- dc = devclass_find("ppt");
- for (bus = 0; bus <= PCI_BUSMAX; bus++) {
- for (slot = 0; slot <= PCI_SLOTMAX; slot++) {
- for (func = 0; func <= PCI_FUNCMAX; func++) {
- dev = pci_find_dbsf(0, bus, slot, func);
- if (dev == NULL)
- continue;
-
- /* Skip passthrough devices. */
- if (dc != NULL &&
- device_get_devclass(dev) == dc)
- continue;
-
- /*
- * Everything else belongs to the host
- * domain.
- */
- iommu_add_device(host_domain,
- pci_get_rid(dev));
- }
- }
- }
-#else
ddi_walk_devs(ddi_root_node(), iommu_find_device, (void *)B_TRUE);
-#endif
IOMMU_ENABLE();
}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
index c1825f4264..f7a05254ec 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
@@ -96,6 +96,12 @@ __FBSDID("$FreeBSD$");
static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_tmr_reset(struct vlapic *);
+#ifdef __ISRVEC_DEBUG
+static void vlapic_isrstk_accept(struct vlapic *, int);
+static void vlapic_isrstk_eoi(struct vlapic *, int);
+static void vlapic_isrstk_verify(const struct vlapic *);
+#endif /* __ISRVEC_DEBUG */
+
static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{
@@ -134,12 +140,14 @@ vlapic_dfr_write_handler(struct vlapic *vlapic)
lapic->dfr &= APIC_DFR_MODEL_MASK;
lapic->dfr |= APIC_DFR_RESERVED;
+#ifdef __FreeBSD__
if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
else
VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
+#endif
}
void
@@ -495,21 +503,33 @@ vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt)
return (1);
}
-#if 1
-static void
-dump_isrvec_stk(struct vlapic *vlapic)
+static uint_t
+vlapic_active_isr(struct vlapic *vlapic)
{
int i;
- uint32_t *isrptr;
+ uint32_t *isrp;
- isrptr = &vlapic->apic_page->isr0;
- for (i = 0; i < 8; i++)
- printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);
+ isrp = &vlapic->apic_page->isr7;
- for (i = 0; i <= vlapic->isrvec_stk_top; i++)
- printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
+ for (i = 7; i >= 0; i--, isrp -= 4) {
+ uint32_t reg = *isrp;
+
+ if (reg != 0) {
+ uint_t vec = (i * 32) + bsrl(reg);
+
+ if (vec < 16) {
+ /*
+ * Truncate the illegal low vectors to value of
+ * 0, indicating that no active ISR was found.
+ */
+ return (0);
+ }
+ return (vec);
+ }
+ }
+
+ return (0);
}
-#endif
/*
* Algorithm adopted from section "Interrupt, Task and Processor Priority"
@@ -520,55 +540,11 @@ vlapic_update_ppr(struct vlapic *vlapic)
{
int isrvec, tpr, ppr;
- /*
- * Note that the value on the stack at index 0 is always 0.
- *
- * This is a placeholder for the value of ISRV when none of the
- * bits is set in the ISRx registers.
- */
- isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
+ isrvec = vlapic_active_isr(vlapic);
tpr = vlapic->apic_page->tpr;
-#if 1
- {
- int i, lastprio, curprio, vector, idx;
- uint32_t *isrptr;
-
- if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
- panic("isrvec_stk is corrupted: %d", isrvec);
-
- /*
- * Make sure that the priority of the nested interrupts is
- * always increasing.
- */
- lastprio = -1;
- for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
- curprio = PRIO(vlapic->isrvec_stk[i]);
- if (curprio <= lastprio) {
- dump_isrvec_stk(vlapic);
- panic("isrvec_stk does not satisfy invariant");
- }
- lastprio = curprio;
- }
-
- /*
- * Make sure that each bit set in the ISRx registers has a
- * corresponding entry on the isrvec stack.
- */
- i = 1;
- isrptr = &vlapic->apic_page->isr0;
- for (vector = 0; vector < 256; vector++) {
- idx = (vector / 32) * 4;
- if (isrptr[idx] & (1 << (vector % 32))) {
- if (i > vlapic->isrvec_stk_top ||
- vlapic->isrvec_stk[i] != vector) {
- dump_isrvec_stk(vlapic);
- panic("ISR and isrvec_stk out of sync");
- }
- i++;
- }
- }
- }
+#ifdef __ISRVEC_DEBUG
+ vlapic_isrstk_verify(vlapic);
#endif
if (PRIO(tpr) >= PRIO(isrvec))
@@ -593,25 +569,25 @@ vlapic_process_eoi(struct vlapic *vlapic)
{
struct LAPIC *lapic = vlapic->apic_page;
uint32_t *isrptr, *tmrptr;
- int i, idx, bitpos, vector;
+ int i;
+ uint_t idx, bitpos, vector;
isrptr = &lapic->isr0;
tmrptr = &lapic->tmr0;
for (i = 7; i >= 0; i--) {
idx = i * 4;
- bitpos = fls(isrptr[idx]);
- if (bitpos-- != 0) {
- if (vlapic->isrvec_stk_top <= 0) {
- panic("invalid vlapic isrvec_stk_top %d",
- vlapic->isrvec_stk_top);
- }
- isrptr[idx] &= ~(1 << bitpos);
+ if (isrptr[idx] != 0) {
+ bitpos = bsrl(isrptr[idx]);
vector = i * 32 + bitpos;
+
+ isrptr[idx] &= ~(1 << bitpos);
VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d",
vector);
VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
- vlapic->isrvec_stk_top--;
+#ifdef __ISRVEC_DEBUG
+ vlapic_isrstk_eoi(vlapic, vector);
+#endif
vlapic_update_ppr(vlapic);
if ((tmrptr[idx] & (1 << bitpos)) != 0) {
vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
@@ -1143,7 +1119,7 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
struct LAPIC *lapic = vlapic->apic_page;
uint32_t *irrptr, *isrptr;
- int idx, stk_top;
+ int idx;
if (vlapic->ops.intr_accepted)
return ((*vlapic->ops.intr_accepted)(vlapic, vector));
@@ -1162,16 +1138,9 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector)
isrptr[idx] |= 1 << (vector % 32);
VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");
- /*
- * Update the PPR
- */
- vlapic->isrvec_stk_top++;
-
- stk_top = vlapic->isrvec_stk_top;
- if (stk_top >= ISRVEC_STK_SIZE)
- panic("isrvec_stk_top overflow %d", stk_top);
-
- vlapic->isrvec_stk[stk_top] = vector;
+#ifdef __ISRVEC_DEBUG
+ vlapic_isrstk_accept(vlapic, vector);
+#endif
}
void
@@ -1708,3 +1677,92 @@ vlapic_localize_resources(struct vlapic *vlapic)
vmm_glue_callout_localize(&vlapic->callout);
}
#endif /* __FreeBSD */
+
+#ifdef __ISRVEC_DEBUG
+static void
+vlapic_isrstk_eoi(struct vlapic *vlapic, int vector)
+{
+ if (vlapic->isrvec_stk_top <= 0) {
+ panic("invalid vlapic isrvec_stk_top %d",
+ vlapic->isrvec_stk_top);
+ }
+ vlapic->isrvec_stk_top--;
+}
+
+static void
+vlapic_isrstk_accept(struct vlapic *vlapic, int vector)
+{
+ int stk_top;
+
+ vlapic->isrvec_stk_top++;
+
+ stk_top = vlapic->isrvec_stk_top;
+ if (stk_top >= ISRVEC_STK_SIZE)
+ panic("isrvec_stk_top overflow %d", stk_top);
+
+ vlapic->isrvec_stk[stk_top] = vector;
+}
+
+static void
+vlapic_isrstk_dump(const struct vlapic *vlapic)
+{
+ int i;
+ uint32_t *isrptr;
+
+ isrptr = &vlapic->apic_page->isr0;
+ for (i = 0; i < 8; i++)
+ printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);
+
+ for (i = 0; i <= vlapic->isrvec_stk_top; i++)
+ printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
+}
+
+static void
+vlapic_isrstk_verify(const struct vlapic *vlapic)
+{
+ int i, lastprio, curprio, vector, idx;
+ uint32_t *isrptr;
+
+ /*
+ * Note: The value at index 0 in isrvec_stk is always 0.
+ *
+ * It is a placeholder for the value of ISR vector when no bits are set
+ * in the ISRx registers.
+ */
+ if (vlapic->isrvec_stk_top == 0 && vlapic->isrvec_stk[0] != 0) {
+ panic("isrvec_stk is corrupted: %d", vlapic->isrvec_stk[0]);
+ }
+
+ /*
+ * Make sure that the priority of the nested interrupts is
+ * always increasing.
+ */
+ lastprio = -1;
+ for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
+ curprio = PRIO(vlapic->isrvec_stk[i]);
+ if (curprio <= lastprio) {
+ vlapic_isrstk_dump(vlapic);
+ panic("isrvec_stk does not satisfy invariant");
+ }
+ lastprio = curprio;
+ }
+
+ /*
+ * Make sure that each bit set in the ISRx registers has a
+ * corresponding entry on the isrvec stack.
+ */
+ i = 1;
+ isrptr = &vlapic->apic_page->isr0;
+ for (vector = 0; vector < 256; vector++) {
+ idx = (vector / 32) * 4;
+ if (isrptr[idx] & (1 << (vector % 32))) {
+ if (i > vlapic->isrvec_stk_top ||
+ vlapic->isrvec_stk[i] != vector) {
+ vlapic_isrstk_dump(vlapic);
+ panic("ISR and isrvec_stk out of sync");
+ }
+ i++;
+ }
+ }
+}
+#endif
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
index 5795d48d52..8a0d594de3 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
@@ -27,6 +27,18 @@
*
* $FreeBSD$
*/
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2020 Oxide Computer Company
+ */
#ifndef _VLAPIC_PRIV_H_
#define _VLAPIC_PRIV_H_
@@ -140,6 +152,10 @@ enum boot_state {
#define VLAPIC_TMR_CNT 8
+#ifdef DEBUG
+#define __ISRVEC_DEBUG
+#endif
+
struct vlapic;
struct vlapic_ops {
@@ -166,15 +182,6 @@ struct vlapic {
struct bintime timer_period_bt; /* timer period */
struct mtx timer_mtx;
- /*
- * The 'isrvec_stk' is a stack of vectors injected by the local apic.
- * A vector is popped from the stack when the processor does an EOI.
- * The vector on the top of the stack is used to compute the
- * Processor Priority in conjunction with the TPR.
- */
- uint8_t isrvec_stk[ISRVEC_STK_SIZE];
- int isrvec_stk_top;
-
uint64_t msr_apicbase;
enum boot_state boot_state;
@@ -199,6 +206,19 @@ struct vlapic {
*/
uint32_t tmr_vec_deassert[VLAPIC_TMR_CNT];
uint32_t tmr_vec_assert[VLAPIC_TMR_CNT];
+
+#ifdef __ISRVEC_DEBUG
+ /*
+ * The 'isrvec_stk' is a stack of vectors injected by the local APIC.
+ * It is used as a debugging method to double-check the behavior of the
+ * emulation. Vectors are pushed to the stack when they are accepted
+ * for injection and popped from the stack when the processor performs
+ * an EOI. The vector on the top of the stack is used to verify the
+ * computed Processor Priority.
+ */
+ uint8_t isrvec_stk[ISRVEC_STK_SIZE];
+ int isrvec_stk_top;
+#endif
};
void vlapic_init(struct vlapic *vlapic);
diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
index 0dce2b0a1f..6664cb06e7 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
@@ -98,7 +98,7 @@ vpmtmr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, uint8_t bytes,
now = sbinuptime();
delta = now - vpmtmr->baseuptime;
KASSERT(delta >= 0, ("vpmtmr_handler: uptime went backwards: "
- "%#lx to %#lx", vpmtmr->baseuptime, now));
+ "%lx to %lx", vpmtmr->baseuptime, now));
*val = vpmtmr->baseval + delta / vpmtmr->freq_sbt;
return (0);
diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.c b/usr/src/uts/i86pc/io/vmm/io/vrtc.c
index 343ad9c37a..e560ce9b7f 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vrtc.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.c
@@ -161,7 +161,7 @@ vrtc_curtime(struct vrtc *vrtc, sbintime_t *basetime)
now = sbinuptime();
delta = now - vrtc->base_uptime;
KASSERT(delta >= 0, ("vrtc_curtime: uptime went backwards: "
- "%#lx to %#lx", vrtc->base_uptime, now));
+ "%lx to %lx", vrtc->base_uptime, now));
secs = delta / SBT_1S;
t += secs;
*basetime += secs * SBT_1S;
@@ -191,7 +191,7 @@ secs_to_rtc(time_t rtctime, struct vrtc *vrtc, int force_update)
if (rtctime < 0) {
KASSERT(rtctime == VRTC_BROKEN_TIME,
- ("%s: invalid vrtc time %#lx", __func__, rtctime));
+ ("%s: invalid vrtc time %lx", __func__, rtctime));
return;
}
@@ -286,33 +286,23 @@ rtc_to_secs(struct vrtc *vrtc)
struct clocktime ct;
struct timespec ts;
struct rtcdev *rtc;
-#ifdef __FreeBSD__
- struct vm *vm;
-#endif
int century, error, hour, pm, year;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
-#ifdef __FreeBSD__
- vm = vrtc->vm;
-#endif
rtc = &vrtc->rtcdev;
bzero(&ct, sizeof(struct clocktime));
error = rtcget(rtc, rtc->sec, &ct.sec);
if (error || ct.sec < 0 || ct.sec > 59) {
-#ifdef __FreeBSD__
- VM_CTR2(vm, "Invalid RTC sec %#x/%d", rtc->sec, ct.sec);
-#endif
+ /* invalid RTC seconds */
goto fail;
}
error = rtcget(rtc, rtc->min, &ct.min);
if (error || ct.min < 0 || ct.min > 59) {
-#ifdef __FreeBSD__
- VM_CTR2(vm, "Invalid RTC min %#x/%d", rtc->min, ct.min);
-#endif
+ /* invalid RTC minutes */
goto fail;
}
@@ -342,18 +332,13 @@ rtc_to_secs(struct vrtc *vrtc)
if (pm)
ct.hour += 12;
} else {
-#ifdef __FreeBSD__
- VM_CTR2(vm, "Invalid RTC 12-hour format %#x/%d",
- rtc->hour, ct.hour);
-#endif
+ /* invalid RTC 12-hour format */
goto fail;
}
}
if (error || ct.hour < 0 || ct.hour > 23) {
-#ifdef __FreeBSD__
- VM_CTR2(vm, "Invalid RTC hour %#x/%d", rtc->hour, ct.hour);
-#endif
+ /* invalid RTC hour */
goto fail;
}
@@ -367,47 +352,32 @@ rtc_to_secs(struct vrtc *vrtc)
error = rtcget(rtc, rtc->day_of_month, &ct.day);
if (error || ct.day < 1 || ct.day > 31) {
-#ifdef __FreeBSD__
- VM_CTR2(vm, "Invalid RTC mday %#x/%d", rtc->day_of_month,
- ct.day);
-#endif
+ /* invalid RTC mday */
goto fail;
}
error = rtcget(rtc, rtc->month, &ct.mon);
if (error || ct.mon < 1 || ct.mon > 12) {
-#ifdef __FreeBSD__
- VM_CTR2(vm, "Invalid RTC month %#x/%d", rtc->month, ct.mon);
-#endif
+ /* invalid RTC month */
goto fail;
}
error = rtcget(rtc, rtc->year, &year);
if (error || year < 0 || year > 99) {
-#ifdef __FreeBSD__
- VM_CTR2(vm, "Invalid RTC year %#x/%d", rtc->year, year);
-#endif
+ /* invalid RTC year */
goto fail;
}
error = rtcget(rtc, rtc->century, &century);
ct.year = century * 100 + year;
if (error || ct.year < POSIX_BASE_YEAR) {
-#ifdef __FreeBSD__
- VM_CTR2(vm, "Invalid RTC century %#x/%d", rtc->century,
- ct.year);
-#endif
+ /* invalid RTC century */
goto fail;
}
error = clock_ct_to_ts(&ct, &ts);
if (error || ts.tv_sec < 0) {
-#ifdef __FreeBSD__
- VM_CTR3(vm, "Invalid RTC clocktime.date %04d-%02d-%02d",
- ct.year, ct.mon, ct.day);
- VM_CTR3(vm, "Invalid RTC clocktime.time %02d:%02d:%02d",
- ct.hour, ct.min, ct.sec);
-#endif
+ /* invalid RTC clocktime */
goto fail;
}
return (ts.tv_sec); /* success */
@@ -416,9 +386,6 @@ fail:
* Stop updating the RTC if the date/time fields programmed by
* the guest are invalid.
*/
-#ifdef __FreeBSD__
- VM_CTR0(vrtc->vm, "Invalid RTC date/time programming detected");
-#endif
return (VRTC_BROKEN_TIME);
}
@@ -426,9 +393,6 @@ static int
vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase)
{
struct rtcdev *rtc;
-#ifdef __FreeBSD__
- sbintime_t oldbase;
-#endif
time_t oldtime;
uint8_t alarm_sec, alarm_min, alarm_hour;
@@ -440,14 +404,9 @@ vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase)
alarm_hour = rtc->alarm_hour;
oldtime = vrtc->base_rtctime;
- VM_CTR2(vrtc->vm, "Updating RTC secs from %#lx to %#lx",
+ VM_CTR2(vrtc->vm, "Updating RTC secs from %lx to %lx",
oldtime, newtime);
-#ifdef __FreeBSD__
- oldbase = vrtc->base_uptime;
- VM_CTR2(vrtc->vm, "Updating RTC base uptime from %#lx to %#lx",
- oldbase, newbase);
-#endif
vrtc->base_uptime = newbase;
if (newtime == oldtime)
@@ -614,7 +573,7 @@ vrtc_callout_check(struct vrtc *vrtc, sbintime_t freq)
active = callout_active(&vrtc->callout) ? 1 : 0;
KASSERT((freq == 0 && !active) || (freq != 0 && active),
- ("vrtc callout %s with frequency %#lx",
+ ("vrtc callout %s with frequency %lx",
active ? "active" : "inactive", freq));
}
@@ -643,7 +602,7 @@ vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval)
rtc->reg_c = newirqf | newval;
changed = oldval ^ rtc->reg_c;
if (changed) {
- VM_CTR2(vrtc->vm, "RTC reg_c changed from %#x to %#x",
+ VM_CTR2(vrtc->vm, "RTC reg_c changed from %x to %x",
oldval, rtc->reg_c);
}
@@ -674,7 +633,7 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
rtc->reg_b = newval;
changed = oldval ^ newval;
if (changed) {
- VM_CTR2(vrtc->vm, "RTC reg_b changed from %#x to %#x",
+ VM_CTR2(vrtc->vm, "RTC reg_b changed from %x to %x",
oldval, newval);
}
@@ -689,7 +648,7 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
} else {
curtime = vrtc_curtime(vrtc, &basetime);
KASSERT(curtime == vrtc->base_rtctime, ("%s: mismatch "
- "between vrtc basetime (%#lx) and curtime (%#lx)",
+ "between vrtc basetime (%lx) and curtime (%lx)",
__func__, vrtc->base_rtctime, curtime));
/*
@@ -745,7 +704,7 @@ vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval)
oldfreq = vrtc_freq(vrtc);
if (divider_enabled(oldval) && !divider_enabled(newval)) {
- VM_CTR2(vrtc->vm, "RTC divider held in reset at %#lx/%#lx",
+ VM_CTR2(vrtc->vm, "RTC divider held in reset at %lx/%lx",
vrtc->base_rtctime, vrtc->base_uptime);
} else if (!divider_enabled(oldval) && divider_enabled(newval)) {
/*
@@ -755,7 +714,7 @@ vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval)
* while the dividers were disabled.
*/
vrtc->base_uptime = sbinuptime();
- VM_CTR2(vrtc->vm, "RTC divider out of reset at %#lx/%#lx",
+ VM_CTR2(vrtc->vm, "RTC divider out of reset at %lx/%lx",
vrtc->base_rtctime, vrtc->base_uptime);
} else {
/* NOTHING */
@@ -764,7 +723,7 @@ vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval)
vrtc->rtcdev.reg_a = newval;
changed = oldval ^ newval;
if (changed) {
- VM_CTR2(vrtc->vm, "RTC reg_a changed from %#x to %#x",
+ VM_CTR2(vrtc->vm, "RTC reg_a changed from %x to %x",
oldval, newval);
}
@@ -790,10 +749,10 @@ vrtc_set_time(struct vm *vm, time_t secs)
VRTC_UNLOCK(vrtc);
if (error) {
- VM_CTR2(vrtc->vm, "Error %d setting RTC time to %#lx", error,
+ VM_CTR2(vrtc->vm, "Error %d setting RTC time to %lx", error,
secs);
} else {
- VM_CTR1(vrtc->vm, "RTC time set to %#lx", secs);
+ VM_CTR1(vrtc->vm, "RTC time set to %lx", secs);
}
return (error);
@@ -835,7 +794,7 @@ vrtc_nvram_write(struct vm *vm, int offset, uint8_t value)
VRTC_LOCK(vrtc);
ptr = (uint8_t *)(&vrtc->rtcdev);
ptr[offset] = value;
- VM_CTR2(vrtc->vm, "RTC nvram write %#x to offset %#x", value, offset);
+ VM_CTR2(vrtc->vm, "RTC nvram write %x to offset %x", value, offset);
VRTC_UNLOCK(vrtc);
return (0);
@@ -945,24 +904,24 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
} else {
*val = *((uint8_t *)rtc + offset);
}
- VCPU_CTR2(vm, vcpuid, "Read value %#x from RTC offset %#x",
+ VCPU_CTR2(vm, vcpuid, "Read value %x from RTC offset %x",
*val, offset);
} else {
switch (offset) {
case 10:
- VCPU_CTR1(vm, vcpuid, "RTC reg_a set to %#x", *val);
+ VCPU_CTR1(vm, vcpuid, "RTC reg_a set to %x", *val);
vrtc_set_reg_a(vrtc, *val);
break;
case 11:
- VCPU_CTR1(vm, vcpuid, "RTC reg_b set to %#x", *val);
+ VCPU_CTR1(vm, vcpuid, "RTC reg_b set to %x", *val);
error = vrtc_set_reg_b(vrtc, *val);
break;
case 12:
- VCPU_CTR1(vm, vcpuid, "RTC reg_c set to %#x (ignored)",
+ VCPU_CTR1(vm, vcpuid, "RTC reg_c set to %x (ignored)",
*val);
break;
case 13:
- VCPU_CTR1(vm, vcpuid, "RTC reg_d set to %#x (ignored)",
+ VCPU_CTR1(vm, vcpuid, "RTC reg_d set to %x (ignored)",
*val);
break;
case 0:
@@ -972,7 +931,7 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
*val &= 0x7f;
/* FALLTHRU */
default:
- VCPU_CTR2(vm, vcpuid, "RTC offset %#x set to %#x",
+ VCPU_CTR2(vm, vcpuid, "RTC offset %x set to %x",
offset, *val);
*((uint8_t *)rtc + offset) = *val;
break;
diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
index fbd2884b84..9501850dfc 100644
--- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
@@ -74,7 +74,7 @@ typedef int (*vmm_init_func_t)(int ipinum);
typedef int (*vmm_cleanup_func_t)(void);
typedef void (*vmm_resume_func_t)(void);
typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
-typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
+typedef int (*vmi_run_func_t)(void *vmi, int vcpu, uint64_t rip,
struct pmap *pmap, struct vm_eventinfo *info);
typedef void (*vmi_cleanup_func_t)(void *vmi);
typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num,
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 7a47cd0cd1..4eb967fd89 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -1014,7 +1014,7 @@ vm_iommu_modify(struct vm *vm, bool map)
if (map) {
KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0,
- ("iommu map found invalid memmap %#lx/%#lx/%#x",
+ ("iommu map found invalid memmap %lx/%lx/%x",
mm->gpa, mm->len, mm->flags));
if ((mm->flags & VM_MEMMAP_F_WIRED) == 0)
continue;
@@ -1024,7 +1024,7 @@ vm_iommu_modify(struct vm *vm, bool map)
continue;
mm->flags &= ~VM_MEMMAP_F_IOMMU;
KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0,
- ("iommu unmap found invalid memmap %#lx/%#lx/%#x",
+ ("iommu unmap found invalid memmap %lx/%lx/%x",
mm->gpa, mm->len, mm->flags));
}
@@ -1032,7 +1032,7 @@ vm_iommu_modify(struct vm *vm, bool map)
while (gpa < mm->gpa + mm->len) {
vp = vm_gpa_hold(vm, -1, gpa, PAGE_SIZE, VM_PROT_WRITE,
&cookie);
- KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
+ KASSERT(vp != NULL, ("vm(%s) could not map gpa %lx",
vm_name(vm), gpa));
vm_gpa_release(cookie);
@@ -1213,7 +1213,7 @@ vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
return (error);
/* Set 'nextrip' to match the value of %rip */
- VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val);
+ VCPU_CTR1(vm, vcpuid, "Setting nextrip to %lx", val);
vcpu = &vm->vcpu[vcpuid];
vcpu->nextrip = val;
return (0);
@@ -1561,7 +1561,7 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
vme->u.paging.gpa, ftype);
if (rv == 0) {
- VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx",
+ VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %lx",
ftype == VM_PROT_READ ? "accessed" : "dirty",
vme->u.paging.gpa);
goto done;
@@ -1571,7 +1571,7 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
map = &vm->vmspace->vm_map;
rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);
- VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
+ VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %lx, "
"ftype = %d", rv, vme->u.paging.gpa, ftype);
if (rv != KERN_SUCCESS)
@@ -1635,7 +1635,7 @@ vm_handle_mmio_emul(struct vm *vm, int vcpuid, bool *retu)
inst_addr = vme->rip + vme->u.mmio_emul.cs_base;
cs_d = vme->u.mmio_emul.cs_d;
- VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx",
+ VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %lx",
vme->u.mmio_emul.gpa);
/* Fetch the faulting instruction */
@@ -1655,7 +1655,7 @@ vm_handle_mmio_emul(struct vm *vm, int vcpuid, bool *retu)
}
if (vie_decode_instruction(vie, vm, vcpuid, cs_d) != 0) {
- VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx",
+ VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %lx",
inst_addr);
/* Dump (unrecognized) instruction bytes in userspace */
vie_fallback_exitinfo(vie, vme);
@@ -1915,7 +1915,7 @@ vm_suspend(struct vm *vm, enum vm_suspend_how how)
if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
return (EINVAL);
- if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
+ if (atomic_cmpset_int((uint_t *)&vm->suspend, 0, how) == 0) {
VM_CTR2(vm, "virtual machine already suspended %d/%d",
vm->suspend, how);
return (EALREADY);
@@ -2406,7 +2406,7 @@ vm_restart_instruction(void *arg, int vcpuid)
* instruction to be restarted.
*/
vcpu->exitinfo.inst_length = 0;
- VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by "
+ VCPU_CTR1(vm, vcpuid, "restarting instruction at %lx by "
"setting inst_length to zero", vcpu->exitinfo.rip);
} else if (state == VCPU_FROZEN) {
/*
@@ -2418,7 +2418,7 @@ vm_restart_instruction(void *arg, int vcpuid)
error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip);
KASSERT(!error, ("%s: error %d getting rip", __func__, error));
VCPU_CTR2(vm, vcpuid, "restarting instruction by updating "
- "nextrip from %#lx to %#lx", vcpu->nextrip, rip);
+ "nextrip from %lx to %lx", vcpu->nextrip, rip);
vcpu->nextrip = rip;
} else {
panic("%s: invalid state %d", __func__, state);
@@ -2449,7 +2449,7 @@ vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info)
} else {
info = 0;
}
- VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info);
+ VCPU_CTR2(vm, vcpuid, "%s: info1(%lx)", __func__, info);
vcpu->exitintinfo = info;
return (0);
}
@@ -2467,11 +2467,7 @@ exception_class(uint64_t info)
{
int type, vector;
-#ifdef __FreeBSD__
- KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info));
-#else
KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %lx", info));
-#endif
type = info & VM_INTINFO_TYPE;
vector = info & 0xff;
@@ -2519,13 +2515,8 @@ nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2,
enum exc_class exc1, exc2;
int type1, vector1;
-#ifdef __FreeBSD__
- KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1));
- KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2));
-#else
KASSERT(info1 & VM_INTINFO_VALID, ("info1 %lx is not valid", info1));
KASSERT(info2 & VM_INTINFO_VALID, ("info2 %lx is not valid", info2));
-#endif
/*
* If an exception occurs while attempting to call the double-fault
@@ -2534,7 +2525,7 @@ nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2,
type1 = info1 & VM_INTINFO_TYPE;
vector1 = info1 & 0xff;
if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) {
- VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)",
+ VCPU_CTR2(vm, vcpuid, "triple fault: info1(%lx), info2(%lx)",
info1, info2);
vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT);
*retinfo = 0;
@@ -2594,7 +2585,7 @@ vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
if (vcpu->exception_pending) {
info2 = vcpu_exception_intinfo(vcpu);
vcpu->exception_pending = 0;
- VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx",
+ VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %lx",
vcpu->exc_vector, info2);
}
@@ -2611,8 +2602,8 @@ vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
}
if (valid) {
- VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), "
- "retinfo(%#lx)", __func__, info1, info2, *retinfo);
+ VCPU_CTR4(vm, vcpuid, "%s: info1(%lx), info2(%lx), "
+ "retinfo(%lx)", __func__, info1, info2, *retinfo);
}
return (valid);
@@ -2735,7 +2726,7 @@ vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2)
int error;
vm = vmarg;
- VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx",
+ VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %x, cr2 %lx",
error_code, cr2);
error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
index 4dcaba8a82..696052d7d6 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
@@ -181,7 +181,7 @@ vmm_alloc_check(mod_hash_key_t key, mod_hash_val_t *val, void *unused)
{
struct kmem_item *i = (struct kmem_item *)val;
- cmn_err(CE_PANIC, "!vmm_alloc_check: hash not empty: %p, %d", i->addr,
+ cmn_err(CE_PANIC, "!vmm_alloc_check: hash not empty: %p, %lu", i->addr,
i->size);
return (MH_WALK_TERMINATE);
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index fc0cf6622f..ae450f1d9b 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -1431,7 +1431,7 @@ static char *x86_feature_names[NUM_X86_FEATURES] = {
"tbm",
"avx512_vnni",
"amd_pcec",
- "mb_clear",
+ "md_clear",
"mds_no",
"core_thermal",
"pkg_thermal",
diff --git a/usr/src/uts/i86pc/os/gipt.c b/usr/src/uts/i86pc/os/gipt.c
index ace7e03438..7bff5c3897 100644
--- a/usr/src/uts/i86pc/os/gipt.c
+++ b/usr/src/uts/i86pc/os/gipt.c
@@ -355,7 +355,8 @@ gipt_map_next_page(gipt_map_t *map, uint64_t va, uint64_t max_va, gipt_t **ptp)
ASSERT3P(pt, !=, NULL);
break;
} else {
- panic("unexpected PTE type %x @ va %p", ptet, cur_va);
+ panic("unexpected PTE type %x @ va %p", ptet,
+ (void *)cur_va);
}
}
@@ -387,7 +388,8 @@ gipt_map_next_page(gipt_map_t *map, uint64_t va, uint64_t max_va, gipt_t **ptp)
pt = gipt_map_lookup(map, cur_va, pt->gipt_level - 1);
ASSERT3P(pt, !=, NULL);
} else {
- panic("unexpected PTE type %x @ va %p", ptet, cur_va);
+ panic("unexpected PTE type %x @ va %p", ptet,
+ (void *)cur_va);
}
}
diff --git a/usr/src/uts/i86pc/os/hma.c b/usr/src/uts/i86pc/os/hma.c
index a41ff3e0d1..0e84030ac1 100644
--- a/usr/src/uts/i86pc/os/hma.c
+++ b/usr/src/uts/i86pc/os/hma.c
@@ -11,6 +11,7 @@
/*
* Copyright 2019 Joyent, Inc.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
#include <sys/cpuvar.h>
@@ -33,6 +34,7 @@ struct hma_reg {
static kmutex_t hma_lock;
static list_t hma_registrations;
static boolean_t hma_exclusive = B_FALSE;
+int hma_disable = 0;
static boolean_t hma_vmx_ready = B_FALSE;
static const char *hma_vmx_error = NULL;
@@ -89,6 +91,11 @@ hma_init(void)
list_create(&hma_registrations, sizeof (struct hma_reg),
offsetof(struct hma_reg, hr_node));
+ if (hma_disable != 0) {
+ cmn_err(CE_CONT, "?hma_init: disabled");
+ return;
+ }
+
switch (cpuid_getvendor(CPU)) {
case X86_VENDOR_Intel:
(void) hma_vmx_init();
diff --git a/usr/src/uts/i86pc/os/mp_implfuncs.c b/usr/src/uts/i86pc/os/mp_implfuncs.c
index 2d0bd3eb53..c61e6216b0 100644
--- a/usr/src/uts/i86pc/os/mp_implfuncs.c
+++ b/usr/src/uts/i86pc/os/mp_implfuncs.c
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2020 Oxide Computer Company
*/
#define PSMI_1_7
@@ -32,6 +33,7 @@
#include <sys/psm_modctl.h>
#include <sys/smp_impldefs.h>
#include <sys/reboot.h>
+#include <sys/prom_debug.h>
#if defined(__xpv)
#include <sys/hypervisor.h>
#include <vm/kboot_mmu.h>
@@ -390,12 +392,6 @@ psm_modload(void)
close_mach_list();
}
-#if defined(__xpv)
-#define NOTSUP_MSG "This version of Solaris xVM does not support this hardware"
-#else
-#define NOTSUP_MSG "This version of Solaris does not support this hardware"
-#endif /* __xpv */
-
void
psm_install(void)
{
@@ -406,14 +402,18 @@ psm_install(void)
mutex_enter(&psmsw_lock);
for (swp = psmsw->psw_forw; swp != psmsw; ) {
+ PRM_DEBUGS(swp->psw_infop->p_mach_idstring);
opsp = swp->psw_infop->p_ops;
if (opsp->psm_probe) {
+ PRM_POINT("psm_probe()");
if ((*opsp->psm_probe)() == PSM_SUCCESS) {
+ PRM_POINT("psm_probe() PSM_SUCCESS");
psmcnt++;
swp->psw_flag |= PSM_MOD_IDENTIFY;
swp = swp->psw_forw;
continue;
}
+ PRM_POINT("psm_probe() FAILURE");
}
/* remove the unsuccessful psm modules */
cswp = swp;
@@ -429,7 +429,8 @@ psm_install(void)
}
mutex_exit(&psmsw_lock);
if (psmcnt == 0)
- halt(NOTSUP_MSG);
+ halt("the operating system does not yet support this hardware");
+ PRM_POINT("psminitf()");
(*psminitf)();
}
diff --git a/usr/src/uts/i86pc/os/mp_machdep.c b/usr/src/uts/i86pc/os/mp_machdep.c
index f36f5f052d..f017995ac8 100644
--- a/usr/src/uts/i86pc/os/mp_machdep.c
+++ b/usr/src/uts/i86pc/os/mp_machdep.c
@@ -26,6 +26,7 @@
* Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
* Copyright 2018 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
#define PSMI_1_7
@@ -63,6 +64,8 @@
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/cpc_pcbe.h>
+#include <sys/prom_debug.h>
+
#define OFFSETOF(s, m) (size_t)(&(((s *)0)->m))
@@ -978,6 +981,7 @@ mach_init()
{
struct psm_ops *pops;
+ PRM_POINT("mach_construct_info()");
mach_construct_info();
pops = mach_set[0];
@@ -1017,6 +1021,7 @@ mach_init()
notify_error = pops->psm_notify_error;
}
+ PRM_POINT("psm_softinit()");
(*pops->psm_softinit)();
/*
@@ -1034,6 +1039,7 @@ mach_init()
#ifndef __xpv
non_deep_idle_disp_enq_thread = disp_enq_thread;
#endif
+ PRM_DEBUG(idle_cpu_use_hlt);
if (idle_cpu_use_hlt) {
idle_cpu = cpu_idle_adaptive;
CPU->cpu_m.mcpu_idle_cpu = cpu_idle;
@@ -1068,6 +1074,7 @@ mach_init()
#endif
}
+ PRM_POINT("mach_smpinit()");
mach_smpinit();
}
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index 636e58280a..dd2b5d703b 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -25,6 +25,7 @@
* Copyright 2017 Nexenta Systems, Inc.
* Copyright (c) 2018 Joyent, Inc.
* Copyright (c) 2015 by Delphix. All rights reserved.
+ * Copyright 2020 Oxide Computer Company
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -74,6 +75,7 @@
#include <sys/memlist_plat.h>
#include <sys/varargs.h>
#include <sys/promif.h>
+#include <sys/prom_debug.h>
#include <sys/modctl.h>
#include <sys/sunddi.h>
@@ -464,7 +466,7 @@ static pgcnt_t kphysm_init(page_t *, pgcnt_t);
* | |
* 0xFFFFFXXX.XXX00000 |-----------------------|- segkvmm_base (floating)
* | segkp |
- * |-----------------------|- segkp_base (floating)
+ * |-----------------------|- segkp_base (floating)
* | page_t structures | valloc_base + valloc_sz
* | memsegs, memlists, |
* | page hash, etc. |
@@ -623,21 +625,8 @@ size_t toxic_bit_map_len = 0; /* in bits */
#endif /* __i386 */
-/*
- * Simple boot time debug facilities
- */
-static char *prm_dbg_str[] = {
- "%s:%d: '%s' is 0x%x\n",
- "%s:%d: '%s' is 0x%llx\n"
-};
-
int prom_debug;
-#define PRM_DEBUG(q) if (prom_debug) \
- prom_printf(prm_dbg_str[sizeof (q) >> 3], "startup.c", __LINE__, #q, q);
-#define PRM_POINT(q) if (prom_debug) \
- prom_printf("%s:%d: %s\n", "startup.c", __LINE__, q);
-
/*
* This structure is used to keep track of the intial allocations
* done in startup_memlist(). The value of NUM_ALLOCATIONS needs to
@@ -2273,6 +2262,7 @@ startup_end(void)
* We can now setup for XSAVE because fpu_probe is done in configure().
*/
if (fp_save_mech == FP_XSAVE) {
+ PRM_POINT("xsave_setup_msr()");
xsave_setup_msr(CPU);
}
@@ -2281,7 +2271,9 @@ startup_end(void)
* support.
*/
setx86isalist();
+ PRM_POINT("cpu_intr_alloc()");
cpu_intr_alloc(CPU, NINTR_THREADS);
+ PRM_POINT("psm_install()");
psm_install();
/*
diff --git a/usr/src/uts/i86pc/sys/hpet_acpi.h b/usr/src/uts/i86pc/sys/hpet_acpi.h
index e60ebe4bba..81304674b5 100644
--- a/usr/src/uts/i86pc/sys/hpet_acpi.h
+++ b/usr/src/uts/i86pc/sys/hpet_acpi.h
@@ -36,7 +36,7 @@ extern "C" {
#endif
/*
- * Solaris uses an HPET Timer to generate interrupts for CPUs in Deep C-state
+ * illumos uses an HPET Timer to generate interrupts for CPUs in Deep C-state
* with stalled LAPIC Timers. All CPUs use one HPET timer. The timer's
* interrupt targets one CPU (via the I/O APIC). The one CPU that receives
* the HPET's interrupt wakes up other CPUs as needed during the HPET Interrupt
@@ -46,7 +46,7 @@ extern "C" {
* Please see the Intel Programmer's guides. Interrupts are disabled before
* a CPU Halts into Deep C-state. (This allows CPU-hardware-specific cleanup
* before servicing interrupts.) When a Deep C-state CPU wakes up (due to
- * an externally generated interrupt), it resume execution where it halted.
+ * an externally generated interrupt), it resumes execution where it halted.
* The CPU returning from Deep C-state must enable interrupts before it will
* handle the pending interrupt that woke it from Deep C-state.
*
@@ -72,7 +72,7 @@ extern "C" {
* } timers[32];
* }
*
- * There are 32 possible timers in an hpet. Only the first 3 timers are
+ * There are 32 possible timers in an HPET. Only the first 3 timers are
* required. The other 29 timers are optional.
*
* HPETs can have 64-bit or 32-bit timers. Timers/compare registers can
@@ -80,7 +80,7 @@ extern "C" {
* The first two timers are not used. The HPET spec intends the first two
* timers to be used as "legacy replacement" for the PIT and RTC timers.
*
- * Solaris uses the first available non-legacy replacement timer as a proxy
+ * illumos uses the first available non-legacy replacement timer as a proxy
* timer for processor Local APIC Timers that stop in deep idle C-states.
*/
@@ -97,7 +97,7 @@ extern "C" {
#define HPET_SIZE (1024)
/*
- * Offsets of hpet registers and macros to access them from HPET base address.
+ * Offsets of HPET registers and macros to access them from HPET base address.
*/
#define HPET_GEN_CAP_OFFSET (0)
#define HPET_GEN_CONFIG_OFFSET (0x10)
diff --git a/usr/src/uts/i86pc/sys/prom_debug.h b/usr/src/uts/i86pc/sys/prom_debug.h
new file mode 100644
index 0000000000..ae64d91711
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/prom_debug.h
@@ -0,0 +1,72 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Oxide Computer Company
+ */
+
+#ifndef _SYS_PROM_DEBUG_H
+#define _SYS_PROM_DEBUG_H
+
+#include <sys/promif.h>
+
+/*
+ * These macros are used to emit coarse-grained early boot debugging
+ * information when the user sets "prom_debug" in the boot environment. They
+ * should only be used for information that we cannot easily obtain through a
+ * richer mechanism because the machine hangs or crashes before other debugging
+ * tools are available.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int prom_debug;
+
+/*
+ * Print a string message, used to signal that we have at least reached a
+ * particular point in the code:
+ */
+#define PRM_POINT(q) do { \
+ if (prom_debug) { \
+ prom_printf("%s:%d: %s\n", \
+ __FILE__, __LINE__, (q)); \
+ } \
+ } while (0)
+
+/*
+ * Print the name and value of an integer variable:
+ */
+#define PRM_DEBUG(q) do { \
+ if (prom_debug) { \
+ prom_printf("%s:%d: '%s' is 0x%llx\n", \
+ __FILE__, __LINE__, #q, (long long)(q)); \
+ } \
+ } while (0)
+
+/*
+ * Print the name and value of a string (char *) variable (which may be NULL):
+ */
+#define PRM_DEBUGS(q) do { \
+ if (prom_debug) { \
+ const char *qq = q; \
+ prom_printf("%s:%d: '%s' is '%s'\n", \
+ __FILE__, __LINE__, #q, \
+ qq != NULL ? qq : "<NULL>"); \
+ } \
+ } while (0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_PROM_DEBUG_H */
diff --git a/usr/src/uts/i86pc/vmm/Makefile b/usr/src/uts/i86pc/vmm/Makefile
index e7f07c4c4e..0106dd0a0f 100644
--- a/usr/src/uts/i86pc/vmm/Makefile
+++ b/usr/src/uts/i86pc/vmm/Makefile
@@ -43,7 +43,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
# Overrides and additions
#
-CERRWARN += -_gcc=-Wno-empty-body
# 3rd party code
SMOFF += all_func_returns
@@ -51,9 +50,6 @@ SMOFF += all_func_returns
# needs work
$(OBJS_DIR)/vmm_sol_dev.o := SMOFF += signed_integer_overflow_check
-# a can't happen: vmx_setcap() warn: variable dereferenced before check 'pptr'
-$(OBJS_DIR)/vmx.o := SMOFF += deref_check
-
ALL_BUILDS = $(ALL_BUILDSONLY64)
DEF_BUILDS = $(DEF_BUILDSONLY64)
PRE_INC_PATH = -I$(COMPAT)/bhyve -I$(COMPAT)/bhyve/amd64 \
@@ -61,17 +57,9 @@ PRE_INC_PATH = -I$(COMPAT)/bhyve -I$(COMPAT)/bhyve/amd64 \
INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(UTSBASE)/i86pc/io/vmm/io
AS_INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(OBJS_DIR)
-CFLAGS += -_gcc=-Wimplicit-function-declaration
-# The FreeBSD %# notation makes gcc gripe
-CFLAGS += -_gcc=-Wno-format
# enable collection of VMM statistics
CFLAGS += -DVMM_KEEP_STATS
-$(OBJS_DIR)/vmm.o := CERRWARN += -_gcc=-Wno-pointer-sign -_gcc=-Wno-type-limits
-$(OBJS_DIR)/svm.o := CERRWARN += -_gcc=-Wno-pointer-sign -_gcc=-Wno-type-limits
-$(OBJS_DIR)/vmx.o := CERRWARN += -_gcc=-Wno-unused-variable
-$(OBJS_DIR)/iommu.o := CERRWARN += -_gcc=-Wno-unused-variable
-
LDFLAGS += -N misc/acpica -N misc/pcie -N fs/dev
LDFLAGS += -z type=kmod -M $(MAPFILE)
diff --git a/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c b/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c
index 6b7da6a99a..7be8a4a9f8 100644
--- a/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c
+++ b/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c
@@ -684,13 +684,14 @@ amdf17nbdf_ioctl_kind(intptr_t arg, int mode)
}
static int
-amdf17nbdf_ioctl_temp(amdf17nbdf_t *nbdf, minor_t minor, intptr_t arg, int mode)
+amdf17nbdf_ioctl_scalar(amdf17nbdf_t *nbdf, minor_t minor, intptr_t arg,
+ int mode)
{
amdf17nb_t *nb;
hrtime_t diff;
- sensor_ioctl_temperature_t temp;
+ sensor_ioctl_scalar_t scalar;
- bzero(&temp, sizeof (temp));
+ bzero(&scalar, sizeof (scalar));
mutex_enter(&nbdf->amd_nbdf_lock);
nb = amdf17nbdf_lookup_nb(nbdf, minor);
@@ -710,12 +711,12 @@ amdf17nbdf_ioctl_temp(amdf17nbdf_t *nbdf, minor_t minor, intptr_t arg, int mode)
}
}
- temp.sit_unit = SENSOR_UNIT_CELSIUS;
- temp.sit_temp = nb->amd_nb_temp;
- temp.sit_gran = AMDF17_THERMAL_GRANULARITY;
+ scalar.sis_unit = SENSOR_UNIT_CELSIUS;
+ scalar.sis_value = nb->amd_nb_temp;
+ scalar.sis_gran = AMDF17_THERMAL_GRANULARITY;
mutex_exit(&nbdf->amd_nbdf_lock);
- if (ddi_copyout(&temp, (void *)arg, sizeof (temp),
+ if (ddi_copyout(&scalar, (void *)arg, sizeof (scalar),
mode & FKIOCTL) != 0) {
return (EFAULT);
}
@@ -737,10 +738,10 @@ amdf17nbdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
m = getminor(dev);
switch (cmd) {
- case SENSOR_IOCTL_TYPE:
+ case SENSOR_IOCTL_KIND:
return (amdf17nbdf_ioctl_kind(arg, mode));
- case SENSOR_IOCTL_TEMPERATURE:
- return (amdf17nbdf_ioctl_temp(nbdf, m, arg, mode));
+ case SENSOR_IOCTL_SCALAR:
+ return (amdf17nbdf_ioctl_scalar(nbdf, m, arg, mode));
default:
return (ENOTTY);
}
diff --git a/usr/src/uts/intel/io/amdnbtemp/amdnbtemp.c b/usr/src/uts/intel/io/amdnbtemp/amdnbtemp.c
index 1330f8563f..17934520fd 100644
--- a/usr/src/uts/intel/io/amdnbtemp/amdnbtemp.c
+++ b/usr/src/uts/intel/io/amdnbtemp/amdnbtemp.c
@@ -103,7 +103,7 @@ typedef struct amdnbtemp {
static void *amdnbtemp_state;
static int
-amdnbtemp_read(void *arg, sensor_ioctl_temperature_t *temp)
+amdnbtemp_read(void *arg, sensor_ioctl_scalar_t *scalar)
{
amdnbtemp_t *at = arg;
@@ -120,9 +120,9 @@ amdnbtemp_read(void *arg, sensor_ioctl_temperature_t *temp)
at->at_temp -= AMDNBTEMP_TEMP_ADJUST;
}
- temp->sit_unit = SENSOR_UNIT_CELSIUS;
- temp->sit_gran = AMDNBTEMP_GRANULARITY;
- temp->sit_temp = at->at_temp;
+ scalar->sis_unit = SENSOR_UNIT_CELSIUS;
+ scalar->sis_gran = AMDNBTEMP_GRANULARITY;
+ scalar->sis_value = at->at_temp;
mutex_exit(&at->at_mutex);
return (0);
@@ -130,7 +130,7 @@ amdnbtemp_read(void *arg, sensor_ioctl_temperature_t *temp)
static const ksensor_ops_t amdnbtemp_temp_ops = {
.kso_kind = ksensor_kind_temperature,
- .kso_temp = amdnbtemp_read
+ .kso_scalar = amdnbtemp_read
};
static void
diff --git a/usr/src/uts/intel/io/coretemp/coretemp.c b/usr/src/uts/intel/io/coretemp/coretemp.c
index ee2d143554..bea8078002 100644
--- a/usr/src/uts/intel/io/coretemp/coretemp.c
+++ b/usr/src/uts/intel/io/coretemp/coretemp.c
@@ -259,7 +259,7 @@ coretemp_update(coretemp_t *ct, coretemp_sensor_t *sensor, cmi_hdl_t hdl)
}
static int
-coretemp_read(void *arg, sensor_ioctl_temperature_t *sit)
+coretemp_read(void *arg, sensor_ioctl_scalar_t *scalar)
{
coretemp_sensor_t *sensor = arg;
coretemp_t *ct = sensor->cs_coretemp;
@@ -313,10 +313,10 @@ coretemp_read(void *arg, sensor_ioctl_temperature_t *sit)
sensor->cs_temperature = sensor->cs_tjmax - reading;
sensor->cs_resolution = resolution;
- sit->sit_unit = SENSOR_UNIT_CELSIUS;
- sit->sit_temp = sensor->cs_temperature;
- sit->sit_gran = CORETEMP_GRANULARITY;
- sit->sit_prec = sensor->cs_resolution;
+ scalar->sis_unit = SENSOR_UNIT_CELSIUS;
+ scalar->sis_value = sensor->cs_temperature;
+ scalar->sis_gran = CORETEMP_GRANULARITY;
+ scalar->sis_prec = sensor->cs_resolution;
mutex_exit(&ct->coretemp_mutex);
return (0);
@@ -324,7 +324,7 @@ coretemp_read(void *arg, sensor_ioctl_temperature_t *sit)
static const ksensor_ops_t coretemp_temp_ops = {
.kso_kind = ksensor_kind_temperature,
- .kso_temp = coretemp_read
+ .kso_scalar = coretemp_read
};
static void
diff --git a/usr/src/uts/intel/io/pchtemp/pchtemp.c b/usr/src/uts/intel/io/pchtemp/pchtemp.c
index 4aeb098112..2cfd7ae806 100644
--- a/usr/src/uts/intel/io/pchtemp/pchtemp.c
+++ b/usr/src/uts/intel/io/pchtemp/pchtemp.c
@@ -137,7 +137,7 @@ pchtemp_read_check(pchtemp_t *pch)
}
static int
-pchtemp_read(void *arg, sensor_ioctl_temperature_t *sit)
+pchtemp_read(void *arg, sensor_ioctl_scalar_t *scalar)
{
uint16_t temp, ctt, tahv, talv;
uint8_t tsel;
@@ -175,9 +175,9 @@ pchtemp_read(void *arg, sensor_ioctl_temperature_t *sit)
}
pch->pcht_temp = (temp & PCHTEMP_REG_TEMP_TSR) - PCHTEMP_TEMP_OFFSET;
- sit->sit_unit = SENSOR_UNIT_CELSIUS;
- sit->sit_gran = PCHTEMP_TEMP_RESOLUTION;
- sit->sit_temp = pch->pcht_temp;
+ scalar->sis_unit = SENSOR_UNIT_CELSIUS;
+ scalar->sis_gran = PCHTEMP_TEMP_RESOLUTION;
+ scalar->sis_value = pch->pcht_temp;
mutex_exit(&pch->pcht_mutex);
return (0);
@@ -185,7 +185,7 @@ pchtemp_read(void *arg, sensor_ioctl_temperature_t *sit)
static const ksensor_ops_t pchtemp_temp_ops = {
.kso_kind = ksensor_kind_temperature,
- .kso_temp = pchtemp_read
+ .kso_scalar = pchtemp_read
};
static void
diff --git a/usr/src/uts/intel/tem/Makefile b/usr/src/uts/intel/tem/Makefile
index 9eca2e7d98..1165cf3264 100644
--- a/usr/src/uts/intel/tem/Makefile
+++ b/usr/src/uts/intel/tem/Makefile
@@ -38,7 +38,6 @@ UTSBASE = ../..
#
MODULE = tem
OBJECTS = $(TEM_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(TEM_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE)
#
@@ -50,22 +49,11 @@ include $(UTSBASE)/intel/Makefile.intel
# Define targets
#
ALL_TARGET = $(BINARY)
-LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
LDFLAGS += -dy -Ndacf/consconfig_dacf
#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS += -erroff=E_STATIC_UNUSED
-
-CERRWARN += -_gcc=-Wno-unused-function
-CERRWARN += $(CNOWARN_UNINIT)
-
-#
# Default build targets.
#
.KEEP_STATE:
@@ -78,12 +66,6 @@ clean: $(CLEAN_DEPS)
clobber: $(CLOBBER_DEPS)
-lint: $(LINT_DEPS)
-
-modlintlib: $(MODLINTLIB_DEPS)
-
-clean.lint: $(CLEAN_LINT_DEPS)
-
install: $(INSTALL_DEPS)
#
diff --git a/usr/src/uts/sparc/tem/Makefile b/usr/src/uts/sparc/tem/Makefile
index 12d9741c56..ee46e5852d 100644
--- a/usr/src/uts/sparc/tem/Makefile
+++ b/usr/src/uts/sparc/tem/Makefile
@@ -40,9 +40,6 @@ UTSBASE = ../..
#
MODULE = tem
OBJECTS = $(TEM_OBJS:%=$(OBJS_DIR)/%) $(FONT_OBJS:%=$(OBJS_DIR)/%)
-
-LINTS = $(TEM_OBJS:%.o=$(LINTS_DIR)/%.ln)
-LINTS += $(FONT_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE)
#
@@ -54,24 +51,11 @@ include $(UTSBASE)/sparc/Makefile.sparc
# Define targets
#
ALL_TARGET = $(BINARY)
-LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
-CFLAGS += $(CCVERBOSE)
-
LDFLAGS += -dy -Ndacf/consconfig_dacf
#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS += -erroff=E_STATIC_UNUSED
-
-CERRWARN += -_gcc=-Wno-unused-function
-CERRWARN += $(CNOWARN_UNINIT)
-
-#
# Default build targets.
#
.KEEP_STATE:
@@ -84,19 +68,9 @@ clean: $(CLEAN_DEPS)
clobber: $(CLOBBER_DEPS)
-lint: $(LINT_DEPS)
-
-modlintlib: $(MODLINTLIB_DEPS)
-
-clean.lint: $(CLEAN_LINT_DEPS)
-
install: $(INSTALL_DEPS)
#
# Include common targets.
#
include $(UTSBASE)/sparc/Makefile.targ
-
-CLOBBERFILES += \
- $(OBJS_DIR)/$(VGATEXT_FONT).o \
- $(OBJS_DIR)/$(VGATEXT_FONT).c
diff --git a/usr/src/uts/sun/sys/ser_async.h b/usr/src/uts/sun/sys/ser_async.h
index 8e8a573829..0f89bce4c5 100644
--- a/usr/src/uts/sun/sys/ser_async.h
+++ b/usr/src/uts/sun/sys/ser_async.h
@@ -27,8 +27,6 @@
#ifndef _SYS_SER_ASYNC_H
#define _SYS_SER_ASYNC_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Initial port setup parameters for async lines
*/
@@ -57,9 +55,9 @@ extern "C" {
#define ZFIFOSZ 3
/*
- * this macro needs a constant Hertz, but we can now have a hires_tick.
+ * This macro needs a constant 100 Hz, but hires_tick or hz may change that.
* ztdelay in zs_async.c converts to a true delay based on hz so we
- * can use 100 for Hertz here.
+ * can use 100 Hz here.
*/
#define ZDELAY(n) ZSDelayConst(100, ZFIFOSZ, NBBY, n)
@@ -166,9 +164,9 @@ struct asyncline {
* and the second byte is the actual data. The ring buffer
* needs to be defined as ushort_t to accomodate this.
*/
- ushort_t za_ring[RINGSIZE];
+ ushort_t za_ring[RINGSIZE];
timeout_id_t za_kick_rcv_id;
- int za_kick_rcv_count;
+ int za_kick_rcv_count;
timeout_id_t za_zsa_restart_id;
bufcall_id_t za_bufcid;
mblk_t *za_rstandby[ZSA_MAX_RSTANDBY];
diff --git a/usr/src/uts/sun4u/chicago/io/fpc/fpc-impl-4u.c b/usr/src/uts/sun4u/chicago/io/fpc/fpc-impl-4u.c
index aa96f19079..a8ceea0344 100644
--- a/usr/src/uts/sun4u/chicago/io/fpc/fpc-impl-4u.c
+++ b/usr/src/uts/sun4u/chicago/io/fpc/fpc-impl-4u.c
@@ -24,6 +24,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright 2020 Nexenta by DDN, Inc. All rights reserved.
+ */
+
#include <sys/file.h>
#include <sys/sunndi.h>
#include <sys/sunddi.h>
@@ -101,7 +105,6 @@ static uint64_t counter_reg_offsets[] = {
static ldi_ident_t ldi_identifier;
static boolean_t ldi_identifier_valid = B_FALSE;
-static cred_t *credentials = NULL;
/* Called by _init to determine if it is OK to install driver. */
int
@@ -116,7 +119,6 @@ fpc_platform_module_init(dev_info_t *dip)
{
int status;
- credentials = crget();
status = ldi_ident_from_dip(dip, &ldi_identifier);
if (status == 0)
ldi_identifier_valid = B_TRUE;
@@ -211,8 +213,6 @@ fpc_platform_module_fini(dev_info_t *dip)
{
if (ldi_identifier_valid)
ldi_ident_release(ldi_identifier);
- if (credentials)
- crfree(credentials);
}
fire_perfreg_handle_t
@@ -226,7 +226,7 @@ fpc_get_perfreg_handle(int devnum)
if ((handle_impl->devspec =
fpc_get_platform_data_by_number(devnum)) != NULL) {
rval = ldi_open_by_name(handle_impl->devspec->nodename,
- OPEN_FLAGS, credentials, &handle_impl->devhandle,
+ OPEN_FLAGS, kcred, &handle_impl->devhandle,
ldi_identifier);
}
@@ -243,7 +243,7 @@ fpc_free_counter_handle(fire_perfreg_handle_t handle)
{
fire_counter_handle_impl_t *handle_impl =
(fire_counter_handle_impl_t *)handle;
- (void) ldi_close(handle_impl->devhandle, OPEN_FLAGS, credentials);
+ (void) ldi_close(handle_impl->devhandle, OPEN_FLAGS, kcred);
kmem_free(handle_impl, sizeof (fire_counter_handle_impl_t));
return (SUCCESS);
}
@@ -281,7 +281,7 @@ fpc_event_io(fire_perfreg_handle_t handle, fire_perfcnt_t group,
/* Read original value. */
if (((rval = ldi_ioctl(handle_impl->devhandle, cmd, (intptr_t)&prg,
- FKIOCTL, credentials, &ioctl_rval)) == SUCCESS) && (!is_write)) {
+ FKIOCTL, kcred, &ioctl_rval)) == SUCCESS) && (!is_write)) {
*reg_data = prg.data;
}
@@ -322,7 +322,7 @@ fpc_counter_io(fire_perfreg_handle_t handle, fire_perfcnt_t group,
prg.data = *value;
if (((rval = ldi_ioctl(handle_impl->devhandle, command, (intptr_t)&prg,
- FKIOCTL, credentials, &ioctl_rval)) == SUCCESS) && (!is_write)) {
+ FKIOCTL, kcred, &ioctl_rval)) == SUCCESS) && (!is_write)) {
*value = prg.data;
}
diff --git a/usr/src/uts/sun4v/ontario/io/tsalarm.c b/usr/src/uts/sun4v/ontario/io/tsalarm.c
index 7fb9577028..6f80db8e88 100644
--- a/usr/src/uts/sun4v/ontario/io/tsalarm.c
+++ b/usr/src/uts/sun4v/ontario/io/tsalarm.c
@@ -544,7 +544,7 @@ FAIL:
if (rv != 0) {
if (softc->flags & TSAL_OPENED)
(void) ldi_close(softc->lh, FREAD|FWRITE, credp);
- if (softc->flags * TSAL_IDENTED)
+ if (softc->flags & TSAL_IDENTED)
(void) ldi_ident_release(softc->li);
softc->flags &= ~(TSAL_OPENED | TSAL_IDENTED);
if (softc->req_ptr != NULL)