diff options
Diffstat (limited to 'usr/src/uts')
99 files changed, 2560 insertions, 2368 deletions
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 08fb3d45ac..e973cf58ad 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -1694,7 +1694,7 @@ TEM_OBJS += tem.o tem_safe.o # Font data for generated console fonts # i386_FONT = 8x16 -i386_FONT_SRC= ter-u16n +i386_FONT_SRC= ter-u16b sparc_FONT = 12x22 sparc_FONT_SRC= Gallant19 FONT=$($(MACH)_FONT) @@ -2342,4 +2342,4 @@ BNX_OBJS += \ # mlxcx(7D) # MLXCX_OBJS += mlxcx.o mlxcx_dma.o mlxcx_cmd.o mlxcx_intr.o mlxcx_gld.o \ - mlxcx_ring.o + mlxcx_ring.o mlxcx_sensor.o diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c index c44c32ef29..575acd59a2 100644 --- a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c +++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c @@ -22,6 +22,7 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * Copyright 2019 Joyent, Inc. + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. */ /* @@ -470,7 +471,7 @@ typedef struct lxpr_rlimtab { char *rlim_rctl; /* rctl source */ } lxpr_rlimtab_t; -#define RLIM_MAXFD "Max open files" +#define RLIM_MAXFD "Max open files" static lxpr_rlimtab_t lxpr_rlimtab[] = { { "Max cpu time", "seconds", "process.max-cpu-time" }, @@ -1737,8 +1738,9 @@ lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) * match the max value so that we do not output "unlimited". */ if (strcmp(lxpr_rlimtab[i].rlim_name, RLIM_MAXFD) == 0 && - cur[i] == RLIM_INFINITY) - cur[i] = max[i]; + cur[i] == RLIM_INFINITY) { + cur[i] = max[i]; + } } lxpr_unlock(p); @@ -4001,10 +4003,10 @@ lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) * model, so just inform the caller that no swap is being used. * * MemAvailable - * MemAvailable entry is available since Linux Kernel +3.14, is an - * estimate of how much memory is available for starting new applications, - * without swapping. 
In lxbrand we will always return the available free - * memory as an estimate of this value. + * MemAvailable entry is available since Linux Kernel +3.14, is an + * estimate of how much memory is available for starting new + * applications, without swapping. In lxbrand we will always return the + * available free memory as an estimate of this value. */ lxpr_uiobuf_printf(uiobuf, "MemTotal: %8lu kB\n" @@ -8094,6 +8096,58 @@ lxpr_write_pid_loginuid(lxpr_node_t *lxpnp, struct uio *uio, struct cred *cr, return (0); } +static int +lxpr_readlink_exe(lxpr_node_t *lxpnp, char *buf, size_t size, cred_t *cr) +{ + size_t dlen = DIRENT64_RECLEN(MAXPATHLEN); + dirent64_t *dp; + vnode_t *dirvp; + int error = ENOENT; + char *dbuf; + proc_t *p; + size_t len; + + p = lxpr_lock(lxpnp, NO_ZOMB); + + if (p == NULL) + return (error); + + dirvp = p->p_execdir; + if (dirvp == NULL) { + lxpr_unlock(p); + return (error); + } + + VN_HOLD(dirvp); + lxpr_unlock(p); + + /* Look up the parent directory path */ + if ((error = vnodetopath(NULL, dirvp, buf, size, cr)) != 0) { + VN_RELE(dirvp); + return (error); + } + + len = strlen(buf); + + dbuf = kmem_alloc(dlen, KM_SLEEP); + + /* + * Walk the parent directory to find the vnode for p->p_exec, in order + * to derive its path. + */ + if ((error = dirfindvp(NULL, dirvp, lxpnp->lxpr_realvp, + cr, dbuf, dlen, &dp)) == 0 && + strlen(dp->d_name) + len + 1 < size) { + buf[len] = '/'; + (void) strcpy(buf + len + 1, dp->d_name); + } else { + error = ENOENT; + } + VN_RELE(dirvp); + kmem_free(dbuf, dlen); + return (error); +} + /* * lxpr_readlink(): Vnode operation for VOP_READLINK() */ @@ -8135,7 +8189,16 @@ lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct) if (error != 0) return (error); - if ((error = vnodetopath(NULL, rvp, bp, buflen, cr)) != 0) { + error = vnodetopath(NULL, rvp, bp, buflen, cr); + + /* + * Special handling for /proc/<pid>/exe where the vnode path is + * not cached. 
+ */ + if (error != 0 && lxpnp->lxpr_type == LXPR_PID_EXE) + error = lxpr_readlink_exe(lxpnp, bp, buflen, cr); + + if (error != 0) { /* * Special handling possible for /proc/<pid>/fd/<num> * Generate <type>:[<inode>] links, if allowed. diff --git a/usr/src/uts/common/conf/param.c b/usr/src/uts/common/conf/param.c index 1120748b98..06920c3574 100644 --- a/usr/src/uts/common/conf/param.c +++ b/usr/src/uts/common/conf/param.c @@ -116,7 +116,7 @@ const unsigned int _diskrpm = (unsigned int)DISKRPM; const unsigned long _pgthresh = (unsigned long)PGTHRESH; const unsigned int _maxslp = (unsigned int)MAXSLP; const unsigned long _maxhandspreadpages = (unsigned long)MAXHANDSPREADPAGES; -const int _ncpu = (int)NCPU; +const int _ncpu = (int)NCPU; const int _ncpu_log2 = (int)NCPU_LOG2; const int _ncpu_p2 = (int)NCPU_P2; const unsigned long _defaultstksz = (unsigned long)DEFAULTSTKSZ; @@ -131,9 +131,12 @@ const unsigned int _nbpg = (unsigned int)MMU_PAGESIZE; */ /* - * Default hz is 100, but if we set hires_tick we get higher resolution - * clock behavior (currently defined to be 1000 hz). Higher values seem - * to work, but are not supported. + * hz is 100, but we set hires_tick to get higher resolution clock behavior + * (currently defined to be 1000 hz). Higher values seem to work, but are not + * supported. + * + * This is configured via hires_tick to allow users to explicitly customize it + * to 0 should the need arise. 
* * If we do decide to play with higher values, remember that hz should * satisfy the following constraints to avoid integer round-off problems: @@ -160,7 +163,7 @@ const unsigned int _nbpg = (unsigned int)MMU_PAGESIZE; int hz = HZ_DEFAULT; int hires_hz = HIRES_HZ_DEFAULT; -int hires_tick = 0; +int hires_tick = 1; int cpu_decay_factor = 10; /* this is no longer tied to clock */ int max_hres_adj; /* maximum adjustment of hrtime per tick */ int tick_per_msec; /* clock ticks per millisecond (zero if hz < 1000) */ diff --git a/usr/src/uts/common/fs/smbsrv/smb2_fsctl_copychunk.c b/usr/src/uts/common/fs/smbsrv/smb2_fsctl_copychunk.c index 4240328207..4a657bbf19 100644 --- a/usr/src/uts/common/fs/smbsrv/smb2_fsctl_copychunk.c +++ b/usr/src/uts/common/fs/smbsrv/smb2_fsctl_copychunk.c @@ -447,6 +447,8 @@ smb2_fsctl_copychunk_meta(smb_request_t *sr, smb_ofile_t *src_of) * here don't generally have WRITE_DAC access (sigh) so we * have to bypass ofile access checks for this operation. * The file-system level still does its access checking. + * + * TODO: this should really copy the SACL, too. */ smb_fssd_init(&fs_sd, secinfo, sd_flags); sr->fid_ofile = NULL; diff --git a/usr/src/uts/common/fs/smbsrv/smb_fsops.c b/usr/src/uts/common/fs/smbsrv/smb_fsops.c index 8fafac5f60..43b513e840 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_fsops.c +++ b/usr/src/uts/common/fs/smbsrv/smb_fsops.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2018 Nexenta Systems, Inc. All rights reserved. + * Copyright 2020 Nexenta by DDN, Inc. All rights reserved. 
*/ #include <sys/sid.h> @@ -147,10 +147,9 @@ smb_fsop_create_with_sd(smb_request_t *sr, cred_t *cr, is_dir = ((fs_sd->sd_flags & SMB_FSSD_FLAGS_DIR) != 0); if (smb_tree_has_feature(sr->tid_tree, SMB_TREE_ACLONCREATE)) { - if (fs_sd->sd_secinfo & SMB_ACL_SECINFO) { - dacl = fs_sd->sd_zdacl; - sacl = fs_sd->sd_zsacl; - ASSERT(dacl || sacl); + dacl = fs_sd->sd_zdacl; + sacl = fs_sd->sd_zsacl; + if (dacl != NULL || sacl != NULL) { if (dacl && sacl) { acl = smb_fsacl_merge(dacl, sacl); } else if (dacl) { @@ -466,15 +465,20 @@ smb_fsop_create_file(smb_request_t *sr, cred_t *cr, if (op->sd) { /* * SD sent by client in Windows format. Needs to be - * converted to FS format. No inheritance. + * converted to FS format. Inherit DACL/SACL if they're not + * specified. */ secinfo = smb_sd_get_secinfo(op->sd); + smb_fssd_init(&fs_sd, secinfo, 0); status = smb_sd_tofs(op->sd, &fs_sd); if (status == NT_STATUS_SUCCESS) { - rc = smb_fsop_create_with_sd(sr, cr, dnode, - name, attr, ret_snode, &fs_sd); + rc = smb_fsop_sdinherit(sr, dnode, &fs_sd); + if (rc == 0) + rc = smb_fsop_create_with_sd(sr, cr, dnode, + name, attr, ret_snode, &fs_sd); + } else { rc = EINVAL; } @@ -485,7 +489,7 @@ smb_fsop_create_file(smb_request_t *sr, cred_t *cr, * Server applies Windows inheritance rules, * see smb_fsop_sdinherit() comments as to why. */ - smb_fssd_init(&fs_sd, SMB_ACL_SECINFO, 0); + smb_fssd_init(&fs_sd, 0, 0); rc = smb_fsop_sdinherit(sr, dnode, &fs_sd); if (rc == 0) { rc = smb_fsop_create_with_sd(sr, cr, dnode, @@ -607,15 +611,19 @@ smb_fsop_mkdir( if (op->sd) { /* * SD sent by client in Windows format. Needs to be - * converted to FS format. No inheritance. + * converted to FS format. Inherit DACL/SACL if they're not + * specified. 
*/ secinfo = smb_sd_get_secinfo(op->sd); + smb_fssd_init(&fs_sd, secinfo, SMB_FSSD_FLAGS_DIR); status = smb_sd_tofs(op->sd, &fs_sd); if (status == NT_STATUS_SUCCESS) { - rc = smb_fsop_create_with_sd(sr, cr, dnode, - name, attr, ret_snode, &fs_sd); + rc = smb_fsop_sdinherit(sr, dnode, &fs_sd); + if (rc == 0) + rc = smb_fsop_create_with_sd(sr, cr, dnode, + name, attr, ret_snode, &fs_sd); } else rc = EINVAL; @@ -626,7 +634,7 @@ smb_fsop_mkdir( * Server applies Windows inheritance rules, * see smb_fsop_sdinherit() comments as to why. */ - smb_fssd_init(&fs_sd, SMB_ACL_SECINFO, SMB_FSSD_FLAGS_DIR); + smb_fssd_init(&fs_sd, 0, SMB_FSSD_FLAGS_DIR); rc = smb_fsop_sdinherit(sr, dnode, &fs_sd); if (rc == 0) { rc = smb_fsop_create_with_sd(sr, cr, dnode, @@ -2391,6 +2399,8 @@ smb_fsop_sdmerge(smb_request_t *sr, smb_node_t *snode, smb_fssd_t *fs_sd) * owner has been specified. Callers should translate this to * STATUS_INVALID_OWNER which is not the normal mapping for EPERM * in upper layers, so EPERM is mapped to EBADE. + * + * If 'overwrite' is non-zero, then the existing ACL is ignored. */ int smb_fsop_sdwrite(smb_request_t *sr, cred_t *cr, smb_node_t *snode, @@ -2456,14 +2466,13 @@ smb_fsop_sdwrite(smb_request_t *sr, cred_t *cr, smb_node_t *snode, } if (fs_sd->sd_secinfo & SMB_ACL_SECINFO) { - if (overwrite == 0) { + if (overwrite == 0) error = smb_fsop_sdmerge(sr, snode, fs_sd); - if (error) - return (error); - } - error = smb_fsop_aclwrite(sr, cr, snode, fs_sd); - if (error) { + if (error == 0) + error = smb_fsop_aclwrite(sr, cr, snode, fs_sd); + + if (error != 0) { /* * Revert uid/gid changes if required. */ @@ -2511,39 +2520,46 @@ smb_fsop_sdinherit(smb_request_t *sr, smb_node_t *dnode, smb_fssd_t *fs_sd) acl_t *sacl = NULL; int is_dir; int error; + uint32_t secinfo; + smb_fssd_t pfs_sd; ASSERT(fs_sd); - if (sr->tid_tree->t_acltype != ACE_T) { - /* - * No forced inheritance for non-ZFS filesystems. 
- */ - fs_sd->sd_secinfo = 0; + secinfo = fs_sd->sd_secinfo; + + /* Anything to do? */ + if ((secinfo & SMB_ACL_SECINFO) == SMB_ACL_SECINFO) + return (0); + + /* + * No forced inheritance for non-ZFS filesystems. + */ + if (sr->tid_tree->t_acltype != ACE_T) return (0); - } + smb_fssd_init(&pfs_sd, SMB_ACL_SECINFO, fs_sd->sd_flags); /* Fetch parent directory's ACL */ - error = smb_fsop_sdread(sr, zone_kcred(), dnode, fs_sd); + error = smb_fsop_sdread(sr, zone_kcred(), dnode, &pfs_sd); if (error) { return (error); } is_dir = (fs_sd->sd_flags & SMB_FSSD_FLAGS_DIR); - dacl = smb_fsacl_inherit(fs_sd->sd_zdacl, is_dir, SMB_DACL_SECINFO, - sr->user_cr); - sacl = smb_fsacl_inherit(fs_sd->sd_zsacl, is_dir, SMB_SACL_SECINFO, - sr->user_cr); - - if (sacl == NULL) - fs_sd->sd_secinfo &= ~SMB_SACL_SECINFO; - - smb_fsacl_free(fs_sd->sd_zdacl); - smb_fsacl_free(fs_sd->sd_zsacl); + if ((secinfo & SMB_DACL_SECINFO) == 0) { + dacl = smb_fsacl_inherit(pfs_sd.sd_zdacl, is_dir, + SMB_DACL_SECINFO, sr->user_cr); + fs_sd->sd_zdacl = dacl; + } - fs_sd->sd_zdacl = dacl; - fs_sd->sd_zsacl = sacl; + if ((secinfo & SMB_SACL_SECINFO) == 0) { + sacl = smb_fsacl_inherit(pfs_sd.sd_zsacl, is_dir, + SMB_SACL_SECINFO, sr->user_cr); + fs_sd->sd_zsacl = sacl; + } + smb_fsacl_free(pfs_sd.sd_zdacl); + smb_fsacl_free(pfs_sd.sd_zsacl); return (0); } #endif /* _KERNEL */ diff --git a/usr/src/uts/common/fs/smbsrv/smb_idmap.c b/usr/src/uts/common/fs/smbsrv/smb_idmap.c index b9bfa991c4..e6c04193b0 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_idmap.c +++ b/usr/src/uts/common/fs/smbsrv/smb_idmap.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2018 Nexenta Systems, Inc. All rights reserved. + * Copyright 2020 Nexenta by DDN, Inc. All rights reserved. 
*/ /* @@ -83,12 +83,12 @@ smb_idmap_getsid(uid_t id, int idtype, smb_sid_t **sid) switch (idtype) { case SMB_IDMAP_USER: - sim.sim_stat = kidmap_getsidbyuid(global_zone, id, + sim.sim_stat = kidmap_getsidbyuid(curzone, id, (const char **)&sim.sim_domsid, &sim.sim_rid); break; case SMB_IDMAP_GROUP: - sim.sim_stat = kidmap_getsidbygid(global_zone, id, + sim.sim_stat = kidmap_getsidbygid(curzone, id, (const char **)&sim.sim_domsid, &sim.sim_rid); break; @@ -150,17 +150,17 @@ smb_idmap_getid(smb_sid_t *sid, uid_t *id, int *idtype) switch (*idtype) { case SMB_IDMAP_USER: - sim.sim_stat = kidmap_getuidbysid(global_zone, sim.sim_domsid, + sim.sim_stat = kidmap_getuidbysid(curzone, sim.sim_domsid, sim.sim_rid, sim.sim_id); break; case SMB_IDMAP_GROUP: - sim.sim_stat = kidmap_getgidbysid(global_zone, sim.sim_domsid, + sim.sim_stat = kidmap_getgidbysid(curzone, sim.sim_domsid, sim.sim_rid, sim.sim_id); break; case SMB_IDMAP_UNKNOWN: - sim.sim_stat = kidmap_getpidbysid(global_zone, sim.sim_domsid, + sim.sim_stat = kidmap_getpidbysid(curzone, sim.sim_domsid, sim.sim_rid, sim.sim_id, &sim.sim_idtype); break; @@ -186,7 +186,7 @@ smb_idmap_batch_create(smb_idmap_batch_t *sib, uint16_t nmap, int flags) bzero(sib, sizeof (smb_idmap_batch_t)); - sib->sib_idmaph = kidmap_get_create(global_zone); + sib->sib_idmaph = kidmap_get_create(curzone); sib->sib_flags = flags; sib->sib_nmap = nmap; diff --git a/usr/src/uts/common/fs/smbsrv/smb_sd.c b/usr/src/uts/common/fs/smbsrv/smb_sd.c index ddbd7b9413..f7e056c511 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_sd.c +++ b/usr/src/uts/common/fs/smbsrv/smb_sd.c @@ -22,7 +22,7 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright 2020 Nexenta by DDN, Inc. All rights reserved. 
*/ /* @@ -243,16 +243,29 @@ smb_sd_tofs(smb_sd_t *sd, smb_fssd_t *fs_sd) } } + /* + * In SMB, the 'secinfo' determines which parts of the SD the client + * intends to change. Notably, this includes changing the DACL_PRESENT + * and SACL_PRESENT control bits. The client can specify e.g. + * SACL_SECINFO, but not SACL_PRESENT, and this means the client intends + * to remove the SACL. + * + * If the *_PRESENT bit isn't set, then the respective ACL will be NULL. + * [MS-DTYP] disallows providing an ACL when the PRESENT bit isn't set. + * This is enforced by smb_decode_sd(). + * + * We allow the SACL to be NULL, but we MUST have a DACL. + * If the DACL is NULL, that's equivalent to "everyone:full_set:allow". + */ + /* DACL */ if (fs_sd->sd_secinfo & SMB_DACL_SECINFO) { - if (sd->sd_control & SE_DACL_PRESENT) { - status = smb_acl_to_zfs(sd->sd_dacl, flags, - SMB_DACL_SECINFO, &fs_sd->sd_zdacl); - if (status != NT_STATUS_SUCCESS) - return (status); - } - else - return (NT_STATUS_INVALID_ACL); + ASSERT3U(((sd->sd_control & SE_DACL_PRESENT) != 0), ==, + (sd->sd_dacl != NULL)); + status = smb_acl_to_zfs(sd->sd_dacl, flags, + SMB_DACL_SECINFO, &fs_sd->sd_zdacl); + if (status != NT_STATUS_SUCCESS) + return (status); } /* SACL */ @@ -263,8 +276,6 @@ smb_sd_tofs(smb_sd_t *sd, smb_fssd_t *fs_sd) if (status != NT_STATUS_SUCCESS) { return (status); } - } else { - return (NT_STATUS_INVALID_ACL); } } diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c index 9e04e5e00d..939282b378 100644 --- a/usr/src/uts/common/fs/zfs/arc.c +++ b/usr/src/uts/common/fs/zfs/arc.c @@ -2538,7 +2538,7 @@ arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb, */ ret = SET_ERROR(EIO); spa_log_error(spa, zb); - zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, + (void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, spa, NULL, zb, NULL, 0, 0); } @@ -5801,7 +5801,8 @@ arc_read_done(zio_t *zio) error = SET_ERROR(EIO); if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { 
spa_log_error(zio->io_spa, &acb->acb_zb); - zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, + (void) zfs_ereport_post( + FM_EREPORT_ZFS_AUTHENTICATION, zio->io_spa, NULL, &acb->acb_zb, zio, 0, 0); } } @@ -6058,7 +6059,7 @@ top: rc = SET_ERROR(EIO); if ((zio_flags & ZIO_FLAG_SPECULATIVE) == 0) { spa_log_error(spa, zb); - zfs_ereport_post( + (void) zfs_ereport_post( FM_EREPORT_ZFS_AUTHENTICATION, spa, NULL, zb, NULL, 0, 0); } diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c index f5ef390896..345189f695 100644 --- a/usr/src/uts/common/fs/zfs/dnode.c +++ b/usr/src/uts/common/fs/zfs/dnode.c @@ -1197,7 +1197,7 @@ dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object, dnode_t *dn; zrl_init(&dnh->dnh_zrlock); - zrl_tryenter(&dnh->dnh_zrlock); + VERIFY3U(1, ==, zrl_tryenter(&dnh->dnh_zrlock)); dn = dnode_create(os, dnp, NULL, object, dnh); DNODE_VERIFY(dn); diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c index dc7317b411..4a060403da 100644 --- a/usr/src/uts/common/fs/zfs/dnode_sync.c +++ b/usr/src/uts/common/fs/zfs/dnode_sync.c @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. + * Copyright 2020 Oxide Computer Company */ #include <sys/zfs_context.h> @@ -736,13 +737,22 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) dsfra.dsfra_dnode = dn; dsfra.dsfra_tx = tx; dsfra.dsfra_free_indirects = freeing_dnode; + mutex_enter(&dn->dn_mtx); if (freeing_dnode) { ASSERT(range_tree_contains(dn->dn_free_ranges[txgoff], 0, dn->dn_maxblkid + 1)); } - mutex_enter(&dn->dn_mtx); - range_tree_vacate(dn->dn_free_ranges[txgoff], + /* + * Because dnode_sync_free_range() must drop dn_mtx during its + * processing, using it as a callback to range_tree_vacate() is + * not safe. 
No other operations (besides destroy) are allowed + * once range_tree_vacate() has begun, and dropping dn_mtx + * would leave a window open for another thread to observe that + * invalid (and unsafe) state. + */ + range_tree_walk(dn->dn_free_ranges[txgoff], dnode_sync_free_range, &dsfra); + range_tree_vacate(dn->dn_free_ranges[txgoff], NULL, NULL); range_tree_destroy(dn->dn_free_ranges[txgoff]); dn->dn_free_ranges[txgoff] = NULL; mutex_exit(&dn->dn_mtx); diff --git a/usr/src/uts/common/fs/zfs/lua/ldebug.c b/usr/src/uts/common/fs/zfs/lua/ldebug.c index b8ddcff3c6..4ed0094bde 100644 --- a/usr/src/uts/common/fs/zfs/lua/ldebug.c +++ b/usr/src/uts/common/fs/zfs/lua/ldebug.c @@ -467,7 +467,7 @@ static const char *getfuncname (lua_State *L, CallInfo *ci, const char **name) { return getobjname(p, pc, GETARG_A(i), name); case OP_TFORCALL: { /* for iterator */ *name = "for iterator"; - return "for iterator"; + return "for iterator"; } /* all other instructions can call only through metamethods */ case OP_SELF: diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c index 42ba1f9a46..fe53d142c2 100644 --- a/usr/src/uts/common/fs/zfs/metaslab.c +++ b/usr/src/uts/common/fs/zfs/metaslab.c @@ -2414,7 +2414,7 @@ metaslab_load_impl(metaslab_t *msp) msp->ms_max_size = metaslab_largest_allocatable(msp); ASSERT3U(max_size, <=, msp->ms_max_size); hrtime_t load_end = gethrtime(); - msp->ms_load_time = load_end; + msp->ms_load_time = load_end; if (zfs_flags & ZFS_DEBUG_LOG_SPACEMAP) { zfs_dbgmsg("loading: txg %llu, spa %s, vdev_id %llu, " "ms_id %llu, smp_length %llu, " diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index fc08eebbc0..a040fbfea5 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -2408,7 +2408,8 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type) spa->spa_loaded_ts.tv_nsec = 0; } if (error != EBADF) { - zfs_ereport_post(ereport, spa, NULL, NULL, NULL, 0, 
0); + (void) zfs_ereport_post(ereport, spa, + NULL, NULL, NULL, 0, 0); } } spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c index 4719696ca4..ae814208fd 100644 --- a/usr/src/uts/common/fs/zfs/spa_config.c +++ b/usr/src/uts/common/fs/zfs/spa_config.c @@ -280,7 +280,8 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent) * resource issues are resolved. */ if (target->spa_ccw_fail_time == 0) { - zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE, + (void) zfs_ereport_post( + FM_EREPORT_ZFS_CONFIG_CACHE_WRITE, target, NULL, NULL, NULL, 0, 0); } target->spa_ccw_fail_time = gethrtime(); diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index e82b309537..254af68099 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -1365,7 +1365,7 @@ vdev_probe_done(zio_t *zio) } else { ASSERT(zio->io_error != 0); vdev_dbgmsg(vd, "failed probe"); - zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE, + (void) zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE, spa, vd, NULL, NULL, 0, 0); zio->io_error = SET_ERROR(ENXIO); } @@ -1717,7 +1717,8 @@ vdev_open(vdev_t *vd) */ if (ashift > vd->vdev_top->vdev_ashift && vd->vdev_ops->vdev_op_leaf) { - zfs_ereport_post(FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT, + (void) zfs_ereport_post( + FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT, spa, vd, NULL, NULL, 0, 0); } @@ -4408,7 +4409,7 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux) class = FM_EREPORT_ZFS_DEVICE_UNKNOWN; } - zfs_ereport_post(class, spa, vd, NULL, NULL, + (void) zfs_ereport_post(class, spa, vd, NULL, NULL, save_state, 0); } diff --git a/usr/src/uts/common/fs/zfs/vdev_indirect.c b/usr/src/uts/common/fs/zfs/vdev_indirect.c index effea61bc6..6c636dd4d2 100644 --- a/usr/src/uts/common/fs/zfs/vdev_indirect.c +++ b/usr/src/uts/common/fs/zfs/vdev_indirect.c @@ -1382,8 +1382,8 @@ 
vdev_indirect_checksum_error(zio_t *zio, void *bad_buf = abd_borrow_buf_copy(ic->ic_data, is->is_size); abd_t *good_abd = is->is_good_child->ic_data; void *good_buf = abd_borrow_buf_copy(good_abd, is->is_size); - zfs_ereport_post_checksum(zio->io_spa, vd, &zio->io_bookmark, zio, - is->is_target_offset, is->is_size, good_buf, bad_buf, &zbc); + (void) zfs_ereport_post_checksum(zio->io_spa, vd, &zio->io_bookmark, + zio, is->is_target_offset, is->is_size, good_buf, bad_buf, &zbc); abd_return_buf(ic->ic_data, bad_buf, is->is_size); abd_return_buf(good_abd, good_buf, is->is_size); } @@ -1459,7 +1459,7 @@ vdev_indirect_all_checksum_errors(zio_t *zio) vd->vdev_stat.vs_checksum_errors++; mutex_exit(&vd->vdev_stat_lock); - zfs_ereport_post_checksum(zio->io_spa, vd, + (void) zfs_ereport_post_checksum(zio->io_spa, vd, &zio->io_bookmark, zio, is->is_target_offset, is->is_size, NULL, NULL, NULL); } diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c index e4db03ce89..381c2ff84f 100644 --- a/usr/src/uts/common/fs/zfs/vdev_raidz.c +++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c @@ -1968,7 +1968,7 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data) zbc.zbc_has_cksum = 0; zbc.zbc_injected = rm->rm_ecksuminjected; - zfs_ereport_post_checksum(zio->io_spa, vd, + (void) zfs_ereport_post_checksum(zio->io_spa, vd, &zio->io_bookmark, zio, rc->rc_offset, rc->rc_size, rc->rc_abd, bad_data, &zbc); } diff --git a/usr/src/uts/common/fs/zfs/zfs_fm.c b/usr/src/uts/common/fs/zfs/zfs_fm.c index dd854c12e1..2118fd549e 100644 --- a/usr/src/uts/common/fs/zfs/zfs_fm.c +++ b/usr/src/uts/common/fs/zfs/zfs_fm.c @@ -735,7 +735,7 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb, report->zcr_length = length; #ifdef _KERNEL - zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector, + (void) zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector, FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio, offset, length); if 
(report->zcr_ereport == NULL) { diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c index 99011b83b4..c016b5c1ea 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c @@ -4839,7 +4839,7 @@ zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, { if (vp->v_type == VDIR) return (0); - return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); + return ((*noffp < 0) ? EINVAL : 0); } /* diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index 5215a58bf2..9981263343 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -483,7 +483,7 @@ error: zio->io_error = SET_ERROR(EIO); if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { spa_log_error(spa, &zio->io_bookmark); - zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, + (void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, spa, NULL, &zio->io_bookmark, zio, 0, 0); } } else { @@ -1995,7 +1995,7 @@ zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason) "failure and has been suspended; `zpool clear` will be required " "before the pool can be written to.", spa_name(spa)); - zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, + (void) zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, NULL, NULL, 0, 0); mutex_enter(&spa->spa_suspend_lock); @@ -4265,7 +4265,7 @@ zio_done(zio_t *zio) zio->io_vd->vdev_stat.vs_slow_ios++; mutex_exit(&zio->io_vd->vdev_stat_lock); - zfs_ereport_post(FM_EREPORT_ZFS_DELAY, + (void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0, 0); } @@ -4280,7 +4280,7 @@ zio_done(zio_t *zio) * device is currently unavailable. 
*/ if (zio->io_error != ECKSUM && vd != NULL && !vdev_is_dead(vd)) - zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd, + (void) zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd, &zio->io_bookmark, zio, 0, 0); if ((zio->io_error == EIO || !(zio->io_flags & @@ -4291,7 +4291,7 @@ zio_done(zio_t *zio) * error and generate a logical data ereport. */ spa_log_error(spa, &zio->io_bookmark); - zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL, + (void) zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL, &zio->io_bookmark, zio, 0, 0); } } diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c index 2e684a5ff0..2495fb015d 100644 --- a/usr/src/uts/common/fs/zfs/zvol.c +++ b/usr/src/uts/common/fs/zfs/zvol.c @@ -1161,10 +1161,10 @@ zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, ASSERT(size <= zv->zv_volblocksize); /* Locate the extent this belongs to */ - ze = list_head(&zv->zv_extents); - while (offset >= ze->ze_nblks * zv->zv_volblocksize) { + for (ze = list_head(&zv->zv_extents); + ze != NULL && offset >= ze->ze_nblks * zv->zv_volblocksize; + ze = list_next(&zv->zv_extents, ze)) { offset -= ze->ze_nblks * zv->zv_volblocksize; - ze = list_next(&zv->zv_extents, ze); } if (ze == NULL) @@ -1232,7 +1232,7 @@ zvol_strategy(buf_t *bp) addr = bp->b_un.b_addr; resid = bp->b_bcount; - if (resid > 0 && (off < 0 || off >= volsize)) { + if (resid > 0 && off >= volsize) { bioerror(bp, EIO); biodone(bp); return (0); diff --git a/usr/src/uts/common/inet/ip/ipclassifier.c b/usr/src/uts/common/inet/ip/ipclassifier.c index 4f3ec2d817..69af77db9a 100644 --- a/usr/src/uts/common/inet/ip/ipclassifier.c +++ b/usr/src/uts/common/inet/ip/ipclassifier.c @@ -22,6 +22,7 @@ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2016 Joyent, Inc. * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2020 Joyent, Inc. 
*/ /* @@ -2772,7 +2773,11 @@ conn_get_socket_info(conn_t *connp, mib2_socketInfoEntry_t *sie) return (NULL); } - mutex_exit(&connp->conn_lock); + /* + * Continue to hold conn_lock because we don't want to race with an + * in-progress close, which will have set-to-NULL (and destroyed + * upper_handle, aka sonode (and vnode)) BEFORE setting CONN_CLOSING. + */ if (connp->conn_upper_handle != NULL) { vn = (*connp->conn_upcalls->su_get_vnode) @@ -2784,6 +2789,8 @@ conn_get_socket_info(conn_t *connp, mib2_socketInfoEntry_t *sie) flags |= MIB2_SOCKINFO_STREAM; } + mutex_exit(&connp->conn_lock); + if (vn == NULL || VOP_GETATTR(vn, &attr, 0, CRED(), NULL) != 0) { if (vn != NULL) VN_RELE(vn); diff --git a/usr/src/uts/common/inet/ip/ipsecesp.c b/usr/src/uts/common/inet/ip/ipsecesp.c index e0efbbf3ce..4b4e88dcf6 100644 --- a/usr/src/uts/common/inet/ip/ipsecesp.c +++ b/usr/src/uts/common/inet/ip/ipsecesp.c @@ -1843,6 +1843,7 @@ esp_submit_req_inbound(mblk_t *esp_mp, ip_recv_attr_t *ira, ipsec_stack_t *ipss = ns->netstack_ipsec; ipsecesp_stack_t *espstack = ns->netstack_ipsecesp; + mp = NULL; do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE; do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL; force = (assoc->ipsa_flags & IPSA_F_ASYNC); @@ -2172,6 +2173,7 @@ esp_submit_req_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa, ipsa_t *assoc, esp3dbg(espstack, ("esp_submit_req_outbound:%s", is_natt ? "natt" : "not natt")); + mp = NULL; do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL; do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE; force = (assoc->ipsa_flags & IPSA_F_ASYNC); @@ -2441,6 +2443,7 @@ esp_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa) * Reality check.... */ ipha = (ipha_t *)data_mp->b_rptr; /* So we can call esp_acquire(). 
*/ + ip6h = (ip6_t *)ipha; if (ixa->ixa_flags & IXAF_IS_IPV4) { ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); @@ -2455,7 +2458,6 @@ esp_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa) ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); af = AF_INET6; - ip6h = (ip6_t *)ipha; bzero(&ipp, sizeof (ipp)); divpoint = ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, NULL); if (ipp.ipp_dstopts != NULL && diff --git a/usr/src/uts/common/inet/ip/sadb.c b/usr/src/uts/common/inet/ip/sadb.c index 288c0e3e18..5f1d1c96ee 100644 --- a/usr/src/uts/common/inet/ip/sadb.c +++ b/usr/src/uts/common/inet/ip/sadb.c @@ -1067,6 +1067,15 @@ sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg) int srcidsize, dstidsize, senslen, osenslen; sa_family_t fam, pfam; /* Address family for SADB_EXT_ADDRESS */ /* src/dst and proxy sockaddrs. */ + + authsize = 0; + encrsize = 0; + pfam = 0; + srcidsize = 0; + dstidsize = 0; + paddrsize = 0; + senslen = 0; + osenslen = 0; /* * The following are pointers into the PF_KEY message this PF_KEY * message creates. @@ -1100,6 +1109,7 @@ sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg) */ alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) + sizeof (sadb_lifetime_t); + otherspi = 0; fam = ipsa->ipsa_addrfam; switch (fam) { @@ -1770,6 +1780,8 @@ sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial, (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) || (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM)); + diagnostic = 0; + /* Assign both sockaddrs, the compiler will do the right thing. */ sin = (struct sockaddr_in *)(addr + 1); sin6 = (struct sockaddr_in6 *)(addr + 1); diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index 554fe8b78f..88d558fd10 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -21,10 +21,10 @@ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019 Joyent, Inc. * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. 
* Copyright (c) 2013, 2017 by Delphix. All rights reserved. * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. + * Copyright 2020 Joyent, Inc. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -1018,10 +1018,23 @@ finish: /* If we have an upper handle (socket), release it */ if (IPCL_IS_NONSTR(connp)) { - ASSERT(connp->conn_upper_handle != NULL); - (*connp->conn_upcalls->su_closed)(connp->conn_upper_handle); + sock_upcalls_t *upcalls = connp->conn_upcalls; + sock_upper_handle_t handle = connp->conn_upper_handle; + + ASSERT(upcalls != NULL); + ASSERT(upcalls->su_closed != NULL); + ASSERT(handle != NULL); + /* + * Set these to NULL first because closed() will free upper + * structures. Acquire conn_lock because an external caller + * like conn_get_socket_info() will upcall if these are + * non-NULL. + */ + mutex_enter(&connp->conn_lock); connp->conn_upper_handle = NULL; connp->conn_upcalls = NULL; + mutex_exit(&connp->conn_lock); + upcalls->su_closed(handle); } } @@ -1435,13 +1448,26 @@ tcp_free(tcp_t *tcp) * nothing to do other than clearing the field. */ if (connp->conn_upper_handle != NULL) { + sock_upcalls_t *upcalls = connp->conn_upcalls; + sock_upper_handle_t handle = connp->conn_upper_handle; + + /* + * Set these to NULL first because closed() will free upper + * structures. Acquire conn_lock because an external caller + * like conn_get_socket_info() will upcall if these are + * non-NULL. 
+ */ + mutex_enter(&connp->conn_lock); + connp->conn_upper_handle = NULL; + connp->conn_upcalls = NULL; + mutex_exit(&connp->conn_lock); if (IPCL_IS_NONSTR(connp)) { - (*connp->conn_upcalls->su_closed)( - connp->conn_upper_handle); + ASSERT(upcalls != NULL); + ASSERT(upcalls->su_closed != NULL); + ASSERT(handle != NULL); + upcalls->su_closed(handle); tcp->tcp_detached = B_TRUE; } - connp->conn_upper_handle = NULL; - connp->conn_upcalls = NULL; } } diff --git a/usr/src/uts/common/inet/tcp/tcp_output.c b/usr/src/uts/common/inet/tcp/tcp_output.c index 7a0472f3dd..086668f435 100644 --- a/usr/src/uts/common/inet/tcp/tcp_output.c +++ b/usr/src/uts/common/inet/tcp/tcp_output.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2017 by Delphix. All rights reserved. - * Copyright 2019 Joyent, Inc. + * Copyright 2020 Joyent, Inc. */ /* This file contains all TCP output processing functions. */ @@ -1677,11 +1677,23 @@ finish: /* non-STREAM socket, release the upper handle */ if (IPCL_IS_NONSTR(connp)) { - ASSERT(connp->conn_upper_handle != NULL); - (*connp->conn_upcalls->su_closed) - (connp->conn_upper_handle); + sock_upcalls_t *upcalls = connp->conn_upcalls; + sock_upper_handle_t handle = connp->conn_upper_handle; + + ASSERT(upcalls != NULL); + ASSERT(upcalls->su_closed != NULL); + ASSERT(handle != NULL); + /* + * Set these to NULL first because closed() will free + * upper structures. Acquire conn_lock because an + * external caller like conn_get_socket_info() will + * upcall if these are non-NULL. 
+ */ + mutex_enter(&connp->conn_lock); connp->conn_upper_handle = NULL; connp->conn_upcalls = NULL; + mutex_exit(&connp->conn_lock); + upcalls->su_closed(handle); } } diff --git a/usr/src/uts/common/io/cxgbe/t4nex/adapter.h b/usr/src/uts/common/io/cxgbe/t4nex/adapter.h index 48edc44341..1192eeb43e 100644 --- a/usr/src/uts/common/io/cxgbe/t4nex/adapter.h +++ b/usr/src/uts/common/io/cxgbe/t4nex/adapter.h @@ -559,6 +559,10 @@ struct adapter { kmutex_t sfl_lock; /* same cache-line as sc_lock? but that's ok */ TAILQ_HEAD(, sge_fl) sfl; timeout_id_t sfl_timer; + + /* Sensors */ + id_t temp_sensor; + id_t volt_sensor; }; enum { diff --git a/usr/src/uts/common/io/cxgbe/t4nex/t4_nexus.c b/usr/src/uts/common/io/cxgbe/t4nex/t4_nexus.c index ec590228b6..05732e47a1 100644 --- a/usr/src/uts/common/io/cxgbe/t4nex/t4_nexus.c +++ b/usr/src/uts/common/io/cxgbe/t4nex/t4_nexus.c @@ -37,6 +37,7 @@ #include <sys/mkdev.h> #include <sys/queue.h> #include <sys/containerof.h> +#include <sys/sensors.h> #include "version.h" #include "common/common.h" @@ -180,6 +181,18 @@ static kmutex_t t4_uld_list_lock; static SLIST_HEAD(, uld_info) t4_uld_list; #endif +static int t4_temperature_read(void *, sensor_ioctl_scalar_t *); +static int t4_voltage_read(void *, sensor_ioctl_scalar_t *); +static const ksensor_ops_t t4_temp_ops = { + .kso_kind = ksensor_kind_temperature, + .kso_scalar = t4_temperature_read +}; + +static const ksensor_ops_t t4_volt_ops = { + .kso_kind = ksensor_kind_voltage, + .kso_scalar = t4_voltage_read +}; + int _init(void) { @@ -758,7 +771,23 @@ ofld_queues: } sc->flags |= INTR_ALLOCATED; - ASSERT(rc == DDI_SUCCESS); + if ((rc = ksensor_create_scalar_pcidev(dip, SENSOR_KIND_TEMPERATURE, + &t4_temp_ops, sc, "temp", &sc->temp_sensor)) != 0) { + cxgb_printf(dip, CE_WARN, "failed to create temperature " + "sensor: %d", rc); + rc = DDI_FAILURE; + goto done; + } + + if ((rc = ksensor_create_scalar_pcidev(dip, SENSOR_KIND_VOLTAGE, + &t4_volt_ops, sc, "vdd", &sc->volt_sensor)) != 0) { + 
cxgb_printf(dip, CE_WARN, "failed to create voltage " + "sensor: %d", rc); + rc = DDI_FAILURE; + goto done; + } + + ddi_report_dev(dip); /* @@ -849,6 +878,7 @@ t4_devo_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) } /* Safe to call no matter what */ + (void) ksensor_remove(dip, KSENSOR_ALL_IDS); ddi_prop_remove_all(dip); ddi_remove_minor_node(dip, NULL); @@ -2919,3 +2949,76 @@ t4_iterate(void (*func)(int, void *), void *arg) } #endif + +static int +t4_sensor_read(struct adapter *sc, uint32_t diag, uint32_t *valp) +{ + int rc; + struct port_info *pi = sc->port[0]; + uint32_t param, val; + + rc = begin_synchronized_op(pi, 1, 1); + if (rc != 0) { + return (rc); + } + param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | + V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) | + V_FW_PARAMS_PARAM_Y(diag); + rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); + end_synchronized_op(pi, 1); + + if (rc != 0) { + return (rc); + } + + if (val == 0) { + return (EIO); + } + + *valp = val; + return (0); +} + +static int +t4_temperature_read(void *arg, sensor_ioctl_scalar_t *scalar) +{ + int ret; + struct adapter *sc = arg; + uint32_t val; + + ret = t4_sensor_read(sc, FW_PARAM_DEV_DIAG_TMP, &val); + if (ret != 0) { + return (ret); + } + + /* + * The device measures temperature in units of 1 degree Celsius. We + * don't know its precision. 
+ */ + scalar->sis_unit = SENSOR_UNIT_CELSIUS; + scalar->sis_gran = 1; + scalar->sis_prec = 0; + scalar->sis_value = val; + + return (0); +} + +static int +t4_voltage_read(void *arg, sensor_ioctl_scalar_t *scalar) +{ + int ret; + struct adapter *sc = arg; + uint32_t val; + + ret = t4_sensor_read(sc, FW_PARAM_DEV_DIAG_VDD, &val); + if (ret != 0) { + return (ret); + } + + scalar->sis_unit = SENSOR_UNIT_VOLTS; + scalar->sis_gran = 1000; + scalar->sis_prec = 0; + scalar->sis_value = val; + + return (0); +} diff --git a/usr/src/uts/common/io/igb/igb_sensor.c b/usr/src/uts/common/io/igb/igb_sensor.c index b233af2a92..3b41a853c0 100644 --- a/usr/src/uts/common/io/igb/igb_sensor.c +++ b/usr/src/uts/common/io/igb/igb_sensor.c @@ -72,7 +72,7 @@ #define EMC1413_REG_EXT3_DIODE_LO 0x2b static int -igb_sensor_reg_temp(void *arg, sensor_ioctl_temperature_t *temp) +igb_sensor_reg_temperature(void *arg, sensor_ioctl_scalar_t *scalar) { igb_t *igb = arg; uint32_t reg; @@ -87,17 +87,17 @@ igb_sensor_reg_temp(void *arg, sensor_ioctl_temperature_t *temp) return (EIO); } - temp->sit_unit = SENSOR_UNIT_CELSIUS; - temp->sit_gran = E1000_THMJT_RESOLUTION; - temp->sit_prec = E1000_THMJT_PRECISION; - temp->sit_temp = E1000_THMJT_TEMP(reg); + scalar->sis_unit = SENSOR_UNIT_CELSIUS; + scalar->sis_gran = E1000_THMJT_RESOLUTION; + scalar->sis_prec = E1000_THMJT_PRECISION; + scalar->sis_value = E1000_THMJT_TEMP(reg); return (0); } static const ksensor_ops_t igb_sensor_reg_ops = { .kso_kind = ksensor_kind_temperature, - .kso_temp = igb_sensor_reg_temp + .kso_scalar = igb_sensor_reg_temperature }; static boolean_t @@ -106,8 +106,9 @@ igb_sensors_create_minors(igb_t *igb) int ret; igb_sensors_t *sp = &igb->igb_sensors; - if ((ret = ksensor_create_temp_pcidev(igb->dip, &igb_sensor_reg_ops, - igb, "builtin", &sp->isn_reg_ksensor)) != 0) { + if ((ret = ksensor_create_scalar_pcidev(igb->dip, + SENSOR_KIND_TEMPERATURE, &igb_sensor_reg_ops, igb, "builtin", + &sp->isn_reg_ksensor)) != 0) { igb_log(igb, 
IGB_LOG_ERROR, "failed to create main sensor: %d", ret); return (B_FALSE); diff --git a/usr/src/uts/common/io/ksensor/ksensor_drv.c b/usr/src/uts/common/io/ksensor/ksensor_drv.c index 6810e11758..70e99287a2 100644 --- a/usr/src/uts/common/io/ksensor/ksensor_drv.c +++ b/usr/src/uts/common/io/ksensor/ksensor_drv.c @@ -90,15 +90,15 @@ ksensor_ioctl_kind(minor_t min, intptr_t arg, int mode) } static int -ksensor_ioctl_temp(minor_t min, intptr_t arg, int mode) +ksensor_ioctl_scalar(minor_t min, intptr_t arg, int mode) { int ret; - sensor_ioctl_temperature_t temp; + sensor_ioctl_scalar_t scalar; - bzero(&temp, sizeof (temp)); - ret = ksensor_op_temperature((id_t)min, &temp); + bzero(&scalar, sizeof (scalar)); + ret = ksensor_op_scalar((id_t)min, &scalar); if (ret == 0) { - if (ddi_copyout(&temp, (void *)arg, sizeof (temp), + if (ddi_copyout(&scalar, (void *)arg, sizeof (scalar), mode & FKIOCTL) != 0) { ret = EFAULT; } @@ -118,10 +118,10 @@ ksensor_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, m = getminor(dev); switch (cmd) { - case SENSOR_IOCTL_TYPE: + case SENSOR_IOCTL_KIND: return (ksensor_ioctl_kind(m, arg, mode)); - case SENSOR_IOCTL_TEMPERATURE: - return (ksensor_ioctl_temp(m, arg, mode)); + case SENSOR_IOCTL_SCALAR: + return (ksensor_ioctl_scalar(m, arg, mode)); default: return (ENOTTY); } diff --git a/usr/src/uts/common/io/ksensor/ksensor_test.c b/usr/src/uts/common/io/ksensor/ksensor_test.c index ea71ab5559..a98a8b77eb 100644 --- a/usr/src/uts/common/io/ksensor/ksensor_test.c +++ b/usr/src/uts/common/io/ksensor/ksensor_test.c @@ -32,21 +32,53 @@ typedef struct ksensor_test { id_t kt_sensor3; id_t kt_sensor4; id_t kt_sensor5; + id_t kt_volt; + id_t kt_current; } ksensor_test_t; static int -ksensor_test_temperature(void *arg, sensor_ioctl_temperature_t *temp) +ksensor_test_temp(void *arg, sensor_ioctl_scalar_t *scalar) { - temp->sit_unit = SENSOR_UNIT_CELSIUS; - temp->sit_gran = 4; - temp->sit_prec = -2; - temp->sit_temp = 23; + scalar->sis_unit 
= SENSOR_UNIT_CELSIUS; + scalar->sis_gran = 4; + scalar->sis_prec = -2; + scalar->sis_value = 23; return (0); } static const ksensor_ops_t ksensor_test_temp_ops = { - ksensor_kind_temperature, - ksensor_test_temperature + .kso_kind = ksensor_kind_temperature, + .kso_scalar = ksensor_test_temp +}; + +static int +ksensor_test_volt(void *arg, sensor_ioctl_scalar_t *scalar) +{ + scalar->sis_unit = SENSOR_UNIT_VOLTS; + scalar->sis_gran = 1000; + scalar->sis_prec = 0; + scalar->sis_value = 3300; + return (0); +} + +static const ksensor_ops_t ksensor_test_volt_ops = { + .kso_kind = ksensor_kind_voltage, + .kso_scalar = ksensor_test_volt +}; + +static int +ksensor_test_current(void *arg, sensor_ioctl_scalar_t *scalar) +{ + scalar->sis_unit = SENSOR_UNIT_AMPS; + scalar->sis_gran = 10; + scalar->sis_prec = 0; + scalar->sis_value = 5; + return (0); +} + +static const ksensor_ops_t ksensor_test_current_ops = { + .kso_kind = ksensor_kind_current, + .kso_scalar = ksensor_test_current }; static int @@ -56,14 +88,14 @@ ksensor_test_kind_eio(void *arg, sensor_ioctl_kind_t *kindp) } static int -ksensor_test_temp_eio(void *arg, sensor_ioctl_temperature_t *tempp) +ksensor_test_temp_eio(void *arg, sensor_ioctl_scalar_t *scalar) { return (EIO); } static const ksensor_ops_t ksensor_test_eio_ops = { - ksensor_test_kind_eio, - ksensor_test_temp_eio + .kso_kind = ksensor_test_kind_eio, + .kso_scalar = ksensor_test_temp_eio }; static int @@ -107,7 +139,7 @@ ksensor_test_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) ddi_get_instance(dip)); if ((ret = ksensor_create(dip, &ksensor_test_temp_ops, NULL, buf, "ddi_sensor:test", &kt->kt_sensor3)) != 0) { - dev_err(dip, CE_WARN, "failed to attatch sensor %s: %d", buf, + dev_err(dip, CE_WARN, "failed to attach sensor %s: %d", buf, ret); goto err; } @@ -116,7 +148,7 @@ ksensor_test_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) ddi_get_instance(dip)); if ((ret = ksensor_create(dip, &ksensor_test_temp_ops, NULL, buf, "ddi_sensor:test", &kt->kt_sensor4)) 
!= 0) { - dev_err(dip, CE_WARN, "failed to attatch sensor %s: %d", buf, + dev_err(dip, CE_WARN, "failed to attach sensor %s: %d", buf, ret); goto err; } @@ -125,7 +157,25 @@ ksensor_test_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) ddi_get_instance(dip)); if ((ret = ksensor_create(dip, &ksensor_test_eio_ops, NULL, buf, "ddi_sensor:test", &kt->kt_sensor5)) != 0) { - dev_err(dip, CE_WARN, "failed to attatch sensor %s: %d", buf, + dev_err(dip, CE_WARN, "failed to attach sensor %s: %d", buf, + ret); + goto err; + } + + (void) snprintf(buf, sizeof (buf), "test.volt.%d.1", + ddi_get_instance(dip)); + if ((ret = ksensor_create(dip, &ksensor_test_volt_ops, NULL, buf, + "ddi_sensor:test", &kt->kt_volt)) != 0) { + dev_err(dip, CE_WARN, "failed to attach sensor %s: %d", buf, + ret); + goto err; + } + + (void) snprintf(buf, sizeof (buf), "test.current.%d.1", + ddi_get_instance(dip)); + if ((ret = ksensor_create(dip, &ksensor_test_current_ops, NULL, buf, + "ddi_sensor:test", &kt->kt_current)) != 0) { + dev_err(dip, CE_WARN, "failed to attach sensor %s: %d", buf, ret); goto err; } diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.c b/usr/src/uts/common/io/mlxcx/mlxcx.c index dbad9be958..90964d2fd1 100644 --- a/usr/src/uts/common/io/mlxcx/mlxcx.c +++ b/usr/src/uts/common/io/mlxcx/mlxcx.c @@ -1066,6 +1066,11 @@ mlxcx_teardown(mlxcx_t *mlxp) mlxcx_intr_disable(mlxp); } + if (mlxp->mlx_attach & MLXCX_ATTACH_SENSORS) { + mlxcx_teardown_sensors(mlxp); + mlxp->mlx_attach &= ~MLXCX_ATTACH_SENSORS; + } + if (mlxp->mlx_attach & MLXCX_ATTACH_CHKTIMERS) { mlxcx_teardown_checktimers(mlxp); mlxp->mlx_attach &= ~MLXCX_ATTACH_CHKTIMERS; @@ -1800,7 +1805,7 @@ mlxcx_setup_ports(mlxcx_t *mlxp) p->mlx_port_event.mla_mlx = mlxp; p->mlx_port_event.mla_port = p; mutex_init(&p->mlx_port_event.mla_mtx, NULL, - MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_intr_pri)); + MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_async_intr_pri)); p->mlp_init |= MLXCX_PORT_INIT; mutex_init(&p->mlp_mtx, NULL, MUTEX_DRIVER, 
DDI_INTR_PRI(mlxp->mlx_intr_pri)); @@ -2716,7 +2721,7 @@ mlxcx_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) for (i = 0; i <= MLXCX_FUNC_ID_MAX; i++) { mlxp->mlx_npages_req[i].mla_mlx = mlxp; mutex_init(&mlxp->mlx_npages_req[i].mla_mtx, NULL, - MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_intr_pri)); + MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_async_intr_pri)); } mlxp->mlx_attach |= MLXCX_ATTACH_ASYNC_TQ; @@ -2869,6 +2874,11 @@ mlxcx_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) } mlxp->mlx_attach |= MLXCX_ATTACH_CHKTIMERS; + if (!mlxcx_setup_sensors(mlxp)) { + goto err; + } + mlxp->mlx_attach |= MLXCX_ATTACH_SENSORS; + /* * Finally, tell MAC that we exist! */ @@ -2913,7 +2923,6 @@ static struct dev_ops mlxcx_dev_ops = { .devo_attach = mlxcx_attach, .devo_detach = mlxcx_detach, .devo_reset = nodev, - .devo_power = ddi_power, .devo_quiesce = ddi_quiesce_not_supported, .devo_cb_ops = &mlxcx_cb_ops }; diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.h b/usr/src/uts/common/io/mlxcx/mlxcx.h index 77d36447c6..e28fe89806 100644 --- a/usr/src/uts/common/io/mlxcx/mlxcx.h +++ b/usr/src/uts/common/io/mlxcx/mlxcx.h @@ -1009,6 +1009,15 @@ typedef struct { uint64_t mldp_wq_check_interval_sec; } mlxcx_drv_props_t; +typedef struct { + mlxcx_t *mlts_mlx; + uint8_t mlts_index; + id_t mlts_ksensor; + int16_t mlts_value; + int16_t mlts_max_value; + uint8_t mlts_name[MLXCX_MTMP_NAMELEN]; +} mlxcx_temp_sensor_t; + typedef enum { MLXCX_ATTACH_FM = 1 << 0, MLXCX_ATTACH_PCI_CONFIG = 1 << 1, @@ -1028,6 +1037,7 @@ typedef enum { MLXCX_ATTACH_CAPS = 1 << 15, MLXCX_ATTACH_CHKTIMERS = 1 << 16, MLXCX_ATTACH_ASYNC_TQ = 1 << 17, + MLXCX_ATTACH_SENSORS = 1 << 18 } mlxcx_attach_progress_t; struct mlxcx { @@ -1082,6 +1092,7 @@ struct mlxcx { * Interrupts */ uint_t mlx_intr_pri; + uint_t mlx_async_intr_pri; uint_t mlx_intr_type; /* always MSI-X */ int mlx_intr_count; size_t mlx_intr_size; /* allocation size */ @@ -1171,6 +1182,12 @@ struct mlxcx { ddi_periodic_t mlx_eq_checktimer; ddi_periodic_t mlx_cq_checktimer; 
ddi_periodic_t mlx_wq_checktimer; + + /* + * Sensors + */ + uint8_t mlx_temp_nsensors; + mlxcx_temp_sensor_t *mlx_temp_sensors; }; /* @@ -1446,6 +1463,12 @@ extern const char *mlxcx_port_status_string(mlxcx_port_status_t); extern const char *mlxcx_event_name(mlxcx_event_t); +/* + * Sensor Functions + */ +extern boolean_t mlxcx_setup_sensors(mlxcx_t *); +extern void mlxcx_teardown_sensors(mlxcx_t *); + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c index c8eb1335ea..32c40ec3ea 100644 --- a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c +++ b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c @@ -667,7 +667,8 @@ static void mlxcx_cmd_init(mlxcx_t *mlxp, mlxcx_cmd_t *cmd) { bzero(cmd, sizeof (*cmd)); - mutex_init(&cmd->mlcmd_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&cmd->mlcmd_lock, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(mlxp->mlx_async_intr_pri)); cv_init(&cmd->mlcmd_cv, NULL, CV_DRIVER, NULL); cmd->mlcmd_token = id_alloc(mlxp->mlx_cmd.mcmd_tokens); cmd->mlcmd_poll = mlxp->mlx_cmd.mcmd_polled; @@ -1687,6 +1688,10 @@ mlxcx_reg_name(mlxcx_register_id_t rid) return ("PPCNT"); case MLXCX_REG_PPLM: return ("PPLM"); + case MLXCX_REG_MTCAP: + return ("MTCAP"); + case MLXCX_REG_MTMP: + return ("MTMP"); default: return ("???"); } @@ -1736,6 +1741,12 @@ mlxcx_cmd_access_register(mlxcx_t *mlxp, mlxcx_cmd_reg_opmod_t opmod, case MLXCX_REG_PPLM: dsize = sizeof (mlxcx_reg_pplm_t); break; + case MLXCX_REG_MTCAP: + dsize = sizeof (mlxcx_reg_mtcap_t); + break; + case MLXCX_REG_MTMP: + dsize = sizeof (mlxcx_reg_mtmp_t); + break; default: dsize = 0; VERIFY(0); diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c index 89645bb2b1..941eb0f9e7 100644 --- a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c +++ b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c @@ -809,19 +809,32 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh) if (wq->mlwq_state & MLXCX_WQ_BUFFERS) { + list_t cq_buffers; + + /* + * Take the 
buffers away from the CQ. If the CQ is being + * processed and the WQ has been stopped, a completion + * which does not match to a buffer will be ignored. + */ + list_create(&cq_buffers, sizeof (mlxcx_buffer_t), + offsetof(mlxcx_buffer_t, mlb_cq_entry)); + + list_move_tail(&cq_buffers, &cq->mlcq_buffers); + + mutex_enter(&cq->mlcq_bufbmtx); + list_move_tail(&cq_buffers, &cq->mlcq_buffers_b); + mutex_exit(&cq->mlcq_bufbmtx); + + cq->mlcq_bufcnt = 0; + mutex_exit(&wq->mlwq_mtx); mutex_exit(&cq->mlcq_mtx); /* Return any outstanding buffers to the free pool. */ - while ((buf = list_remove_head(&cq->mlcq_buffers)) != NULL) { + while ((buf = list_remove_head(&cq_buffers)) != NULL) { mlxcx_buf_return_chain(mlxp, buf, B_FALSE); } - mutex_enter(&cq->mlcq_bufbmtx); - while ((buf = list_remove_head(&cq->mlcq_buffers_b)) != NULL) { - mlxcx_buf_return_chain(mlxp, buf, B_FALSE); - } - mutex_exit(&cq->mlcq_bufbmtx); - cq->mlcq_bufcnt = 0; + list_destroy(&cq_buffers); s = wq->mlwq_bufs; mutex_enter(&s->mlbs_mtx); diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c index f79c148d20..53ea4d683e 100644 --- a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c +++ b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c @@ -12,6 +12,7 @@ /* * Copyright (c) 2020, the University of Queensland * Copyright 2020 RackTop Systems, Inc. + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. */ /* @@ -922,6 +923,20 @@ lookagain: if (added) goto lookagain; + /* + * This check could go just after the lookagain + * label, but it is a hot code path so we don't + * want to unnecessarily grab a lock and check + * a flag for a relatively rare event (the ring + * being stopped). 
+ */ + mutex_enter(&wq->mlwq_mtx); + if ((wq->mlwq_state & MLXCX_WQ_STARTED) == 0) { + mutex_exit(&wq->mlwq_mtx); + goto nextcq; + } + mutex_exit(&wq->mlwq_mtx); + buf = list_head(&mlcq->mlcq_buffers); mlxcx_warn(mlxp, "got completion on CQ %x but " "no buffer matching wqe found: %x (first " @@ -1165,6 +1180,7 @@ mlxcx_intr_setup(mlxcx_t *mlxp) ret = ddi_intr_get_supported_types(dip, &types); if (ret != DDI_SUCCESS) { + mlxcx_warn(mlxp, "Failed to get supported interrupt types"); return (B_FALSE); } @@ -1176,15 +1192,21 @@ mlxcx_intr_setup(mlxcx_t *mlxp) ret = ddi_intr_get_nintrs(dip, DDI_INTR_TYPE_MSIX, &nintrs); if (ret != DDI_SUCCESS) { + mlxcx_warn(mlxp, "Failed to get number of interrupts"); return (B_FALSE); } if (nintrs < 2) { - mlxcx_warn(mlxp, "%d MSI-X interrupts available, but mlxcx " + mlxcx_warn(mlxp, "%d MSI-X interrupts supported, but mlxcx " "requires 2", nintrs); return (B_FALSE); } ret = ddi_intr_get_navail(dip, DDI_INTR_TYPE_MSIX, &navail); + if (ret != DDI_SUCCESS) { + mlxcx_warn(mlxp, + "Failed to get number of available interrupts"); + return (B_FALSE); + } if (navail < 2) { mlxcx_warn(mlxp, "%d MSI-X interrupts available, but mlxcx " "requires 2", navail); @@ -1203,10 +1225,14 @@ mlxcx_intr_setup(mlxcx_t *mlxp) ret = ddi_intr_alloc(dip, mlxp->mlx_intr_handles, DDI_INTR_TYPE_MSIX, 0, navail, &mlxp->mlx_intr_count, DDI_INTR_ALLOC_NORMAL); if (ret != DDI_SUCCESS) { + mlxcx_warn(mlxp, "Failed to allocate %d interrupts", navail); mlxcx_intr_teardown(mlxp); return (B_FALSE); } if (mlxp->mlx_intr_count < mlxp->mlx_intr_cq0 + 1) { + mlxcx_warn(mlxp, "%d MSI-X interrupts allocated, but mlxcx " + "requires %d", mlxp->mlx_intr_count, + mlxp->mlx_intr_cq0 + 1); mlxcx_intr_teardown(mlxp); return (B_FALSE); } @@ -1214,10 +1240,29 @@ mlxcx_intr_setup(mlxcx_t *mlxp) ret = ddi_intr_get_pri(mlxp->mlx_intr_handles[0], &mlxp->mlx_intr_pri); if (ret != DDI_SUCCESS) { + mlxcx_warn(mlxp, "Failed to get interrupt priority"); mlxcx_intr_teardown(mlxp); return 
(B_FALSE); } + /* + * Set the interrupt priority for the asynchronous handler higher + * than the ring handlers. Some operations which issue commands, + * and thus rely on the async interrupt handler for posting + * completion, do so with a CQ mutex held. The CQ mutex is also + * acquired during ring processing, so if the ring processing vector + * happens to be assigned to the same CPU as the async vector + * it can hold off the async interrupt thread and lead to a deadlock. + * By assigning a higher priority to the async vector, it will + * always be dispatched. + */ + mlxp->mlx_async_intr_pri = mlxp->mlx_intr_pri; + if (mlxp->mlx_async_intr_pri < LOCK_LEVEL) { + mlxp->mlx_async_intr_pri++; + } else { + mlxp->mlx_intr_pri--; + } + mlxp->mlx_eqs_size = mlxp->mlx_intr_count * sizeof (mlxcx_event_queue_t); mlxp->mlx_eqs = kmem_zalloc(mlxp->mlx_eqs_size, KM_SLEEP); @@ -1227,8 +1272,11 @@ mlxcx_intr_setup(mlxcx_t *mlxp) * mutex and avl tree to be init'ed - so do it now. */ for (i = 0; i < mlxp->mlx_intr_count; ++i) { + uint_t pri = (i == 0) ? mlxp->mlx_async_intr_pri : + mlxp->mlx_intr_pri; + mutex_init(&mlxp->mlx_eqs[i].mleq_mtx, NULL, MUTEX_DRIVER, - DDI_INTR_PRI(mlxp->mlx_intr_pri)); + DDI_INTR_PRI(pri)); cv_init(&mlxp->mlx_eqs[i].mleq_cv, NULL, CV_DRIVER, NULL); if (i < mlxp->mlx_intr_cq0) @@ -1239,9 +1287,38 @@ mlxcx_intr_setup(mlxcx_t *mlxp) offsetof(mlxcx_completion_queue_t, mlcq_eq_entry)); } + while (mlxp->mlx_async_intr_pri > DDI_INTR_PRI_MIN) { + ret = ddi_intr_set_pri(mlxp->mlx_intr_handles[0], + mlxp->mlx_async_intr_pri); + if (ret == DDI_SUCCESS) + break; + mlxcx_note(mlxp, + "!Failed to set interrupt priority to %u for " + "async interrupt vector", mlxp->mlx_async_intr_pri); + /* + * If it was not possible to set the IPL for the async + * interrupt to the desired value, then try a lower priority. + * Some PSMs can only accommodate a limited number of vectors + * at each priority level (or group of priority levels). 
Since + * the async priority must be set higher than the ring + * handlers, lower both. The ring handler priority is set + * below. + */ + mlxp->mlx_async_intr_pri--; + mlxp->mlx_intr_pri--; + } + + if (mlxp->mlx_async_intr_pri == DDI_INTR_PRI_MIN) { + mlxcx_warn(mlxp, "Failed to find an interrupt priority for " + "async interrupt vector"); + mlxcx_intr_teardown(mlxp); + return (B_FALSE); + } + ret = ddi_intr_add_handler(mlxp->mlx_intr_handles[0], mlxcx_intr_async, (caddr_t)mlxp, (caddr_t)&mlxp->mlx_eqs[0]); if (ret != DDI_SUCCESS) { + mlxcx_warn(mlxp, "Failed to add async interrupt handler"); mlxcx_intr_teardown(mlxp); return (B_FALSE); } @@ -1268,9 +1345,29 @@ mlxcx_intr_setup(mlxcx_t *mlxp) eqt = MLXCX_EQ_TYPE_RX; } + while (mlxp->mlx_intr_pri >= DDI_INTR_PRI_MIN) { + ret = ddi_intr_set_pri(mlxp->mlx_intr_handles[i], + mlxp->mlx_intr_pri); + if (ret == DDI_SUCCESS) + break; + mlxcx_note(mlxp, "!Failed to set interrupt priority to " + "%u for interrupt vector %d", + mlxp->mlx_intr_pri, i); + mlxp->mlx_intr_pri--; + } + if (mlxp->mlx_intr_pri < DDI_INTR_PRI_MIN) { + mlxcx_warn(mlxp, + "Failed to find an interrupt priority for " + "interrupt vector %d", i); + mlxcx_intr_teardown(mlxp); + return (B_FALSE); + } + ret = ddi_intr_add_handler(mlxp->mlx_intr_handles[i], mlxcx_intr_n, (caddr_t)mlxp, (caddr_t)&mlxp->mlx_eqs[i]); if (ret != DDI_SUCCESS) { + mlxcx_warn(mlxp, "Failed to add interrupt handler %d", + i); mlxcx_intr_teardown(mlxp); return (B_FALSE); } diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h index 1987ae06ea..4b92de92b8 100644 --- a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h +++ b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h @@ -2530,6 +2530,30 @@ typedef struct { uint16be_t mlrd_pplm_fec_override_admin_fdr10; } mlxcx_reg_pplm_t; +typedef struct { + uint8_t mlrd_mtcap_rsvd[3]; + uint8_t mlrd_mtcap_sensor_count; + uint8_t mlrd_mtcap_rsvd1[4]; + uint64be_t mlrd_mtcap_sensor_map; +} mlxcx_reg_mtcap_t; + +#define 
MLXCX_MTMP_NAMELEN 8 + +typedef struct { + uint8_t mlrd_mtmp_rsvd[2]; + uint16be_t mlrd_mtmp_sensor_index; + uint8_t mlrd_mtmp_rsvd1[2]; + uint16be_t mlrd_mtmp_temperature; + bits16_t mlrd_mtmp_max_flags; + uint16be_t mlrd_mtmp_max_temperature; + bits16_t mlrd_mtmp_tee; + uint16be_t mlrd_mtmp_temp_thresh_hi; + uint8_t mlrd_mtmp_rsvd2[2]; + uint16be_t mlrd_mtmp_temp_thresh_lo; + uint8_t mlrd_mtmp_rsvd3[4]; + uint8_t mlrd_mtmp_name[MLXCX_MTMP_NAMELEN]; +} mlxcx_reg_mtmp_t; + typedef enum { MLXCX_REG_PMTU = 0x5003, MLXCX_REG_PTYS = 0x5004, @@ -2540,6 +2564,8 @@ typedef enum { MLXCX_REG_MCIA = 0x9014, MLXCX_REG_PPCNT = 0x5008, MLXCX_REG_PPLM = 0x5023, + MLXCX_REG_MTCAP = 0x9009, + MLXCX_REG_MTMP = 0x900A } mlxcx_register_id_t; typedef union { @@ -2551,6 +2577,8 @@ typedef union { mlxcx_reg_mcia_t mlrd_mcia; mlxcx_reg_ppcnt_t mlrd_ppcnt; mlxcx_reg_pplm_t mlrd_pplm; + mlxcx_reg_mtcap_t mlrd_mtcap; + mlxcx_reg_mtmp_t mlrd_mtmp; } mlxcx_register_data_t; typedef enum { diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_sensor.c b/usr/src/uts/common/io/mlxcx/mlxcx_sensor.c new file mode 100644 index 0000000000..6d2c7d0778 --- /dev/null +++ b/usr/src/uts/common/io/mlxcx/mlxcx_sensor.c @@ -0,0 +1,126 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Oxide Computer Company + */ + +#include <mlxcx.h> +#include <sys/sensors.h> + +/* + * The PRM indicates that the temperature is measured in 1/8th degrees. + */ +#define MLXCX_TEMP_GRAN 8 + +/* + * Read a single temperature sensor entry. 
The ksensor framework guarantees that + * it will only call this once for a given sensor at any time, though multiple + * sensors can be in parallel. + */ +static int +mlxcx_temperature_read(void *arg, sensor_ioctl_scalar_t *scalar) +{ + boolean_t ok; + uint16_t tmp; + mlxcx_register_data_t data; + mlxcx_temp_sensor_t *sensor = arg; + mlxcx_t *mlxp = sensor->mlts_mlx; + + bzero(&data, sizeof (data)); + data.mlrd_mtmp.mlrd_mtmp_sensor_index = to_be16(sensor->mlts_index); + ok = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ, + MLXCX_REG_MTMP, &data); + if (!ok) { + return (EIO); + } + + tmp = from_be16(data.mlrd_mtmp.mlrd_mtmp_temperature); + sensor->mlts_value = (int16_t)tmp; + tmp = from_be16(data.mlrd_mtmp.mlrd_mtmp_max_temperature); + sensor->mlts_max_value = (int16_t)tmp; + bcopy(data.mlrd_mtmp.mlrd_mtmp_name, sensor->mlts_name, + sizeof (sensor->mlts_name)); + + scalar->sis_unit = SENSOR_UNIT_CELSIUS; + scalar->sis_gran = MLXCX_TEMP_GRAN; + scalar->sis_prec = 0; + scalar->sis_value = (int64_t)sensor->mlts_value; + + return (0); +} + +static const ksensor_ops_t mlxcx_temp_ops = { + .kso_kind = ksensor_kind_temperature, + .kso_scalar = mlxcx_temperature_read +}; + +void +mlxcx_teardown_sensors(mlxcx_t *mlxp) +{ + if (mlxp->mlx_temp_nsensors == 0) + return; + (void) ksensor_remove(mlxp->mlx_dip, KSENSOR_ALL_IDS); + kmem_free(mlxp->mlx_temp_sensors, sizeof (mlxcx_temp_sensor_t) * + mlxp->mlx_temp_nsensors); +} + +boolean_t +mlxcx_setup_sensors(mlxcx_t *mlxp) +{ + mlxcx_register_data_t data; + boolean_t ok; + + mlxp->mlx_temp_nsensors = 0; + bzero(&data, sizeof (data)); + ok = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ, + MLXCX_REG_MTCAP, &data); + if (!ok) { + return (B_FALSE); + } + + if (data.mlrd_mtcap.mlrd_mtcap_sensor_count == 0) { + return (B_TRUE); + } + + mlxp->mlx_temp_nsensors = data.mlrd_mtcap.mlrd_mtcap_sensor_count; + mlxp->mlx_temp_sensors = kmem_zalloc(sizeof (mlxcx_temp_sensor_t) * + mlxp->mlx_temp_nsensors, 
KM_SLEEP); + + for (uint8_t i = 0; i < mlxp->mlx_temp_nsensors; i++) { + char buf[32]; + int ret; + + if (snprintf(buf, sizeof (buf), "temp%u", i) >= sizeof (buf)) { + mlxcx_warn(mlxp, "sensor name %u would overflow " + "internal buffer"); + goto err; + } + + mlxp->mlx_temp_sensors[i].mlts_mlx = mlxp; + mlxp->mlx_temp_sensors[i].mlts_index = i; + + ret = ksensor_create_scalar_pcidev(mlxp->mlx_dip, + SENSOR_KIND_TEMPERATURE, &mlxcx_temp_ops, + &mlxp->mlx_temp_sensors[i], buf, + &mlxp->mlx_temp_sensors[i].mlts_ksensor); + if (ret != 0) { + mlxcx_warn(mlxp, "failed to create temp sensor %s: %d", + buf, ret); + goto err; + } + } + + return (B_TRUE); +err: + mlxcx_teardown_sensors(mlxp); + return (B_FALSE); +} diff --git a/usr/src/uts/common/io/tem.c b/usr/src/uts/common/io/tem.c index 573e10cd66..525aa5f585 100644 --- a/usr/src/uts/common/io/tem.c +++ b/usr/src/uts/common/io/tem.c @@ -524,10 +524,41 @@ tems_check_videomode(struct vis_devinit *tp) } static void -tems_setup_terminal(struct vis_devinit *tp, size_t height, size_t width) +tems_setup_font(screen_size_t height, screen_size_t width) { bitmap_data_t *font_data; int i; + + /* + * set_font() will select an appropriate sized font for + * the number of rows and columns selected. If we don't + * have a font that will fit, then it will use the + * default builtin font and adjust the rows and columns + * to fit on the screen. + */ + font_data = set_font(&tems.ts_c_dimension.height, + &tems.ts_c_dimension.width, height, width); + + /* + * To use loaded font, we assign the loaded font data to tems.ts_font. + * In case of next load, the previously loaded data is freed + * when loading the new font. 
+ */ + for (i = 0; i < VFNT_MAPS; i++) { + tems.ts_font.vf_map[i] = + font_data->font->vf_map[i]; + tems.ts_font.vf_map_count[i] = + font_data->font->vf_map_count[i]; + } + + tems.ts_font.vf_bytes = font_data->font->vf_bytes; + tems.ts_font.vf_width = font_data->font->vf_width; + tems.ts_font.vf_height = font_data->font->vf_height; +} + +static void +tems_setup_terminal(struct vis_devinit *tp, size_t height, size_t width) +{ int old_blank_buf_size = tems.ts_c_dimension.width * sizeof (*tems.ts_blank_line); @@ -546,6 +577,9 @@ tems_setup_terminal(struct vis_devinit *tp, size_t height, size_t width) tems.ts_c_dimension.height = tp->height; tems.ts_callbacks = &tem_safe_text_callbacks; + tems_setup_font(16 * tp->height + BORDER_PIXELS, + 8 * tp->width + BORDER_PIXELS); + break; case VIS_PIXEL: @@ -559,33 +593,11 @@ tems_setup_terminal(struct vis_devinit *tp, size_t height, size_t width) } tems.ts_c_dimension.height = (screen_size_t)height; tems.ts_c_dimension.width = (screen_size_t)width; - tems.ts_p_dimension.height = tp->height; tems.ts_p_dimension.width = tp->width; - tems.ts_callbacks = &tem_safe_pix_callbacks; - /* - * set_font() will select a appropriate sized font for - * the number of rows and columns selected. If we don't - * have a font that will fit, then it will use the - * default builtin font. set_font() will adjust the rows - * and columns to fit on the screen. 
- */ - font_data = set_font(&tems.ts_c_dimension.height, - &tems.ts_c_dimension.width, - tems.ts_p_dimension.height, - tems.ts_p_dimension.width); - - for (i = 0; i < VFNT_MAPS; i++) { - tems.ts_font.vf_map[i] = - font_data->font->vf_map[i]; - tems.ts_font.vf_map_count[i] = - font_data->font->vf_map_count[i]; - } - tems.ts_font.vf_bytes = font_data->font->vf_bytes; - tems.ts_font.vf_width = font_data->font->vf_width; - tems.ts_font.vf_height = font_data->font->vf_height; + tems_setup_font(tp->height, tp->width); tems.ts_p_offset.y = (tems.ts_p_dimension.height - (tems.ts_c_dimension.height * tems.ts_font.vf_height)) / 2; @@ -594,9 +606,7 @@ tems_setup_terminal(struct vis_devinit *tp, size_t height, size_t width) tems.ts_pix_data_size = tems.ts_font.vf_width * tems.ts_font.vf_height; - tems.ts_pix_data_size *= 4; - tems.ts_pdepth = tp->depth; break; @@ -963,6 +973,7 @@ tems_get_initial_color(tem_color_t *pcolor) if (inverse_screen) flags |= TEM_ATTR_SCREEN_REVERSE; +#ifdef _HAVE_TEM_FIRMWARE if (flags != 0) { /* * If either reverse flag is set, the screen is in @@ -980,6 +991,21 @@ tems_get_initial_color(tem_color_t *pcolor) if (pcolor->bg_color == ANSI_COLOR_WHITE) flags |= TEM_ATTR_BRIGHT_BG; } +#else + if (flags != 0) { + if (pcolor->fg_color == ANSI_COLOR_WHITE) + flags |= TEM_ATTR_BRIGHT_BG; + + if (pcolor->fg_color == ANSI_COLOR_BLACK) + flags &= ~TEM_ATTR_BRIGHT_BG; + } else { + /* + * In case of black on white we want bright white for BG. 
+ */ + if (pcolor->bg_color == ANSI_COLOR_WHITE) + flags |= TEM_ATTR_BRIGHT_BG; + } +#endif pcolor->a_flags = flags; } diff --git a/usr/src/uts/common/io/tem_safe.c b/usr/src/uts/common/io/tem_safe.c index 5008d4a4d6..8d47a00d5f 100644 --- a/usr/src/uts/common/io/tem_safe.c +++ b/usr/src/uts/common/io/tem_safe.c @@ -129,9 +129,12 @@ static void tem_safe_copy_area(struct tem_vt_state *tem, screen_pos_t e_col, screen_pos_t e_row, screen_pos_t t_col, screen_pos_t t_row, cred_t *credp, enum called_from called_from); +#if 0 +/* Currently unused */ static void tem_safe_image_display(struct tem_vt_state *, uchar_t *, int, int, screen_pos_t, screen_pos_t, cred_t *, enum called_from); +#endif static void tem_safe_bell(struct tem_vt_state *tem, enum called_from called_from); static void tem_safe_pix_clear_prom_output(struct tem_vt_state *tem, @@ -1568,6 +1571,7 @@ tem_safe_text_display(struct tem_vt_state *tem, term_char_t *string, } } +#if 0 /* * This function is used to blit a rectangular color image, * unperturbed on the underlying framebuffer, to render @@ -1600,6 +1604,7 @@ tem_safe_image_display(struct tem_vt_state *tem, uchar_t *image, mutex_exit(&tem->tvs_lock); mutex_exit(&tems.ts_lock); } +#endif /*ARGSUSED*/ void @@ -2385,12 +2390,22 @@ tem_safe_get_attr(struct tem_vt_state *tem, text_color_t *fg, static void tem_safe_get_color(text_color_t *fg, text_color_t *bg, term_char_t c) { + boolean_t bold_font; + *fg = c.tc_fg_color; *bg = c.tc_bg_color; + bold_font = tems.ts_font.vf_map_count[VFNT_MAP_BOLD] != 0; + + /* + * If we have both normal and bold font components, + * we use bold font for TEM_ATTR_BOLD. + * The bright color is traditionally used with TEM_ATTR_BOLD, + * in case there is no bold font. 
+ */ if (c.tc_fg_color < XLATE_NCOLORS) { - if (TEM_ATTR_ISSET(c.tc_char, - TEM_ATTR_BRIGHT_FG | TEM_ATTR_BOLD)) + if (TEM_ATTR_ISSET(c.tc_char, TEM_ATTR_BRIGHT_FG) || + (TEM_ATTR_ISSET(c.tc_char, TEM_ATTR_BOLD) && !bold_font)) *fg = brt_xlate[c.tc_fg_color]; else *fg = dim_xlate[c.tc_fg_color]; diff --git a/usr/src/uts/common/io/usb/usba/hubdi.c b/usr/src/uts/common/io/usb/usba/hubdi.c index 99d75edce3..5207a51490 100644 --- a/usr/src/uts/common/io/usb/usba/hubdi.c +++ b/usr/src/uts/common/io/usb/usba/hubdi.c @@ -55,48 +55,45 @@ extern boolean_t consconfig_console_is_ready(void); /* * Prototypes for static functions */ -static int usba_hubdi_bus_ctl( - dev_info_t *dip, - dev_info_t *rdip, - ddi_ctl_enum_t op, - void *arg, - void *result); - -static int usba_hubdi_map_fault( - dev_info_t *dip, - dev_info_t *rdip, - struct hat *hat, - struct seg *seg, - caddr_t addr, - struct devpage *dp, - pfn_t pfn, - uint_t prot, - uint_t lock); +static int usba_hubdi_bus_ctl(dev_info_t *dip, + dev_info_t *rdip, + ddi_ctl_enum_t op, + void *arg, + void *result); + +static int usba_hubdi_map_fault(dev_info_t *dip, + dev_info_t *rdip, + struct hat *hat, + struct seg *seg, + caddr_t addr, + struct devpage *dp, + pfn_t pfn, + uint_t prot, + uint_t lock); static int hubd_busop_get_eventcookie(dev_info_t *dip, - dev_info_t *rdip, - char *eventname, - ddi_eventcookie_t *cookie); + dev_info_t *rdip, + char *eventname, + ddi_eventcookie_t *cookie); static int hubd_busop_add_eventcall(dev_info_t *dip, - dev_info_t *rdip, - ddi_eventcookie_t cookie, - void (*callback)(dev_info_t *dip, - ddi_eventcookie_t cookie, void *arg, - void *bus_impldata), - void *arg, ddi_callback_id_t *cb_id); + dev_info_t *rdip, + ddi_eventcookie_t cookie, + void (*callback)(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg, + void *bus_impldata), + void *arg, ddi_callback_id_t *cb_id); static int hubd_busop_remove_eventcall(dev_info_t *dip, - ddi_callback_id_t cb_id); + ddi_callback_id_t cb_id); static int 
hubd_bus_config(dev_info_t *dip, - uint_t flag, - ddi_bus_config_op_t op, - void *arg, - dev_info_t **child); + uint_t flag, + ddi_bus_config_op_t op, + void *arg, + dev_info_t **child); static int hubd_bus_unconfig(dev_info_t *dip, - uint_t flag, - ddi_bus_config_op_t op, - void *arg); + uint_t flag, + ddi_bus_config_op_t op, + void *arg); static int hubd_bus_power(dev_info_t *dip, void *impl_arg, - pm_bus_power_op_t op, void *arg, void *result); + pm_bus_power_op_t op, void *arg, void *result); static usb_port_t hubd_get_port_num(hubd_t *, struct devctl_iocdata *); static dev_info_t *hubd_get_child_dip(hubd_t *, usb_port_t); @@ -251,14 +248,14 @@ usba_hubdi_unregister(dev_info_t *dip) /*ARGSUSED*/ static int usba_hubdi_map_fault(dev_info_t *dip, - dev_info_t *rdip, - struct hat *hat, - struct seg *seg, - caddr_t addr, - struct devpage *dp, - pfn_t pfn, - uint_t prot, - uint_t lock) + dev_info_t *rdip, + struct hat *hat, + struct seg *seg, + caddr_t addr, + struct devpage *dp, + pfn_t pfn, + uint_t prot, + uint_t lock) { return (DDI_FAILURE); } @@ -269,9 +266,9 @@ usba_hubdi_map_fault(dev_info_t *dip, */ int usba_hubdi_bind_root_hub(dev_info_t *dip, - uchar_t *root_hub_config_descriptor, - size_t config_length, - usb_dev_descr_t *root_hub_device_descriptor) + uchar_t *root_hub_config_descriptor, + size_t config_length, + usb_dev_descr_t *root_hub_device_descriptor) { usba_device_t *usba_device; usba_hcdi_t *hcdi = usba_hcdi_get_hcdi(dip); @@ -1145,10 +1142,10 @@ hubd_post_power(hubd_t *hubd, usb_port_t port, pm_bp_child_pwrchg_t *bpc, */ static int usba_hubdi_bus_ctl(dev_info_t *dip, - dev_info_t *rdip, - ddi_ctl_enum_t op, - void *arg, - void *result) + dev_info_t *rdip, + ddi_ctl_enum_t op, + void *arg, + void *result) { usba_device_t *hub_usba_device = usba_get_usba_device(rdip); dev_info_t *root_hub_dip = hub_usba_device->usb_root_hub_dip; @@ -1294,7 +1291,7 @@ usba_hubdi_bus_ctl(dev_info_t *dip, /* * hubd_config_one: - * enumerate one child according to 
'port' + * enumerate one child according to 'port' */ static boolean_t @@ -2625,8 +2622,7 @@ hubd_restore_device_state(dev_info_t *dip, hubd_t *hubd) /* * wait at least 3 frames before accessing devices - * (note that delay's minimal time is one clock tick which - * is 10ms unless hires_tick has been changed) + * (note that delay's minimal time is one clock tick). */ mutex_exit(HUBD_MUTEX(hubd)); delay(drv_usectohz(10000)); @@ -3331,8 +3327,8 @@ hubd_set_hub_depth(hubd_t *hubd) int rval; usb_cr_t completion_reason; usb_cb_flags_t cb_flags; - usba_device_t *ud; - uint16_t depth; + usba_device_t *ud; + uint16_t depth; /* * We only need to set the hub depth devices for hubs that are at least @@ -6044,7 +6040,7 @@ hubd_ready_device(hubd_t *hubd, dev_info_t *child_dip, usba_device_t *child_ud, child_ud->usb_active_cfg_ndx = config_index; child_ud->usb_cfg = child_ud->usb_cfg_array[config_index]; child_ud->usb_cfg_length = config_descriptor.wTotalLength; - child_ud->usb_cfg_value = config_descriptor.bConfigurationValue; + child_ud->usb_cfg_value = config_descriptor.bConfigurationValue; child_ud->usb_n_ifs = config_descriptor.bNumInterfaces; child_ud->usb_dip = child_dip; @@ -6089,11 +6085,11 @@ hubd_ready_device(hubd_t *hubd, dev_info_t *child_dip, usba_device_t *child_ud, */ static int hubd_create_child(dev_info_t *dip, - hubd_t *hubd, - usba_device_t *hubd_ud, - usb_port_status_t port_status, - usb_port_t port, - int iteration) + hubd_t *hubd, + usba_device_t *hubd_ud, + usb_port_status_t port_status, + usb_port_t port, + int iteration) { dev_info_t *child_dip = NULL; usb_dev_descr_t usb_dev_descr; @@ -6869,9 +6865,9 @@ hubd_free_usba_device(hubd_t *hubd, usba_device_t *usba_device) */ static int hubd_busop_get_eventcookie(dev_info_t *dip, - dev_info_t *rdip, - char *eventname, - ddi_eventcookie_t *cookie) + dev_info_t *rdip, + char *eventname, + ddi_eventcookie_t *cookie) { hubd_t *hubd = (hubd_t *)hubd_get_soft_state(dip); @@ -6891,12 +6887,11 @@ 
hubd_busop_get_eventcookie(dev_info_t *dip, static int hubd_busop_add_eventcall(dev_info_t *dip, - dev_info_t *rdip, - ddi_eventcookie_t cookie, - void (*callback)(dev_info_t *dip, - ddi_eventcookie_t cookie, void *arg, - void *bus_impldata), - void *arg, ddi_callback_id_t *cb_id) + dev_info_t *rdip, + ddi_eventcookie_t cookie, + void (*callback)(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg, + void *bus_impldata), + void *arg, ddi_callback_id_t *cb_id) { hubd_t *hubd = (hubd_t *)hubd_get_soft_state(dip); usb_port_t port = hubd_child_dip2port(hubd, rdip); @@ -7671,7 +7666,7 @@ usba_hubdi_open(dev_info_t *dip, dev_t *devp, int flags, int otyp, /* ARGSUSED */ int usba_hubdi_close(dev_info_t *dip, dev_t dev, int flag, int otyp, - cred_t *credp) + cred_t *credp) { hubd_t *hubd; diff --git a/usr/src/uts/common/mapfiles/ksensor.mapfile b/usr/src/uts/common/mapfiles/ksensor.mapfile index 0374c957f7..51b65a2b9d 100644 --- a/usr/src/uts/common/mapfiles/ksensor.mapfile +++ b/usr/src/uts/common/mapfiles/ksensor.mapfile @@ -36,8 +36,10 @@ $mapfile_version 2 SYMBOL_SCOPE { global: ksensor_create { FLAGS = EXTERN }; - ksensor_create_temp_pcidev { FLAGS = EXTERN }; + ksensor_create_scalar_pcidev { FLAGS = EXTERN }; ksensor_remove { FLAGS = EXTERN }; + ksensor_kind_current { FLAGS = EXTERN }; ksensor_kind_temperature { FLAGS = EXTERN }; + ksensor_kind_voltage { FLAGS = EXTERN }; }; diff --git a/usr/src/uts/common/os/cred.c b/usr/src/uts/common/os/cred.c index 0bd6cfd44f..5e909667de 100644 --- a/usr/src/uts/common/os/cred.c +++ b/usr/src/uts/common/os/cred.c @@ -20,13 +20,14 @@ */ /* * Copyright (c) 2013, Ira Cooper. All rights reserved. + * Copyright 2020 Nexenta by DDN, Inc. All rights reserved. */ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ +/* All Rights Reserved */ /* * University Copyright- Copyright (c) 1982, 1986, 1988 @@ -288,7 +289,7 @@ crget(void) { cred_t *cr = kmem_cache_alloc(cred_cache, KM_SLEEP); - bcopy(kcred, cr, crsize); + bcopy(zone_kcred(), cr, crsize); cr->cr_ref = 1; zone_cred_hold(cr->cr_zone); if (cr->cr_label) @@ -377,7 +378,7 @@ crfree(cred_t *cr) /* * Copy a cred structure to a new one and free the old one. * The new cred will have two references. One for the calling process, - * and one for the thread. + * and one for the thread. */ cred_t * crcopy(cred_t *cr) @@ -404,7 +405,7 @@ crcopy(cred_t *cr) /* * Copy a cred structure to a new one and free the old one. * The new cred will have two references. One for the calling process, - * and one for the thread. + * and one for the thread. * This variation on crcopy uses a pre-allocated structure for the * "new" cred. */ diff --git a/usr/src/uts/common/os/ksensor.c b/usr/src/uts/common/os/ksensor.c index c89cad4206..491fbcc7cd 100644 --- a/usr/src/uts/common/os/ksensor.c +++ b/usr/src/uts/common/os/ksensor.c @@ -544,14 +544,29 @@ ksensor_create(dev_info_t *dip, const ksensor_ops_t *ops, void *arg, } int -ksensor_create_temp_pcidev(dev_info_t *dip, const ksensor_ops_t *ops, - void *arg, const char *name, id_t *idp) +ksensor_create_scalar_pcidev(dev_info_t *dip, uint_t kind, + const ksensor_ops_t *ops, void *arg, const char *name, id_t *idp) { char *pci_name, *type; + const char *class; int *regs, ret; uint_t nregs; uint16_t bus, dev; + switch (kind) { + case SENSOR_KIND_TEMPERATURE: + class = "ddi_sensor:temperature:pci"; + break; + case SENSOR_KIND_VOLTAGE: + class = "ddi_sensor:voltage:pci"; + break; + case SENSOR_KIND_CURRENT: + class = "ddi_sensor:current:pci"; + break; + default: + return (ENOTSUP); + } + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, 0, "device_type", &type) != DDI_PROP_SUCCESS) { return (EINVAL); @@ -579,8 +594,7 @@ 
ksensor_create_temp_pcidev(dev_info_t *dip, const ksensor_ops_t *ops, pci_name = kmem_asprintf("%x.%x:%s", bus, dev, name); - ret = ksensor_create(dip, ops, arg, pci_name, - "ddi_sensor:temperature:pci", idp); + ret = ksensor_create(dip, ops, arg, pci_name, class, idp); strfree(pci_name); return (ret); } @@ -750,7 +764,7 @@ ksensor_op_kind(id_t id, sensor_ioctl_kind_t *kind) } int -ksensor_op_temperature(id_t id, sensor_ioctl_temperature_t *temp) +ksensor_op_scalar(id_t id, sensor_ioctl_scalar_t *scalar) { int ret; ksensor_t *sensor; @@ -759,7 +773,7 @@ ksensor_op_temperature(id_t id, sensor_ioctl_temperature_t *temp) return (ret); } - ret = sensor->ksensor_ops->kso_temp(sensor->ksensor_arg, temp); + ret = sensor->ksensor_ops->kso_scalar(sensor->ksensor_arg, scalar); ksensor_release(sensor); return (ret); @@ -831,6 +845,20 @@ ksensor_kind_temperature(void *unused, sensor_ioctl_kind_t *k) return (0); } +int +ksensor_kind_current(void *unused, sensor_ioctl_kind_t *k) +{ + k->sik_kind = SENSOR_KIND_CURRENT; + return (0); +} + +int +ksensor_kind_voltage(void *unused, sensor_ioctl_kind_t *k) +{ + k->sik_kind = SENSOR_KIND_VOLTAGE; + return (0); +} + void ksensor_init(void) { diff --git a/usr/src/uts/common/os/softint.c b/usr/src/uts/common/os/softint.c index ecdb038c79..8801340cf9 100644 --- a/usr/src/uts/common/os/softint.c +++ b/usr/src/uts/common/os/softint.c @@ -58,29 +58,29 @@ * * Starting state is IDLE. 
* - * softint() + * softint() * * * (c) - * ____________________________________________________ - * | ^ ^ - * v (a) | (b) | - * IDLE--------------------->PEND--------------------->DRAIN - * ^ | | - * | | | - * | | | - * | | | - * | | | - * | d d - * | | | - * | v v - * | PEND DRAIN - * | (e) & & - * |<-----------------------STEAL STEAL - * ^ | - * | | - * | (e) v - * |_________________________<__________________________| + * ____________________________________________________ + * | ^ ^ + * v (a) | (b) | + * IDLE--------------------->PEND--------------------->DRAIN + * ^ | | + * | | | + * | | | + * | | | + * | | | + * | d d + * | | | + * | v v + * | PEND DRAIN + * | (e) & & + * |<-----------------------STEAL STEAL + * ^ | + * | | + * | (e) v + * |_________________________<__________________________| * * * @@ -146,9 +146,9 @@ uint_t softcall_pokemax = 10; /* * This ensures that softcall entries don't get stuck for long. It's expressed - * in 10 milliseconds as 1 unit. When hires_tick is set or other clock frequency - * is used, softcall_init() ensures that it's still expressed as 1 = 10 milli - * seconds. + * in 10 milliseconds as 1 unit. Regardless of the value of hires_tick or + * clock frequency, softcall_init() ensures that it's still expressed as 1 = + * 10 milliseconds. 
*/ unsigned int softcall_delay = 1; diff --git a/usr/src/uts/common/sys/font.h b/usr/src/uts/common/sys/font.h index 5733686bf3..f8f154f428 100644 --- a/usr/src/uts/common/sys/font.h +++ b/usr/src/uts/common/sys/font.h @@ -84,9 +84,11 @@ typedef struct bitmap_data { } bitmap_data_t; typedef enum { - FONT_AUTO, - FONT_MANUAL, - FONT_BOOT + FONT_AUTO, /* This font is loaded by software */ + FONT_MANUAL, /* This font is loaded manually by user */ + FONT_BOOT, /* This font was passed to kernel by bootloader */ + FONT_BUILTIN, /* This font was built in at compile time */ + FONT_RELOAD /* This font is marked to be re-read from file */ } FONT_FLAGS; struct fontlist { diff --git a/usr/src/uts/common/sys/ksensor_impl.h b/usr/src/uts/common/sys/ksensor_impl.h index 8d91973bc3..7407a264a2 100644 --- a/usr/src/uts/common/sys/ksensor_impl.h +++ b/usr/src/uts/common/sys/ksensor_impl.h @@ -35,7 +35,7 @@ extern void ksensor_init(void); * Operations vectors. */ extern int ksensor_op_kind(id_t, sensor_ioctl_kind_t *); -extern int ksensor_op_temperature(id_t, sensor_ioctl_temperature_t *); +extern int ksensor_op_scalar(id_t, sensor_ioctl_scalar_t *); /* * Registration callbacks. diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h index a5974f6d7d..bd668cdb6b 100644 --- a/usr/src/uts/common/sys/mac.h +++ b/usr/src/uts/common/sys/mac.h @@ -171,6 +171,7 @@ typedef enum { * Please append properties to the end of this list. Do not reorder the list. 
*/ typedef enum { + MAC_PROP_PRIVATE = -1, MAC_PROP_DUPLEX = 0x00000001, MAC_PROP_SPEED, MAC_PROP_STATUS, @@ -248,8 +249,7 @@ typedef enum { MAC_PROP_ADV_50GFDX_CAP, MAC_PROP_EN_50GFDX_CAP, MAC_PROP_EN_FEC_CAP, - MAC_PROP_ADV_FEC_CAP, - MAC_PROP_PRIVATE = -1 + MAC_PROP_ADV_FEC_CAP } mac_prop_id_t; /* diff --git a/usr/src/uts/common/sys/sensors.h b/usr/src/uts/common/sys/sensors.h index a39dfca239..a5d830a933 100644 --- a/usr/src/uts/common/sys/sensors.h +++ b/usr/src/uts/common/sys/sensors.h @@ -33,6 +33,8 @@ extern "C" { */ #define SENSOR_KIND_UNKNOWN 0x00 #define SENSOR_KIND_TEMPERATURE 0x01 +#define SENSOR_KIND_VOLTAGE 0x02 +#define SENSOR_KIND_CURRENT 0x03 /* * Lists of units that senors may have. @@ -41,52 +43,60 @@ extern "C" { #define SENSOR_UNIT_CELSIUS 0x01 #define SENSOR_UNIT_FAHRENHEIT 0x02 #define SENSOR_UNIT_KELVIN 0x03 +#define SENSOR_UNIT_VOLTS 0x04 +#define SENSOR_UNIT_AMPS 0x05 #define SENSOR_IOCTL (('s' << 24) | ('e' << 16) | ('n' << 8)) /* * Ask the sensor what kind of sensor it is. */ -#define SENSOR_IOCTL_TYPE (SENSOR_IOCTL | 0x01) +#define SENSOR_IOCTL_KIND (SENSOR_IOCTL | 0x01) typedef struct sensor_ioctl_kind { uint64_t sik_kind; } sensor_ioctl_kind_t; /* - * Ask the sensor for a temperature measurement. The sensor is responsible for - * returning the units it's in. A temperature measurement is broken down into a + * Ask the sensor for a scalar measurement. The sensor is responsible for + * returning the units it's in. A scalar measurement is broken down into a * signed value and a notion of its granularity. The sit_gran member indicates - * the granularity: the number of increments per degree in the temperature - * measurement (the sit_temp member). sit_gran is signed and the sign indicates - * whether one needs to multiply or divide the granularity. For example, a - * value that set sit_gran to 10 would mean that the value in sit_temp was in - * 10ths of a degree and that to get the actual value in degrees, one would - * divide by 10. 
On the other hand, a negative value means that we effectively - * have to multiply to get there. For example, a value of -2 would indicate that - * each value in sit_temp indicated two degrees and to get the temperature in - * degrees you would multiply sit_temp by two. + * the granularity: the number of increments per unit in the measurement (the + * sis_value member). sis_gran is signed and the sign indicates whether one + * needs to multiply or divide by the granularity. The sis_prec member describes a + * +/- value (taking sis_gran into account) that describes the precision of the + * sensor. + * + * For example, consider a temperature sensor that set sis_gran to 10. This + * would mean that the value in sis_value was in 10ths of a degree and that to + * get the actual value in degrees, one would divide by 10. On the other hand, a + * negative value means that we effectively have to multiply to get there. For + * example, a value of -2 would indicate that each value in sis_value indicated + * two degrees and to get the temperature in degrees you would multiply + * sis_value by two. 
*/ -#define SENSOR_IOCTL_TEMPERATURE (SENSOR_IOCTL | 0x02) +#define SENSOR_IOCTL_SCALAR (SENSOR_IOCTL | 0x02) -typedef struct sensor_ioctl_temperature { - uint32_t sit_unit; - int32_t sit_gran; - uint32_t sit_prec; - uint32_t sit_pad; - int64_t sit_temp; -} sensor_ioctl_temperature_t; +typedef struct sensor_ioctl_scalar { + uint32_t sis_unit; + int32_t sis_gran; + uint32_t sis_prec; + uint32_t sis_pad; + int64_t sis_value; +} sensor_ioctl_scalar_t; #ifdef _KERNEL typedef int (*ksensor_kind_f)(void *, sensor_ioctl_kind_t *); -typedef int (*ksensor_temp_f)(void *, sensor_ioctl_temperature_t *); +typedef int (*ksensor_scalar_f)(void *, sensor_ioctl_scalar_t *); typedef struct { - ksensor_kind_f kso_kind; - ksensor_temp_f kso_temp; + ksensor_kind_f kso_kind; + ksensor_scalar_f kso_scalar; } ksensor_ops_t; extern int ksensor_kind_temperature(void *, sensor_ioctl_kind_t *); +extern int ksensor_kind_voltage(void *, sensor_ioctl_kind_t *); +extern int ksensor_kind_current(void *, sensor_ioctl_kind_t *); /* * Create a sensor where the class and name is supplied. @@ -95,11 +105,11 @@ extern int ksensor_create(dev_info_t *, const ksensor_ops_t *, void *, const char *, const char *, id_t *); /* - * Create a temperature sensor for a PCI device. If this is not a device-wide + * Create a scalar sensor for a PCI device. If this is not a device-wide * (e.g. per-function) sensor, this should not be used. */ -extern int ksensor_create_temp_pcidev(dev_info_t *, const ksensor_ops_t *, - void *, const char *, id_t *); +extern int ksensor_create_scalar_pcidev(dev_info_t *, uint_t, + const ksensor_ops_t *, void *, const char *, id_t *); /* * Remove a named or all sensors from this driver. diff --git a/usr/src/uts/common/sys/smbios.h b/usr/src/uts/common/sys/smbios.h index 55048d549d..b8b470b79a 100644 --- a/usr/src/uts/common/sys/smbios.h +++ b/usr/src/uts/common/sys/smbios.h @@ -22,6 +22,7 @@ /* * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved. 
* Copyright (c) 2018, Joyent, Inc. + * Copyright 2020 Oxide Computer Company * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -527,6 +528,8 @@ typedef struct smbios_processor { #define SMB_PRU_BGA1392 0x3A /* Socket BGA1392 */ #define SMB_PRU_BGA1510 0x3B /* Socket BGA1510 */ #define SMB_PRU_BGA1528 0x3C /* Socket BGA1528 */ +#define SMB_PRU_LGA4189 0x3D /* Socket LGA4189 */ +#define SMB_PRU_LGA1200 0x3E /* Socket LGA1200 */ #define SMB_PRC_RESERVED 0x0001 /* reserved */ #define SMB_PRC_UNKNOWN 0x0002 /* unknown */ @@ -944,6 +947,9 @@ typedef struct smbios_slot { uint8_t smbl_df; /* device/function number */ uint8_t smbl_dbw; /* data bus width */ uint8_t smbl_npeers; /* PCIe bifurcation peers */ + uint8_t smbl_info; /* slot info */ + uint8_t smbl_pwidth; /* slot physical width */ + uint32_t smbl_pitch; /* slot pitch in 10um */ } smbios_slot_t; #define SMB_SLT_OTHER 0x01 /* other */ @@ -976,8 +982,8 @@ typedef struct smbios_slot { #define SMB_SLT_MXM_V 0x1C /* MXM Type IV */ #define SMB_SLT_MXM3_A 0x1D /* MXM 3.0 Type A */ #define SMB_SLT_MXM3_B 0x1E /* MXM 3.0 Type B */ -#define SMB_SLT_PCIEG2_SFF 0x1F /* PCI Express Gen 2 SFF-8639 */ -#define SMB_SLT_PCIEG3_SFF 0x20 /* PCI Express Gen 3 SFF-8639 */ +#define SMB_SLT_PCIEG2_SFF 0x1F /* PCI Express Gen 2 SFF-8639 (U.2) */ +#define SMB_SLT_PCIEG3_SFF 0x20 /* PCI Express Gen 3 SFF-8639 (U.2) */ /* * These lines must be on one line for the string generating code. 
*/ @@ -986,6 +992,11 @@ typedef struct smbios_slot { #define SMB_SLT_PCIE_M52_WOBSKO 0x22 /* PCI Express Mini 52-pin without bottom-side keep-outs */ /* END CSTYLED */ #define SMB_SLT_PCIE_M76 0x23 /* PCI Express Mini 72-pin */ +#define SMB_SLT_PCIEG4_SFF 0x24 /* PCI Express Gen 4 SFF-8639 (U.2) */ +#define SMB_SLT_PCIEG5_SFF 0x25 /* PCI Express Gen 5 SFF-8639 (U.2) */ +#define SMB_SLT_OCP3_SFF 0x26 /* OCP NIC 3.0 Small Form Factor */ +#define SMB_SLT_OCP3_LFF 0x27 /* OCP NIC 3.0 Large Form Factor */ +#define SMB_SLT_OCP_PRE 0x28 /* OCP NIC prior to 3.0 */ #define SMB_SLT_CXL1 0x30 /* CXL Flexbus 1.0 */ #define SMB_SLT_PC98_C20 0xA0 /* PC-98/C20 */ #define SMB_SLT_PC98_C24 0xA1 /* PC-98/C24 */ @@ -1016,6 +1027,15 @@ typedef struct smbios_slot { #define SMB_SLT_PCIE4G4 0xBB /* PCI Exp. Gen 4 x4 */ #define SMB_SLT_PCIE4G8 0xBC /* PCI Exp. Gen 4 x8 */ #define SMB_SLT_PCIE4G16 0xBD /* PCI Exp. Gen 4 x16 */ +#define SMB_SLT_PCIE5G 0xBE /* PCI Exp. Gen 5 */ +#define SMB_SLT_PCIE5G1 0xBF /* PCI Exp. Gen 5 x1 */ +#define SMB_SLT_PCIE5G2 0xC0 /* PCI Exp. Gen 5 x2 */ +#define SMB_SLT_PCIE5G4 0xC1 /* PCI Exp. Gen 5 x4 */ +#define SMB_SLT_PCIE5G8 0xC2 /* PCI Exp. Gen 5 x8 */ +#define SMB_SLT_PCIE5G16 0xC3 /* PCI Exp. Gen 5 x16 */ +#define SMB_SLT_PCIEG6P 0xC4 /* PCI Exp. Gen 6+ */ +#define SMB_SLT_EDSFF_E1 0xC5 /* Ent. and DC 1U E1 Form Factor */ +#define SMB_SLT_EDSFF_E3 0xC6 /* Ent. 
and DC 3" E3 Form Factor */ #define SMB_SLW_OTHER 0x01 /* other */ #define SMB_SLW_UNKNOWN 0x02 /* unknown */ @@ -1041,6 +1061,8 @@ typedef struct smbios_slot { #define SMB_SLL_UNKNOWN 0x02 /* unknown */ #define SMB_SLL_SHORT 0x03 /* short length */ #define SMB_SLL_LONG 0x04 /* long length */ +#define SMB_SLL_2IN5 0x05 /* 2.5" drive form factor */ +#define SMB_SLL_3IN5 0x06 /* 3.5" drive form factor */ #define SMB_SLCH1_UNKNOWN 0x01 /* characteristics unknown */ #define SMB_SLCH1_5V 0x02 /* provides 5.0V */ @@ -1055,6 +1077,9 @@ typedef struct smbios_slot { #define SMB_SLCH2_HOTPLUG 0x02 /* slot supports hot-plug devices */ #define SMB_SLCH2_SMBUS 0x04 /* slot supports SMBus signal */ #define SMB_SLCH2_BIFUR 0x08 /* slot supports PCIe bifurcation */ +#define SMB_SLCH2_SURPREM 0x10 /* slot supports surprise removal */ +#define SMB_SLCH2_CXL1 0x20 /* Flexbus slot, CXL 1.0 capable */ +#define SMB_SLCH2_CXL2 0x40 /* Flexbus slot, CXL 2.0 capable */ /* * SMBIOS 7.10.9 Slot Peer Devices @@ -1178,7 +1203,7 @@ typedef struct smbios_memarray { #define SMB_MAL_PC98C24 0xA1 /* PC-98/C24 add-on card */ #define SMB_MAL_PC98E 0xA2 /* PC-98/E add-on card */ #define SMB_MAL_PC98LB 0xA3 /* PC-98/Local bus add-on card */ -#define SMB_MAL_CXL1 0xA4 /* CXL Flexbus 1.0 add-on card */ +#define SMB_MAL_CXL1 0xA4 /* CXL add-on card */ #define SMB_MAU_OTHER 0x01 /* other */ #define SMB_MAU_UNKNOWN 0x02 /* unknown */ @@ -1285,6 +1310,8 @@ typedef struct smbios_memdevice { #define SMB_MDT_LOGNV 0x1F /* Logical non-volatile device */ #define SMB_MDT_HBM 0x20 /* High Bandwidth Memory */ #define SMB_MDT_HBM2 0x21 /* High Bandwidth Memory 2 */ +#define SMB_MDT_DDR5 0x22 /* DDR5 */ +#define SMB_MDT_LPDDR5 0x23 /* LPDDR5 */ #define SMB_MDF_OTHER 0x0002 /* other */ #define SMB_MDF_UNKNOWN 0x0004 /* unknown */ @@ -1313,7 +1340,7 @@ typedef struct smbios_memdevice { #define SMB_MTECH_NVDIMM_N 0x04 /* NVDIMM-N */ #define SMB_MTECH_NVDIMM_F 0x05 /* NVDIMM-F */ #define SMB_MTECH_NVDIMM_P 0x06 /* 
NVDIMM-P */ -#define SMB_MTECH_INTCPM 0x07 /* Intel Optane DC Persistent Memory */ +#define SMB_MTECH_INTCPM 0x07 /* Intel Optane persistent memory */ #define SMB_MOMC_RESERVED 0x01 /* reserved */ #define SMB_MOMC_OTHER 0x02 /* other */ @@ -1838,7 +1865,8 @@ typedef struct smbios_memdevice_ext { #define SMB_VERSION_31 0x0301 /* SMBIOS encoding for DMTF spec 3.1 */ #define SMB_VERSION_32 0x0302 /* SMBIOS encoding for DMTF spec 3.2 */ #define SMB_VERSION_33 0x0303 /* SMBIOS encoding for DMTF spec 3.3 */ -#define SMB_VERSION SMB_VERSION_33 /* SMBIOS latest version definitions */ +#define SMB_VERSION_34 0x0304 /* SMBIOS encoding for DMTF spec 3.4 */ +#define SMB_VERSION SMB_VERSION_34 /* SMBIOS latest version definitions */ #define SMB_O_NOCKSUM 0x1 /* do not verify header checksums */ #define SMB_O_NOVERS 0x2 /* do not verify header versions */ diff --git a/usr/src/uts/common/sys/smbios_impl.h b/usr/src/uts/common/sys/smbios_impl.h index 69ca79e94f..4b951b702f 100644 --- a/usr/src/uts/common/sys/smbios_impl.h +++ b/usr/src/uts/common/sys/smbios_impl.h @@ -22,6 +22,7 @@ /* * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved. * Copyright (c) 2018, Joyent, Inc. + * Copyright 2020 Oxide Computer Company * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -250,9 +251,26 @@ typedef struct smb_slot { uint8_t smbsl_dbw; /* Data bus width */ uint8_t smbsl_npeers; /* Peer bdf groups */ smb_slot_peer_t smbsl_peers[]; /* bifurcation peers */ + /* There are later additions in 3.4+, see smbios_slot_cont_t */ } smb_slot_t; /* + * After the variable number of smbsl_peers, the smbios_slot has continued in + * size and has the following members defined as of version 3.4. These occur + * starting at byte 14 + 5 * smbsl_npeers. 
+ */ +typedef struct smb_slot_cont { + uint8_t smbsl_info; /* slot info */ + uint8_t smbsl_pwidth; /* slot physical width */ + uint16_t smbsl_pitch; /* slot pitch */ +} smb_slot_cont_t; + +/* + * The first byte that the smb_slot_cont_t is defined to start at. + */ +#define SMB_SLOT_CONT_START 0x14 + +/* * SMBIOS implementation structure for SMB_TYPE_OBDEVS. */ typedef struct smb_obdev { diff --git a/usr/src/uts/common/sys/socket_proto.h b/usr/src/uts/common/sys/socket_proto.h index 4e1a4a0f35..825d0501c7 100644 --- a/usr/src/uts/common/sys/socket_proto.h +++ b/usr/src/uts/common/sys/socket_proto.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2020 Joyent, Inc. */ #ifndef _SYS_SOCKET_PROTO_H_ @@ -202,7 +203,16 @@ struct sock_upcalls_s { void (*su_signal_oob)(sock_upper_handle_t, ssize_t); void (*su_zcopy_notify)(sock_upper_handle_t); void (*su_set_error)(sock_upper_handle_t, int); + /* + * NOTE: This function frees upper handle items. Caller cannot + * rely on them after this upcall. + */ void (*su_closed)(sock_upper_handle_t); + /* + * NOTE: This function MUST be implemented without using lower-level + * downcalls or accesses. This allows callers to ensure su_closed() + * upcalls can happen independently or concurrently. + */ vnode_t *(*su_get_vnode)(sock_upper_handle_t); }; diff --git a/usr/src/uts/common/sys/time.h b/usr/src/uts/common/sys/time.h index a69bf4dd63..f6cfa1a7e5 100644 --- a/usr/src/uts/common/sys/time.h +++ b/usr/src/uts/common/sys/time.h @@ -16,6 +16,8 @@ * * Copyright 2013 Nexenta Systems, Inc. All rights reserved. * Copyright 2016 Joyent, Inc. + * + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. 
*/ /* @@ -365,14 +367,14 @@ extern todinfo_t utc_to_tod(time_t); extern time_t tod_to_utc(todinfo_t); extern int hr_clock_lock(void); extern void hr_clock_unlock(int); -extern hrtime_t gethrtime(void); -extern hrtime_t gethrtime_unscaled(void); +extern hrtime_t gethrtime(void); +extern hrtime_t gethrtime_unscaled(void); extern hrtime_t gethrtime_max(void); extern hrtime_t gethrtime_waitfree(void); extern void scalehrtime(hrtime_t *); extern uint64_t unscalehrtime(hrtime_t); -extern void gethrestime(timespec_t *); -extern time_t gethrestime_sec(void); +extern void gethrestime(timespec_t *); +extern time_t gethrestime_sec(void); extern void gethrestime_lasttick(timespec_t *); extern void hrt2ts(hrtime_t, timestruc_t *); extern hrtime_t ts2hrt(const timestruc_t *); @@ -408,6 +410,7 @@ int futimesat(int, const char *, const struct timeval *); int getitimer(int, struct itimerval *); int utimes(const char *, const struct timeval *); + #if defined(_XPG4_2) int setitimer(int, const struct itimerval *_RESTRICT_KYWD, struct itimerval *_RESTRICT_KYWD); @@ -418,6 +421,22 @@ int setitimer(int, struct itimerval *_RESTRICT_KYWD, #endif /* !defined(_KERNEL) ... defined(_XPG4_2) */ +#if !defined(_KERNEL) && !defined(_STRICT_SYMBOLS) +int futimes(int, const struct timeval *); +int lutimes(const char *, const struct timeval *); + +#define TIMESPEC_TO_TIMEVAL(tv, ts) { \ + (tv)->tv_sec = (ts)->tv_sec; \ + (tv)->tv_usec = (ts)->tv_nsec / 1000; \ +} + +#define TIMEVAL_TO_TIMESPEC(tv, ts) { \ + (ts)->tv_sec = (tv)->tv_sec; \ + (ts)->tv_nsec = (tv)->tv_usec * 1000; \ +} + +#endif /* !defined(_KERNEL) && !defined(_STRICT_SYMBOLS) */ + /* * gettimeofday() and settimeofday() were included in SVr4 due to their * common use in BSD based applications. 
They were to be included exactly diff --git a/usr/src/uts/i86pc/boot/boot_fb.c b/usr/src/uts/i86pc/boot/boot_fb.c index 1ac4789af7..e0e79bd14e 100644 --- a/usr/src/uts/i86pc/boot/boot_fb.c +++ b/usr/src/uts/i86pc/boot/boot_fb.c @@ -354,28 +354,44 @@ boot_get_color(uint32_t *fg, uint32_t *bg) /* ansi to solaris colors, see also boot_console.c */ if (fb_info.inverse == B_TRUE || fb_info.inverse_screen == B_TRUE) { - if (fb_info.fg_color < 16) - *bg = dim_xlate[fb_info.fg_color]; - else + if (fb_info.fg_color < XLATE_NCOLORS) { + /* + * white fg -> bright white bg + */ + if (fb_info.fg_color == pc_white) + *bg = brt_xlate[fb_info.fg_color]; + else + *bg = dim_xlate[fb_info.fg_color]; + } else { *bg = fb_info.fg_color; + } - if (fb_info.bg_color < 16) - *fg = brt_xlate[fb_info.bg_color]; - else + if (fb_info.bg_color < XLATE_NCOLORS) { + if (fb_info.bg_color == pc_white) + *fg = brt_xlate[fb_info.bg_color]; + else + *fg = dim_xlate[fb_info.bg_color]; + } else { *fg = fb_info.bg_color; + } } else { - if (fb_info.bg_color < 16) { - if (fb_info.bg_color == 7) + if (fb_info.fg_color < XLATE_NCOLORS) { + if (fb_info.fg_color == pc_white) + *fg = brt_xlate[fb_info.fg_color]; + else + *fg = dim_xlate[fb_info.fg_color]; + } else { + *fg = fb_info.fg_color; + } + + if (fb_info.bg_color < XLATE_NCOLORS) { + if (fb_info.bg_color == pc_white) *bg = brt_xlate[fb_info.bg_color]; else *bg = dim_xlate[fb_info.bg_color]; } else { *bg = fb_info.bg_color; } - if (fb_info.fg_color < 16) - *fg = dim_xlate[fb_info.fg_color]; - else - *fg = fb_info.fg_color; } } diff --git a/usr/src/uts/i86pc/io/apix/apix.c b/usr/src/uts/i86pc/io/apix/apix.c index 18dee7499a..cedc49147e 100644 --- a/usr/src/uts/i86pc/io/apix/apix.c +++ b/usr/src/uts/i86pc/io/apix/apix.c @@ -186,18 +186,6 @@ static void *apix_hdlp; static int apix_is_enabled = 0; /* - * Flag to indicate if APIX is to be enabled only for platforms - * with specific hw feature(s). 
- */ -int apix_hw_chk_enable = 1; - -/* - * Hw features that are checked for enabling APIX support. - */ -#define APIX_SUPPORT_X2APIC 0x00000001 -uint_t apix_supported_hw = APIX_SUPPORT_X2APIC; - -/* * apix_lock is used for cpu selection and vector re-binding */ lock_t apix_lock; @@ -272,22 +260,10 @@ apix_probe() if (get_hwenv() & HW_XEN_HVM) return (PSM_FAILURE); - /* check for hw features if specified */ - if (apix_hw_chk_enable) { - /* check if x2APIC mode is supported */ - if ((apix_supported_hw & APIX_SUPPORT_X2APIC) == - APIX_SUPPORT_X2APIC) { - if (apic_local_mode() == LOCAL_X2APIC) { - /* x2APIC mode activated by BIOS, switch ops */ - apic_mode = LOCAL_X2APIC; - apic_change_ops(); - } else if (!apic_detect_x2apic()) { - /* x2APIC mode is not supported in the hw */ - apix_enable = 0; - } - } - if (apix_enable == 0) - return (PSM_FAILURE); + if (apic_local_mode() == LOCAL_X2APIC) { + /* x2APIC mode activated by BIOS, switch ops */ + apic_mode = LOCAL_X2APIC; + apic_change_ops(); } rval = apic_probe_common(apix_psm_info.p_mach_idstring); diff --git a/usr/src/uts/i86pc/io/gfx_private/gfxp_fb.c b/usr/src/uts/i86pc/io/gfx_private/gfxp_fb.c index 6d1a99ea05..0d2d1fe1de 100644 --- a/usr/src/uts/i86pc/io/gfx_private/gfxp_fb.c +++ b/usr/src/uts/i86pc/io/gfx_private/gfxp_fb.c @@ -11,6 +11,7 @@ /* * Copyright 2016 Toomas Soome <tsoome@me.com> + * Copyright 2020 RackTop Systems, Inc. */ /* @@ -81,6 +82,17 @@ gfxp_check_for_console(dev_info_t *devi, struct gfxp_fb_softc *softc, uint16_t data16; /* + * fb_info is filled in by data gathered by the bootloader. + * In particular we are interested in "paddr" which is the physical + * address of the framebuffer. If that is not zero, then we have + * a valid framebuffer and we can use this device as a console. 
+ */ + if (fb_info.paddr != 0) { + softc->flags |= GFXP_FLAG_CONSOLE; + return; + } + + /* * Based on Section 11.3, "PCI Display Subsystem Initialization", * of the 1.1 PCI-to-PCI Bridge Architecture Specification * determine if this is the boot console device. First, see diff --git a/usr/src/uts/i86pc/io/hpet_acpi.c b/usr/src/uts/i86pc/io/hpet_acpi.c index ac5a885a38..aace99b18b 100644 --- a/usr/src/uts/i86pc/io/hpet_acpi.c +++ b/usr/src/uts/i86pc/io/hpet_acpi.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2020 Oxide Computer Company */ #include <sys/hpet_acpi.h> @@ -34,6 +35,8 @@ #include <sys/clock.h> #include <sys/archsystm.h> #include <sys/cpupart.h> +#include <sys/x86_archext.h> +#include <sys/prom_debug.h> static int hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags); static boolean_t hpet_install_proxy(void); @@ -140,17 +143,36 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) (void) memset(&hpet_info, 0, sizeof (hpet_info)); hpet.supported = HPET_NO_SUPPORT; - if (idle_cpu_no_deep_c) + if ((get_hwenv() & HW_XEN_HVM) != 0) { + /* + * In some AWS EC2 guests, though the HPET is advertised via + * ACPI, programming the interrupt on the non-legacy timer can + * result in an immediate reset of the instance. It is not + * currently possible to tell whether this is an instance with + * broken HPET emulation or not, so we simply disable it across + * the board. + */ + PRM_POINT("will not program HPET in Xen HVM"); return (DDI_FAILURE); + } - if (!cpuid_deep_cstates_supported()) + if (idle_cpu_no_deep_c || + !cpuid_deep_cstates_supported()) { + /* + * If Deep C-States are disabled or not supported, then we do + * not need to program the HPET at all as it will not + * subsequently be used. + */ + PRM_POINT("no need to program the HPET"); return (DDI_FAILURE); + } hpet_establish_hooks(); /* * Get HPET ACPI table 1. 
*/ + PRM_POINT("AcpiGetTable() HPET #1"); if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_HPET, HPET_TABLE_1, (ACPI_TABLE_HEADER **)&hpet_table))) { cmn_err(CE_NOTE, "!hpet_acpi: unable to get ACPI HPET table"); @@ -162,14 +184,18 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) return (DDI_FAILURE); } + PRM_POINT("hpet_memory_map()"); la = hpet_memory_map(hpet_table); + PRM_DEBUG(la); if (la == NULL) { cmn_err(CE_NOTE, "!hpet_acpi: memory map HPET failed"); return (DDI_FAILURE); } hpet_info.logical_address = la; + PRM_POINT("hpet_read_gen_cap()"); ret = hpet_read_gen_cap(&hpet_info); + PRM_DEBUG(ret); hpet_info.gen_cap.counter_clk_period = HPET_GCAP_CNTR_CLK_PERIOD(ret); hpet_info.gen_cap.vendor_id = HPET_GCAP_VENDOR_ID(ret); hpet_info.gen_cap.leg_route_cap = HPET_GCAP_LEG_ROUTE_CAP(ret); @@ -189,6 +215,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) } num_timers = (uint_t)hpet_info.gen_cap.num_tim_cap; + PRM_DEBUG(num_timers); if ((num_timers < 3) || (num_timers > 32)) { cmn_err(CE_NOTE, "!hpet_acpi: invalid number of HPET timers " "%lx", (long)num_timers); @@ -197,20 +224,23 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) hpet_info.timer_n_config = (hpet_TN_conf_cap_t *)kmem_zalloc( num_timers * sizeof (uint64_t), KM_SLEEP); + PRM_POINT("hpet_read_gen_config()"); ret = hpet_read_gen_config(&hpet_info); hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret); hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret); /* - * Solaris does not use the HPET Legacy Replacement Route capabilities. + * illumos does not use the HPET Legacy Replacement Route capabilities. * This feature has been off by default on test systems. * The HPET spec does not specify if Legacy Replacement Route is - * on or off by default, so we explicitely set it off here. + * on or off by default, so we explicitly set it off here. * It should not matter which mode the HPET is in since we use * the first available non-legacy replacement timer: timer 2. 
*/ + PRM_POINT("hpet_read_gen_config()"); (void) hpet_set_leg_rt_cnf(&hpet_info, 0); + PRM_POINT("hpet_read_gen_config() again"); ret = hpet_read_gen_config(&hpet_info); hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret); hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret); @@ -218,6 +248,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) hpet_info.gen_intrpt_stat = hpet_read_gen_intrpt_stat(&hpet_info); hpet_info.main_counter_value = hpet_read_main_counter_value(&hpet_info); + PRM_POINT("disable timer loop..."); for (ti = 0; ti < num_timers; ++ti) { ret = hpet_read_timer_N_config(&hpet_info, ti); /* @@ -231,6 +262,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) hpet_info.timer_n_config[ti] = hpet_convert_timer_N_config(ret); } + PRM_POINT("disable timer loop complete"); /* * Be aware the Main Counter may need to be initialized in the future @@ -238,6 +270,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) * The HPET's Main Counter does not need to be initialize to a specific * value before starting it for use to wake up CPUs from Deep C-States. */ + PRM_POINT("hpet_start_main_counter()"); if (hpet_start_main_counter(&hpet_info) != AE_OK) { cmn_err(CE_NOTE, "!hpet_acpi: hpet_start_main_counter failed"); return (DDI_FAILURE); @@ -247,6 +280,7 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) /* * Read main counter twice to record HPET latency for debugging. 
*/ + PRM_POINT("TSC and HPET reads:"); hpet_info.tsc[0] = tsc_read(); hpet_info.hpet_main_counter_reads[0] = hpet_read_main_counter_value(&hpet_info); @@ -255,6 +289,12 @@ hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags) hpet_read_main_counter_value(&hpet_info); hpet_info.tsc[2] = tsc_read(); + PRM_DEBUG(hpet_info.hpet_main_counter_reads[0]); + PRM_DEBUG(hpet_info.hpet_main_counter_reads[1]); + PRM_DEBUG(hpet_info.tsc[0]); + PRM_DEBUG(hpet_info.tsc[1]); + PRM_DEBUG(hpet_info.tsc[2]); + ret = hpet_read_gen_config(&hpet_info); hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret); hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret); @@ -293,6 +333,7 @@ hpet_acpi_fini(void) static int hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags) { + PRM_POINT("hpet_get_IOAPIC_intr_capable_timer()"); if (hpet_get_IOAPIC_intr_capable_timer(&hpet_info) == -1) { cmn_err(CE_WARN, "!hpet_acpi: get ioapic intr failed."); return (DDI_FAILURE); @@ -300,6 +341,7 @@ hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags) hpet_init_proxy_data(); + PRM_POINT("hpet_install_interrupt_handler()"); if (hpet_install_interrupt_handler(&hpet_isr, hpet_info.cstate_timer.intr) != AE_OK) { cmn_err(CE_WARN, "!hpet_acpi: install interrupt failed."); @@ -314,13 +356,16 @@ hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags) * Avoid a possibly stuck interrupt by programing the HPET's timer here * before the I/O APIC is programmed to handle this interrupt. */ + PRM_POINT("hpet_timer_set_up()"); hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer, hpet_info.cstate_timer.intr); + PRM_POINT("back from hpet_timer_set_up()"); /* * All HPET functionality is supported. 
*/ hpet.supported = HPET_FULL_SUPPORT; + PRM_POINT("HPET full support"); return (DDI_SUCCESS); } @@ -564,14 +609,25 @@ hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l) } static void -hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l) +hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t conf) { - if (hip->timer_n_config[n].size_cap == 1) - *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS( - hip->logical_address, n) = l; - else - *(uint32_t *)HPET_TIMER_N_CONF_ADDRESS( - hip->logical_address, n) = (uint32_t)(0xFFFFFFFF & l); + /* + * The configuration register size is not affected by the size + * capability; it is always a 64-bit value. The top 32-bit half of + * this register is always read-only so we constrain our write to the + * bottom half. + */ + uint32_t *confaddr = (uint32_t *)HPET_TIMER_N_CONF_ADDRESS( + hip->logical_address, n); + uint32_t conf32 = 0xFFFFFFFF & conf; + + PRM_DEBUG(n); + PRM_DEBUG(conf); + PRM_DEBUG(conf32); + + *confaddr = conf32; + + PRM_POINT("write done"); } static void @@ -630,16 +686,19 @@ hpet_install_interrupt_handler(avfunc func, int vector) static int hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip) { - int timer; - int intr; + int timer; + int intr; for (timer = HPET_FIRST_NON_LEGACY_TIMER; timer < hip->gen_cap.num_tim_cap; ++timer) { - if (!hpet_timer_available(hip->allocated_timers, timer)) continue; intr = lowbit(hip->timer_n_config[timer].int_route_cap) - 1; + + PRM_DEBUG(timer); + PRM_DEBUG(intr); + if (intr >= 0) { hpet_timer_alloc(&hip->allocated_timers, timer); hip->cstate_timer.timer = timer; @@ -678,7 +737,12 @@ hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt) { uint64_t conf; + PRM_DEBUG(timer_n); + PRM_DEBUG(interrupt); + + PRM_POINT("hpet_read_timer_N_config()"); conf = hpet_read_timer_N_config(hip, timer_n); + PRM_DEBUG(conf); /* * Caller is required to verify this interrupt route is supported. 
@@ -691,7 +755,10 @@ hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt) conf &= ~HPET_TIMER_N_INT_ENB_CNF_BIT; /* disabled */ conf |= HPET_TIMER_N_INT_TYPE_CNF_BIT; /* Level Triggered */ + PRM_POINT("hpet_write_timer_N_config()"); + PRM_DEBUG(conf); hpet_write_timer_N_config(hip, timer_n, conf); + PRM_POINT("back from hpet_write_timer_N_config()"); } /* diff --git a/usr/src/uts/i86pc/io/mp_platform_common.c b/usr/src/uts/i86pc/io/mp_platform_common.c index aea7f2e856..9b9944fbd0 100644 --- a/usr/src/uts/i86pc/io/mp_platform_common.c +++ b/usr/src/uts/i86pc/io/mp_platform_common.c @@ -25,6 +25,7 @@ * Copyright (c) 2017 by Delphix. All rights reserved. * Copyright (c) 2019, Joyent, Inc. * Copyright 2020 RackTop Systems, Inc. + * Copyright 2020 Oxide Computer Company */ /* * Copyright (c) 2010, Intel Corporation. @@ -72,6 +73,7 @@ #include <sys/note.h> #include <sys/pci_intr_lib.h> #include <sys/sunndi.h> +#include <sys/prom_debug.h> #if !defined(__xpv) #include <sys/hpet.h> #include <sys/clock.h> @@ -334,7 +336,7 @@ apic_probe_common(char *modname) uint32_t mpct_addr, ebda_start = 0, base_mem_end; caddr_t biosdatap; caddr_t mpct = NULL; - caddr_t fptr; + caddr_t fptr = NULL; int i, mpct_size = 0, mapsize, retval = PSM_FAILURE; ushort_t ebda_seg, base_mem_size; struct apic_mpfps_hdr *fpsp; @@ -342,6 +344,8 @@ apic_probe_common(char *modname) int bypass_cpu_and_ioapics_in_mptables; int acpi_user_options; + PRM_POINT("apic_probe_common()"); + if (apic_forceload < 0) return (retval); @@ -359,11 +363,15 @@ apic_probe_common(char *modname) if (!apic_use_acpi) apic_use_acpi_madt_only = 0; + PRM_POINT("acpi_probe()"); retval = acpi_probe(modname); + PRM_DEBUG(retval); /* in UEFI system, there is no BIOS data */ - if (ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), 0, "efi-systab")) + if (ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), 0, "efi-systab")) { + PRM_POINT("UEFI system!"); goto apic_ret; + } /* * mapin the bios data area 40:0 @@ -371,17 
+379,21 @@ apic_probe_common(char *modname) * 40:0Eh - two-byte location for the exact starting address of * the EBDA segment for EISA */ + PRM_POINT("psm_map_phys()"); biosdatap = psm_map_phys(0x400, 0x20, PROT_READ); + PRM_DEBUG(biosdatap); if (!biosdatap) goto apic_ret; fpsp = (struct apic_mpfps_hdr *)NULL; mapsize = MPFPS_RAM_WIN_LEN; /*LINTED: pointer cast may result in improper alignment */ ebda_seg = *((ushort_t *)(biosdatap+0xe)); + PRM_DEBUG(ebda_seg); /* check the 1k of EBDA */ if (ebda_seg) { ebda_start = ((uint32_t)ebda_seg) << 4; fptr = psm_map_phys(ebda_start, MPFPS_RAM_WIN_LEN, PROT_READ); + PRM_DEBUG(fptr); if (fptr) { if (!(fpsp = apic_find_fps_sig(fptr, MPFPS_RAM_WIN_LEN))) @@ -389,6 +401,7 @@ apic_probe_common(char *modname) } } /* If not in EBDA, check the last k of system base memory */ + PRM_DEBUG(fpsp); if (!fpsp) { /*LINTED: pointer cast may result in improper alignment */ base_mem_size = *((ushort_t *)(biosdatap + 0x13)); @@ -402,6 +415,7 @@ apic_probe_common(char *modname) fptr = psm_map_phys(base_mem_end, MPFPS_RAM_WIN_LEN, PROT_READ); + PRM_DEBUG(fptr); if (fptr) { if (!(fpsp = apic_find_fps_sig(fptr, @@ -410,13 +424,16 @@ apic_probe_common(char *modname) } } } + PRM_POINT("psm_unmap_phys()"); psm_unmap_phys(biosdatap, 0x20); /* If still cannot find it, check the BIOS ROM space */ + PRM_DEBUG(fpsp); if (!fpsp) { mapsize = MPFPS_ROM_WIN_LEN; fptr = psm_map_phys(MPFPS_ROM_WIN_START, MPFPS_ROM_WIN_LEN, PROT_READ); + PRM_DEBUG(fptr); if (fptr) { if (!(fpsp = apic_find_fps_sig(fptr, MPFPS_ROM_WIN_LEN))) { @@ -426,13 +443,18 @@ apic_probe_common(char *modname) } } + PRM_DEBUG(fptr); + PRM_DEBUG(fpsp); + PRM_POINT("apic_checksum()"); if (apic_checksum((caddr_t)fpsp, fpsp->mpfps_length * 16) != 0) { + PRM_POINT("psm_unmap_phys()"); psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); goto apic_ret; } apic_spec_rev = fpsp->mpfps_spec_rev; if ((apic_spec_rev != 04) && (apic_spec_rev != 01)) { + PRM_POINT("psm_unmap_phys()"); psm_unmap_phys(fptr, 
MPFPS_ROM_WIN_LEN); goto apic_ret; } @@ -442,7 +464,9 @@ apic_probe_common(char *modname) /* check default configuration (dual CPUs) */ if ((apic_defconf = fpsp->mpfps_featinfo1) != 0) { + PRM_POINT("psm_unmap_phys()"); psm_unmap_phys(fptr, mapsize); + PRM_POINT("apic_handle_defconf()"); if ((retval = apic_handle_defconf()) != PSM_SUCCESS) return (retval); @@ -451,6 +475,7 @@ apic_probe_common(char *modname) /* MP Configuration Table */ mpct_addr = (uint32_t)(fpsp->mpfps_mpct_paddr); + PRM_DEBUG(mpct_addr); psm_unmap_phys(fptr, mapsize); /* unmap floating ptr struct */ @@ -472,6 +497,7 @@ apic_probe_common(char *modname) } mpct_size = (int)hdrp->mpcnf_tbl_length; + PRM_POINT("apic_set_pwroff_method_from_mpcnfhdr()"); apic_set_pwroff_method_from_mpcnfhdr(hdrp); psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr)); @@ -497,6 +523,8 @@ apic_probe_common(char *modname) hdrp = (struct apic_mp_cnf_hdr *)mpct; apicadr = (uint32_t *)mapin_apic((uint32_t)hdrp->mpcnf_local_apic, APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); + PRM_DEBUG(hdrp); + PRM_DEBUG(apicadr); if (!apicadr) goto apic_fail1; @@ -509,15 +537,20 @@ apic_probe_common(char *modname) } apic_fail1: + PRM_POINT("apic_fail1:"); psm_unmap_phys(mpct, mpct_size); mpct = NULL; apic_ret: + PRM_POINT("apic_ret:"); if (retval == PSM_SUCCESS) { extern int apic_ioapic_method_probe(); - if ((retval = apic_ioapic_method_probe()) == PSM_SUCCESS) + PRM_POINT("apic_ioapic_method_probe()"); + if ((retval = apic_ioapic_method_probe()) == PSM_SUCCESS) { + PRM_POINT("SUCCESS"); return (PSM_SUCCESS); + } } for (i = 0; i < apic_io_max; i++) @@ -533,6 +566,7 @@ apic_ret: if (mpct) psm_unmap_phys(mpct, mpct_size); + PRM_DEBUG(retval); return (retval); } @@ -632,20 +666,24 @@ acpi_probe(char *modname) if (!apic_use_acpi) return (PSM_FAILURE); + PRM_POINT("AcpiGetTable(MADT)"); if (AcpiGetTable(ACPI_SIG_MADT, 1, (ACPI_TABLE_HEADER **) &acpi_mapic_dtp) != AE_OK) { cmn_err(CE_WARN, "!acpi_probe: No MADT found!"); return 
(PSM_FAILURE); } + PRM_DEBUG((uint32_t)acpi_mapic_dtp->Address); + PRM_POINT("mapin_apic()"); apicadr = mapin_apic((uint32_t)acpi_mapic_dtp->Address, APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); if (!apicadr) return (PSM_FAILURE); if ((local_ids = (uint32_t *)kmem_zalloc(NCPU * sizeof (uint32_t), - KM_NOSLEEP)) == NULL) + KM_NOSLEEP)) == NULL) { return (PSM_FAILURE); + } if ((proc_ids = (uint32_t *)kmem_zalloc(NCPU * sizeof (uint32_t), KM_NOSLEEP)) == NULL) { @@ -653,7 +691,9 @@ acpi_probe(char *modname) return (PSM_FAILURE); } + PRM_POINT("acpi_get_apic_lid()"); local_ids[0] = acpi_get_apic_lid(); + PRM_DEBUG(local_ids[0]); apic_nproc = 1; apic_io_max = 0; @@ -662,6 +702,7 @@ acpi_probe(char *modname) madt_size = acpi_mapic_dtp->Header.Length; madt_seen = sizeof (*acpi_mapic_dtp); + PRM_DEBUG(madt_size); while (madt_seen < madt_size) { switch (ap->Type) { case ACPI_MADT_TYPE_LOCAL_APIC: @@ -812,6 +853,9 @@ acpi_probe(char *modname) ap = (ACPI_SUBTABLE_HEADER *)(((char *)ap) + ap->Length); } + PRM_DEBUG(apic_nproc); + PRM_DEBUG(apic_io_max); + /* We found multiple enabled cpus via MADT */ if ((apic_nproc > 1) && (apic_io_max > 0)) { acpi_found_smp_config = B_TRUE; @@ -826,6 +870,7 @@ acpi_probe(char *modname) if (plat_dr_support_cpu()) { apic_max_nproc = max_ncpus; } + PRM_DEBUG(apic_max_nproc); apic_cpus_size = max(apic_nproc, max_ncpus) * sizeof (*apic_cpus); if ((apic_cpus = kmem_zalloc(apic_cpus_size, KM_NOSLEEP)) == NULL) goto cleanup; @@ -834,15 +879,21 @@ acpi_probe(char *modname) * ACPI doesn't provide the local apic ver, get it directly from the * local apic */ + PRM_POINT("apic_read(APIC_VERS_REG)"); ver = apic_reg_ops->apic_read(APIC_VERS_REG); + PRM_DEBUG(ver); + PRM_DEBUG(apic_nproc); + PRM_DEBUG(boot_ncpus); for (i = 0; i < apic_nproc; i++) { apic_cpus[i].aci_local_id = local_ids[i]; apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF); apic_cpus[i].aci_processor_id = proc_ids[i]; /* Only build mapping info for CPUs present at boot. 
*/ - if (i < boot_ncpus) + if (i < boot_ncpus) { (void) acpica_map_cpu(i, proc_ids[i]); + } } + PRM_POINT("acpica_map_cpu loop complete"); /* * To support CPU dynamic reconfiguration, the apic CPU info structure @@ -881,8 +932,10 @@ acpi_probe(char *modname) apic_cpus[i].aci_status = APIC_CPU_FREE; } + PRM_POINT("ioapic reads"); for (i = 0; i < apic_io_max; i++) { ioapic_ix = i; + PRM_DEBUG(ioapic_ix); /* * need to check Sitka on the following acpi problem @@ -892,16 +945,20 @@ acpi_probe(char *modname) * actual id directly from the ioapic. */ id = ioapic_read(ioapic_ix, APIC_ID_CMD); + PRM_DEBUG(id); hid = (uchar_t)(id >> 24); + PRM_DEBUG(hid); if (hid != apic_io_id[i]) { if (apic_io_id[i] == 0) apic_io_id[i] = hid; else { /* set ioapic id to whatever reported by ACPI */ id = ((uint32_t)apic_io_id[i]) << 24; + PRM_POINT("ioapic_write(ID)"); ioapic_write(ioapic_ix, APIC_ID_CMD, id); } } + PRM_POINT("ioapic_read(VERS)"); ver = ioapic_read(ioapic_ix, APIC_VERS_CMD); apic_io_ver[i] = (uchar_t)(ver & 0xff); intmax = (ver >> 16) & 0xff; @@ -917,6 +974,7 @@ acpi_probe(char *modname) * acpi-user-options specifies legacy mode * (no SCI, no ACPI mode) */ + PRM_POINT("acpica_get_sci()"); if (acpica_get_sci(&sci, &sci_flags) != AE_OK) sci = -1; @@ -925,6 +983,7 @@ acpi_probe(char *modname) * If this fails, we don't attempt to use ACPI * even if we were able to get a MADT above */ + PRM_POINT("acpica_init()"); if (acpica_init() != AE_OK) { cmn_err(CE_WARN, "!apic: Failed to initialize acpica!"); goto cleanup; @@ -934,6 +993,7 @@ acpi_probe(char *modname) * Call acpica_build_processor_map() now that we have * ACPI namesspace access */ + PRM_POINT("acpica_build_processor_map()"); (void) acpica_build_processor_map(); /* @@ -952,15 +1012,19 @@ acpi_probe(char *modname) if (apic_verbose & APIC_VERBOSE_POWEROFF_PAUSE_FLAG) acpi_verboseflags |= PSM_VERBOSE_POWEROFF_PAUSE_FLAG; + PRM_POINT("acpi_psm_init()"); if (acpi_psm_init(modname, acpi_verboseflags) == ACPI_PSM_FAILURE) goto 
cleanup; /* Enable ACPI APIC interrupt routing */ + PRM_POINT("apic_acpi_enter_apicmode()"); if (apic_acpi_enter_apicmode() != PSM_FAILURE) { cmn_err(CE_NOTE, "!apic: Using APIC interrupt routing mode"); + PRM_POINT("build_reserved_irqlist()"); build_reserved_irqlist((uchar_t *)apic_reserved_irqlist); apic_enable_acpi = 1; if (apic_sci_vect > 0) { + PRM_POINT("acpica_set_core_feature()"); acpica_set_core_feature(ACPI_FEATURE_SCI_EVENT); } if (apic_use_acpi_madt_only) { @@ -970,16 +1034,18 @@ acpi_probe(char *modname) #if !defined(__xpv) /* - * probe ACPI for hpet information here which is used later - * in apic_picinit(). + * Probe ACPI for HPET information here which is used later in + * apic_picinit(). Note that we do not need to use the HPET at + * all on most modern systems, but if there is an actionable + * failure message it will be logged by the routine itself. */ - if (hpet_acpi_init(&apic_hpet_vect, &apic_hpet_flags) < 0) { - cmn_err(CE_NOTE, "!ACPI HPET table query failed\n"); - } + PRM_POINT("hpet_acpi_init()"); + (void) hpet_acpi_init(&apic_hpet_vect, &apic_hpet_flags); #endif kmem_free(local_ids, NCPU * sizeof (uint32_t)); kmem_free(proc_ids, NCPU * sizeof (uint32_t)); + PRM_POINT("SUCCESS"); return (PSM_SUCCESS); } /* if setting APIC mode failed above, we fall through to cleanup */ diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic.c b/usr/src/uts/i86pc/io/pcplusmp/apic.c index efca63c814..c987391435 100644 --- a/usr/src/uts/i86pc/io/pcplusmp/apic.c +++ b/usr/src/uts/i86pc/io/pcplusmp/apic.c @@ -26,6 +26,7 @@ * Copyright (c) 2010, Intel Corporation. * All rights reserved. * Copyright 2019 Joyent, Inc. 
+ * Copyright 2020 Oxide Computer Company */ /* @@ -58,6 +59,7 @@ #include <sys/ddi_impldefs.h> #include <sys/pci.h> #include <sys/promif.h> +#include <sys/prom_debug.h> #include <sys/x86_archext.h> #include <sys/cpc_impl.h> #include <sys/uadmin.h> @@ -249,16 +251,23 @@ _info(struct modinfo *modinfop) static int apic_probe(void) { + PRM_POINT("apic_probe()"); + /* check if apix is initialized */ - if (apix_enable && apix_loaded()) + if (apix_enable && apix_loaded()) { + PRM_POINT("apic_probe FAILURE: apix is loaded"); return (PSM_FAILURE); + } /* * Check whether x2APIC mode was activated by BIOS. We don't support * that in pcplusmp as apix normally handles that. */ - if (apic_local_mode() == LOCAL_X2APIC) + PRM_POINT("apic_local_mode()"); + if (apic_local_mode() == LOCAL_X2APIC) { + PRM_POINT("apic_probe FAILURE: in x2apic mode"); return (PSM_FAILURE); + } /* continue using pcplusmp PSM */ apix_enable = 0; diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c index 1046a54126..e9a34f8630 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c @@ -203,7 +203,7 @@ check_svm_features(void) */ if (nasid == 0 || nasid > regs[1]) nasid = regs[1]; - KASSERT(nasid > 1, ("Insufficient ASIDs for guests: %#x", nasid)); + KASSERT(nasid > 1, ("Insufficient ASIDs for guests: %x", nasid)); /* bhyve requires the Nested Paging feature */ if (!(svm_feature & AMD_CPUID_SVM_NP)) { @@ -386,11 +386,11 @@ svm_msr_perm(uint8_t *perm_bitmap, uint64_t msr, bool read, bool write) int index, bit, error; error = svm_msr_index(msr, &index, &bit); - KASSERT(error == 0, ("%s: invalid msr %#lx", __func__, msr)); + KASSERT(error == 0, ("%s: invalid msr %lx", __func__, msr)); KASSERT(index >= 0 && index < SVM_MSR_BITMAP_SIZE, - ("%s: invalid index %d for msr %#lx", __func__, index, msr)); + ("%s: invalid index %d for msr %lx", __func__, index, msr)); KASSERT(bit >= 0 && bit <= 6, ("%s: invalid bit position %d " - "msr %#lx", __func__, 
bit, msr)); + "msr %lx", __func__, bit, msr)); if (read) perm_bitmap[index] &= ~(1UL << bit); @@ -444,7 +444,7 @@ svm_set_intercept(struct svm_softc *sc, int vcpu, int idx, uint32_t bitmask, if (ctrl->intercept[idx] != oldval) { svm_set_dirty(sc, vcpu, VMCB_CACHE_I); VCPU_CTR3(sc->vm, vcpu, "intercept[%d] modified " - "from %#x to %#x", idx, oldval, ctrl->intercept[idx]); + "from %x to %x", idx, oldval, ctrl->intercept[idx]); } } @@ -527,11 +527,23 @@ vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa, svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MONITOR); svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MWAIT); + /* Intercept privileged invalidation instructions. */ + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INVD); + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INVLPGA); + /* + * Intercept all virtualization-related instructions. + * * From section "Canonicalization and Consistency Checks" in APMv2 * the VMRUN intercept bit must be set to pass the consistency check. */ svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMRUN); + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMMCALL); + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMLOAD); + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMSAVE); + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_STGI); + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_CLGI); + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_SKINIT); /* * The ASID will be set to a non-zero value just before VMRUN. 
@@ -672,22 +684,19 @@ svm_cpl(struct vmcb_state *state) static enum vm_cpu_mode svm_vcpu_mode(struct vmcb *vmcb) { - struct vmcb_segment seg; struct vmcb_state *state; - int error; state = &vmcb->state; if (state->efer & EFER_LMA) { - error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); - KASSERT(error == 0, ("%s: vmcb_seg(cs) error %d", __func__, - error)); + struct vmcb_segment *seg; /* * Section 4.8.1 for APM2, check if Code Segment has * Long attribute set in descriptor. */ - if (seg.attrib & VMCB_CS_ATTRIB_L) + seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS); + if (seg->attrib & VMCB_CS_ATTRIB_L) return (CPU_MODE_64BIT); else return (CPU_MODE_COMPATIBILITY); @@ -848,10 +857,9 @@ svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit, struct vmcb *vmcb; struct vie *vie; struct vm_guest_paging paging; - struct vmcb_segment seg; + struct vmcb_segment *seg; char *inst_bytes = NULL; uint8_t inst_len = 0; - int error; vmcb = svm_get_vmcb(svm_sc, vcpu); ctrl = &vmcb->ctrl; @@ -861,22 +869,21 @@ svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit, vmexit->u.mmio_emul.gla = VIE_INVALID_GLA; svm_paging_info(vmcb, &paging); - error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); - KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error)); - switch (paging.cpu_mode) { case CPU_MODE_REAL: - vmexit->u.mmio_emul.cs_base = seg.base; + seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS); + vmexit->u.mmio_emul.cs_base = seg->base; vmexit->u.mmio_emul.cs_d = 0; break; case CPU_MODE_PROTECTED: case CPU_MODE_COMPATIBILITY: - vmexit->u.mmio_emul.cs_base = seg.base; + seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS); + vmexit->u.mmio_emul.cs_base = seg->base; /* * Section 4.8.1 of APM2, Default Operand Size or D bit. */ - vmexit->u.mmio_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ? + vmexit->u.mmio_emul.cs_d = (seg->attrib & VMCB_CS_ATTRIB_D) ? 
1 : 0; break; default: @@ -927,7 +934,7 @@ svm_eventinject(struct svm_softc *sc, int vcpu, int intr_type, int vector, ctrl = svm_get_vmcb_ctrl(sc, vcpu); KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0, - ("%s: event already pending %#lx", __func__, ctrl->eventinj)); + ("%s: event already pending %lx", __func__, ctrl->eventinj)); KASSERT(vector >=0 && vector <= 255, ("%s: invalid vector %d", __func__, vector)); @@ -949,7 +956,7 @@ svm_eventinject(struct svm_softc *sc, int vcpu, int intr_type, int vector, if (ec_valid) { ctrl->eventinj |= VMCB_EVENTINJ_EC_VALID; ctrl->eventinj |= (uint64_t)error << 32; - VCPU_CTR3(sc->vm, vcpu, "Injecting %s at vector %d errcode %#x", + VCPU_CTR3(sc->vm, vcpu, "Injecting %s at vector %d errcode %x", intrtype_to_str(intr_type), vector, error); } else { VCPU_CTR2(sc->vm, vcpu, "Injecting %s at vector %d", @@ -1050,32 +1057,6 @@ disable_intr_window_exiting(struct svm_softc *sc, int vcpu) svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR); } -static int -svm_modify_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t val) -{ - struct vmcb_ctrl *ctrl; - int oldval, newval; - - ctrl = svm_get_vmcb_ctrl(sc, vcpu); - oldval = ctrl->intr_shadow; - newval = val ? 1 : 0; - if (newval != oldval) { - ctrl->intr_shadow = newval; - VCPU_CTR1(sc->vm, vcpu, "Setting intr_shadow to %d", newval); - } - return (0); -} - -static int -svm_get_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t *val) -{ - struct vmcb_ctrl *ctrl; - - ctrl = svm_get_vmcb_ctrl(sc, vcpu); - *val = ctrl->intr_shadow; - return (0); -} - /* * Once an NMI is injected it blocks delivery of further NMIs until the handler * executes an IRET. 
The IRET intercept is enabled when an NMI is injected to @@ -1103,7 +1084,7 @@ enable_nmi_blocking(struct svm_softc *sc, int vcpu) static void clear_nmi_blocking(struct svm_softc *sc, int vcpu) { - int error; + struct vmcb_ctrl *ctrl; KASSERT(nmi_blocked(sc, vcpu), ("vNMI already unblocked")); VCPU_CTR0(sc->vm, vcpu, "vNMI blocking cleared"); @@ -1124,8 +1105,8 @@ clear_nmi_blocking(struct svm_softc *sc, int vcpu) * Set 'intr_shadow' to prevent an NMI from being injected on the * immediate VMRUN. */ - error = svm_modify_intr_shadow(sc, vcpu, 1); - KASSERT(!error, ("%s: error %d setting intr_shadow", __func__, error)); + ctrl = svm_get_vmcb_ctrl(sc, vcpu); + ctrl->intr_shadow = 1; } #define EFER_MBZ_BITS 0xFFFFFFFFFFFF0200UL @@ -1141,7 +1122,7 @@ svm_write_efer(struct svm_softc *sc, int vcpu, uint64_t newval, bool *retu) state = svm_get_vmcb_state(sc, vcpu); oldval = state->efer; - VCPU_CTR2(sc->vm, vcpu, "wrmsr(efer) %#lx/%#lx", oldval, newval); + VCPU_CTR2(sc->vm, vcpu, "wrmsr(efer) %lx/%lx", oldval, newval); newval &= ~0xFE; /* clear the Read-As-Zero (RAZ) bits */ changed = oldval ^ newval; @@ -1275,7 +1256,7 @@ exit_reason_to_str(uint64_t reason) case VMCB_EXIT_MWAIT: return ("mwait"); default: - snprintf(reasonbuf, sizeof(reasonbuf), "%#lx", reason); + snprintf(reasonbuf, sizeof(reasonbuf), "%lx", reason); return (reasonbuf); } } @@ -1350,10 +1331,10 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) } KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0, ("%s: event " - "injection valid bit is set %#lx", __func__, ctrl->eventinj)); + "injection valid bit is set %lx", __func__, ctrl->eventinj)); KASSERT(vmexit->inst_length >= 0 && vmexit->inst_length <= 15, - ("invalid inst_length %d: code (%#lx), info1 (%#lx), info2 (%#lx)", + ("invalid inst_length %d: code (%lx), info1 (%lx), info2 (%lx)", vmexit->inst_length, code, info1, info2)); svm_update_virqinfo(svm_sc, vcpu); @@ -1445,7 +1426,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, 
struct vm_exit *vmexit) if (reflect) { /* Reflect the exception back into the guest */ VCPU_CTR2(svm_sc->vm, vcpu, "Reflecting exception " - "%d/%#x into the guest", idtvec, (int)info1); + "%d/%x into the guest", idtvec, (int)info1); error = vm_inject_exception(svm_sc->vm, vcpu, idtvec, errcode_valid, info1, 0); KASSERT(error == 0, ("%s: vm_inject_exception error %d", @@ -1462,7 +1443,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) if (info1) { vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1); val = (uint64_t)edx << 32 | eax; - VCPU_CTR2(svm_sc->vm, vcpu, "wrmsr %#x val %#lx", + VCPU_CTR2(svm_sc->vm, vcpu, "wrmsr %x val %lx", ecx, val); if (emulate_wrmsr(svm_sc, vcpu, ecx, val, &retu)) { vmexit->exitcode = VM_EXITCODE_WRMSR; @@ -1475,7 +1456,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) ("emulate_wrmsr retu with bogus exitcode")); } } else { - VCPU_CTR1(svm_sc->vm, vcpu, "rdmsr %#x", ecx); + VCPU_CTR1(svm_sc->vm, vcpu, "rdmsr %x", ecx); vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1); if (emulate_rdmsr(svm_sc, vcpu, ecx, &retu)) { vmexit->exitcode = VM_EXITCODE_RDMSR; @@ -1492,6 +1473,31 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) handled = svm_handle_inout(svm_sc, vcpu, vmexit); vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1); break; + case VMCB_EXIT_SHUTDOWN: + vm_suspend(svm_sc->vm, VM_SUSPEND_TRIPLEFAULT); + handled = 1; + break; + case VMCB_EXIT_INVD: + case VMCB_EXIT_INVLPGA: + /* privileged invalidation instructions */ + vm_inject_ud(svm_sc->vm, vcpu); + handled = 1; + break; + case VMCB_EXIT_VMRUN: + case VMCB_EXIT_VMLOAD: + case VMCB_EXIT_VMSAVE: + case VMCB_EXIT_STGI: + case VMCB_EXIT_CLGI: + case VMCB_EXIT_SKINIT: + /* privileged vmm instructions */ + vm_inject_ud(svm_sc->vm, vcpu); + handled = 1; + break; + case VMCB_EXIT_VMMCALL: + /* No handlers make use of VMMCALL for now */ + vm_inject_ud(svm_sc->vm, vcpu); + handled = 1; + break; case VMCB_EXIT_CPUID: 
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_CPUID, 1); handled = x86_emulate_cpuid(svm_sc->vm, vcpu, &state->rax, @@ -1510,7 +1516,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) /* EXITINFO2 contains the faulting guest physical address */ if (info1 & VMCB_NPF_INFO1_RSV) { VCPU_CTR2(svm_sc->vm, vcpu, "nested page fault with " - "reserved bits set: info1(%#lx) info2(%#lx)", + "reserved bits set: info1(%lx) info2(%lx)", info1, info2); } else if (vm_mem_allocated(svm_sc->vm, vcpu, info2)) { vmexit->exitcode = VM_EXITCODE_PAGING; @@ -1518,13 +1524,13 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) vmexit->u.paging.fault_type = npf_fault_type(info1); vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_NESTED_FAULT, 1); VCPU_CTR3(svm_sc->vm, vcpu, "nested page fault " - "on gpa %#lx/%#lx at rip %#lx", + "on gpa %lx/%lx at rip %lx", info2, info1, state->rip); } else if (svm_npf_emul_fault(info1)) { svm_handle_mmio_emul(svm_sc, vcpu, vmexit, info2); vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MMIO_EMUL, 1); VCPU_CTR3(svm_sc->vm, vcpu, "mmio_emul fault " - "for gpa %#lx/%#lx at rip %#lx", + "for gpa %lx/%lx at rip %lx", info2, info1, state->rip); } break; @@ -1539,7 +1545,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) break; } - VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %#lx/%d", + VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %lx/%d", handled ? 
"handled" : "unhandled", exit_reason_to_str(code), vmexit->rip, vmexit->inst_length); @@ -1576,14 +1582,14 @@ svm_inj_intinfo(struct svm_softc *svm_sc, int vcpu) return; KASSERT(VMCB_EXITINTINFO_VALID(intinfo), ("%s: entry intinfo is not " - "valid: %#lx", __func__, intinfo)); + "valid: %lx", __func__, intinfo)); svm_eventinject(svm_sc, vcpu, VMCB_EXITINTINFO_TYPE(intinfo), VMCB_EXITINTINFO_VECTOR(intinfo), VMCB_EXITINTINFO_EC(intinfo), VMCB_EXITINTINFO_EC_VALID(intinfo)); vmm_stat_incr(svm_sc->vm, vcpu, VCPU_INTINFO_INJECTED, 1); - VCPU_CTR1(svm_sc->vm, vcpu, "Injected entry intinfo: %#lx", intinfo); + VCPU_CTR1(svm_sc->vm, vcpu, "Injected entry intinfo: %lx", intinfo); } /* @@ -1610,7 +1616,7 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) if (vcpustate->nextrip != state->rip) { ctrl->intr_shadow = 0; VCPU_CTR2(sc->vm, vcpu, "Guest interrupt blocking " - "cleared due to rip change: %#lx/%#lx", + "cleared due to rip change: %lx/%lx", vcpustate->nextrip, state->rip); } @@ -1648,7 +1654,7 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) * then defer the NMI until after that. */ VCPU_CTR1(sc->vm, vcpu, "Cannot inject NMI due to " - "eventinj %#lx", ctrl->eventinj); + "eventinj %lx", ctrl->eventinj); /* * Use self-IPI to trigger a VM-exit as soon as @@ -1694,7 +1700,7 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) */ if ((state->rflags & PSL_I) == 0) { VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to " - "rflags %#lx", vector, state->rflags); + "rflags %lx", vector, state->rflags); need_intr_window = 1; goto done; } @@ -1708,7 +1714,7 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) if (ctrl->eventinj & VMCB_EVENTINJ_VALID) { VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to " - "eventinj %#lx", vector, ctrl->eventinj); + "eventinj %lx", vector, ctrl->eventinj); need_intr_window = 1; goto done; } @@ -1742,9 +1748,9 @@ done: * VMRUN. 
*/ v_tpr = vlapic_get_cr8(vlapic); - KASSERT(v_tpr <= 15, ("invalid v_tpr %#x", v_tpr)); + KASSERT(v_tpr <= 15, ("invalid v_tpr %x", v_tpr)); if (ctrl->v_tpr != v_tpr) { - VCPU_CTR2(sc->vm, vcpu, "VMCB V_TPR changed from %#x to %#x", + VCPU_CTR2(sc->vm, vcpu, "VMCB V_TPR changed from %x to %x", ctrl->v_tpr, v_tpr); ctrl->v_tpr = v_tpr; svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR); @@ -1762,8 +1768,8 @@ done: */ KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) != 0 || (state->rflags & PSL_I) == 0 || ctrl->intr_shadow, - ("Bogus intr_window_exiting: eventinj (%#lx), " - "intr_shadow (%u), rflags (%#lx)", + ("Bogus intr_window_exiting: eventinj (%lx), " + "intr_shadow (%lu), rflags (%lx)", ctrl->eventinj, ctrl->intr_shadow, state->rflags)); enable_intr_window_exiting(sc, vcpu); } else { @@ -1838,7 +1844,7 @@ check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, u_int thiscpu) */ KASSERT(!alloc_asid, ("ASID allocation not necessary")); KASSERT(ctrl->tlb_ctrl == VMCB_TLB_FLUSH_NOTHING, - ("Invalid VMCB tlb_ctrl: %#x", ctrl->tlb_ctrl)); + ("Invalid VMCB tlb_ctrl: %x", ctrl->tlb_ctrl)); } if (alloc_asid) { @@ -1968,7 +1974,7 @@ svm_dr_leave_guest(struct svm_regctx *gctx) * Start vcpu with specified RIP. */ static int -svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, +svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap, struct vm_eventinfo *evinfo) { struct svm_regctx *gctx; @@ -2109,10 +2115,10 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, ctrl->vmcb_clean = vmcb_clean & ~vcpustate->dirty; vcpustate->dirty = 0; - VCPU_CTR1(vm, vcpu, "vmcb clean %#x", ctrl->vmcb_clean); + VCPU_CTR1(vm, vcpu, "vmcb clean %x", ctrl->vmcb_clean); /* Launch Virtual Machine. 
*/ - VCPU_CTR1(vm, vcpu, "Resume execution at %#lx", state->rip); + VCPU_CTR1(vm, vcpu, "Resume execution at %lx", state->rip); svm_dr_enter_guest(gctx); svm_launch(vmcb_pa, gctx, get_pcpu()); svm_dr_leave_guest(gctx); @@ -2152,10 +2158,9 @@ svm_vmcleanup(void *arg) free(sc, M_SVM); } -static register_t * +static uint64_t * swctx_regptr(struct svm_regctx *regctx, int reg) { - switch (reg) { case VM_REG_GUEST_RBX: return (&regctx->sctx_rbx); @@ -2201,56 +2206,135 @@ swctx_regptr(struct svm_regctx *regctx, int reg) static int svm_getreg(void *arg, int vcpu, int ident, uint64_t *val) { - struct svm_softc *svm_sc; - register_t *reg; - - svm_sc = arg; + struct svm_softc *sc; + struct vmcb *vmcb; + uint64_t *regp; + uint64_t *fieldp; + struct vmcb_segment *seg; - if (ident == VM_REG_GUEST_INTR_SHADOW) { - return (svm_get_intr_shadow(svm_sc, vcpu, val)); - } + sc = arg; + vmcb = svm_get_vmcb(sc, vcpu); - if (vmcb_read(svm_sc, vcpu, ident, val) == 0) { + regp = swctx_regptr(svm_get_guest_regctx(sc, vcpu), ident); + if (regp != NULL) { + *val = *regp; return (0); } - reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident); + switch (ident) { + case VM_REG_GUEST_INTR_SHADOW: + *val = (vmcb->ctrl.intr_shadow != 0) ? 
1 : 0; + break; - if (reg != NULL) { - *val = *reg; - return (0); + case VM_REG_GUEST_CR0: + case VM_REG_GUEST_CR2: + case VM_REG_GUEST_CR3: + case VM_REG_GUEST_CR4: + case VM_REG_GUEST_DR6: + case VM_REG_GUEST_DR7: + case VM_REG_GUEST_EFER: + case VM_REG_GUEST_RAX: + case VM_REG_GUEST_RFLAGS: + case VM_REG_GUEST_RIP: + case VM_REG_GUEST_RSP: + fieldp = vmcb_regptr(vmcb, ident, NULL); + *val = *fieldp; + break; + + case VM_REG_GUEST_CS: + case VM_REG_GUEST_DS: + case VM_REG_GUEST_ES: + case VM_REG_GUEST_FS: + case VM_REG_GUEST_GS: + case VM_REG_GUEST_SS: + case VM_REG_GUEST_LDTR: + case VM_REG_GUEST_TR: + seg = vmcb_segptr(vmcb, ident); + *val = seg->selector; + break; + + case VM_REG_GUEST_GDTR: + case VM_REG_GUEST_IDTR: + /* GDTR and IDTR don't have segment selectors */ + return (EINVAL); + + default: + return (EINVAL); } - VCPU_CTR1(svm_sc->vm, vcpu, "svm_getreg: unknown register %#x", ident); - return (EINVAL); + return (0); } static int svm_setreg(void *arg, int vcpu, int ident, uint64_t val) { - struct svm_softc *svm_sc; - register_t *reg; - - svm_sc = arg; + struct svm_softc *sc; + struct vmcb *vmcb; + uint64_t *regp; + uint64_t *fieldp; + uint32_t dirty; + struct vmcb_segment *seg; - if (ident == VM_REG_GUEST_INTR_SHADOW) { - return (svm_modify_intr_shadow(svm_sc, vcpu, val)); - } + sc = arg; + vmcb = svm_get_vmcb(sc, vcpu); - if (vmcb_write(svm_sc, vcpu, ident, val) == 0) { + regp = swctx_regptr(svm_get_guest_regctx(sc, vcpu), ident); + if (regp != NULL) { + *regp = val; return (0); } - reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident); + dirty = VMCB_CACHE_NONE; + switch (ident) { + case VM_REG_GUEST_INTR_SHADOW: + vmcb->ctrl.intr_shadow = (val != 0) ? 
1 : 0; + break; - if (reg != NULL) { - *reg = val; - return (0); + case VM_REG_GUEST_EFER: + fieldp = vmcb_regptr(vmcb, ident, &dirty); + /* EFER_SVM must always be set when the guest is executing */ + *fieldp = val | EFER_SVM; + dirty |= VMCB_CACHE_CR; + break; + + case VM_REG_GUEST_CR0: + case VM_REG_GUEST_CR2: + case VM_REG_GUEST_CR3: + case VM_REG_GUEST_CR4: + case VM_REG_GUEST_DR6: + case VM_REG_GUEST_DR7: + case VM_REG_GUEST_RAX: + case VM_REG_GUEST_RFLAGS: + case VM_REG_GUEST_RIP: + case VM_REG_GUEST_RSP: + fieldp = vmcb_regptr(vmcb, ident, &dirty); + *fieldp = val; + break; + + case VM_REG_GUEST_CS: + case VM_REG_GUEST_DS: + case VM_REG_GUEST_ES: + case VM_REG_GUEST_SS: + case VM_REG_GUEST_FS: + case VM_REG_GUEST_GS: + case VM_REG_GUEST_LDTR: + case VM_REG_GUEST_TR: + dirty |= VMCB_CACHE_SEG; + seg = vmcb_segptr(vmcb, ident); + seg->selector = (uint16_t)val; + break; + + case VM_REG_GUEST_GDTR: + case VM_REG_GUEST_IDTR: + /* GDTR and IDTR don't have segment selectors */ + return (EINVAL); + + default: + return (EINVAL); } - if (ident == VM_REG_GUEST_ENTRY_INST_LENGTH) { - /* Ignore. */ - return (0); + if (dirty != VMCB_CACHE_NONE) { + svm_set_dirty(sc, vcpu, dirty); } /* @@ -2259,8 +2343,119 @@ svm_setreg(void *arg, int vcpu, int ident, uint64_t val) * whether 'running' is true/false. */ - VCPU_CTR1(svm_sc->vm, vcpu, "svm_setreg: unknown register %#x", ident); - return (EINVAL); + return (0); +} + +static int +svm_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +{ + struct vmcb *vmcb; + struct svm_softc *sc; + struct vmcb_segment *seg; + + sc = arg; + vmcb = svm_get_vmcb(sc, vcpu); + + switch (reg) { + case VM_REG_GUEST_CS: + case VM_REG_GUEST_DS: + case VM_REG_GUEST_ES: + case VM_REG_GUEST_SS: + case VM_REG_GUEST_FS: + case VM_REG_GUEST_GS: + case VM_REG_GUEST_LDTR: + case VM_REG_GUEST_TR: + svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG); + seg = vmcb_segptr(vmcb, reg); + /* + * Map seg_desc access to VMCB attribute format. 
+ * + * SVM uses the 'P' bit in the segment attributes to indicate a + * NULL segment so clear it if the segment is marked unusable. + */ + seg->attrib = VMCB_ACCESS2ATTR(desc->access); + if (SEG_DESC_UNUSABLE(desc->access)) { + seg->attrib &= ~0x80; + } + break; + + case VM_REG_GUEST_GDTR: + case VM_REG_GUEST_IDTR: + svm_set_dirty(sc, vcpu, VMCB_CACHE_DT); + seg = vmcb_segptr(vmcb, reg); + break; + + default: + return (EINVAL); + } + + ASSERT(seg != NULL); + seg->base = desc->base; + seg->limit = desc->limit; + + return (0); +} + +static int +svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +{ + struct vmcb *vmcb; + struct svm_softc *sc; + struct vmcb_segment *seg; + + sc = arg; + vmcb = svm_get_vmcb(sc, vcpu); + + switch (reg) { + case VM_REG_GUEST_DS: + case VM_REG_GUEST_ES: + case VM_REG_GUEST_FS: + case VM_REG_GUEST_GS: + case VM_REG_GUEST_SS: + case VM_REG_GUEST_LDTR: + seg = vmcb_segptr(vmcb, reg); + desc->access = VMCB_ATTR2ACCESS(seg->attrib); + /* + * VT-x uses bit 16 to indicate a segment that has been loaded + * with a NULL selector (aka unusable). The 'desc->access' + * field is interpreted in the VT-x format by the + * processor-independent code. + * + * SVM uses the 'P' bit to convey the same information so + * convert it into the VT-x format. For more details refer to + * section "Segment State in the VMCB" in APMv2. + */ + if ((desc->access & 0x80) == 0) { + /* Unusable segment */ + desc->access |= 0x10000; + } + break; + + case VM_REG_GUEST_CS: + case VM_REG_GUEST_TR: + seg = vmcb_segptr(vmcb, reg); + desc->access = VMCB_ATTR2ACCESS(seg->attrib); + break; + + case VM_REG_GUEST_GDTR: + case VM_REG_GUEST_IDTR: + seg = vmcb_segptr(vmcb, reg); + /* + * Since there are no access bits associated with the GDTR or + * the IDTR, zero out the field to ensure it does not contain + * garbage which might confuse the consumer. 
+ */ + desc->access = 0; + break; + + default: + return (EINVAL); + } + + ASSERT(seg != NULL); + desc->base = seg->base; + desc->limit = seg->limit; + return (0); } static int @@ -2368,8 +2563,8 @@ struct vmm_ops vmm_ops_amd = { .vmcleanup = svm_vmcleanup, .vmgetreg = svm_getreg, .vmsetreg = svm_setreg, - .vmgetdesc = vmcb_getdesc, - .vmsetdesc = vmcb_setdesc, + .vmgetdesc = svm_getdesc, + .vmsetdesc = svm_setdesc, .vmgetcap = svm_getcap, .vmsetcap = svm_setcap, .vmspace_alloc = svm_npt_alloc, diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.h b/usr/src/uts/i86pc/io/vmm/amd/svm.h index c78f7eb067..19739884c2 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.h +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.h @@ -35,31 +35,31 @@ * Guest register state that is saved outside the VMCB. */ struct svm_regctx { - register_t sctx_rbp; - register_t sctx_rbx; - register_t sctx_rcx; - register_t sctx_rdx; - register_t sctx_rdi; - register_t sctx_rsi; - register_t sctx_r8; - register_t sctx_r9; - register_t sctx_r10; - register_t sctx_r11; - register_t sctx_r12; - register_t sctx_r13; - register_t sctx_r14; - register_t sctx_r15; - register_t sctx_dr0; - register_t sctx_dr1; - register_t sctx_dr2; - register_t sctx_dr3; + uint64_t sctx_rbp; + uint64_t sctx_rbx; + uint64_t sctx_rcx; + uint64_t sctx_rdx; + uint64_t sctx_rdi; + uint64_t sctx_rsi; + uint64_t sctx_r8; + uint64_t sctx_r9; + uint64_t sctx_r10; + uint64_t sctx_r11; + uint64_t sctx_r12; + uint64_t sctx_r13; + uint64_t sctx_r14; + uint64_t sctx_r15; + uint64_t sctx_dr0; + uint64_t sctx_dr1; + uint64_t sctx_dr2; + uint64_t sctx_dr3; - register_t host_dr0; - register_t host_dr1; - register_t host_dr2; - register_t host_dr3; - register_t host_dr6; - register_t host_dr7; + uint64_t host_dr0; + uint64_t host_dr1; + uint64_t host_dr2; + uint64_t host_dr3; + uint64_t host_dr6; + uint64_t host_dr7; uint64_t host_debugctl; }; diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.c b/usr/src/uts/i86pc/io/vmm/amd/vmcb.c index 5075b69867..b00f974c23 
100644 --- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.c +++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.c @@ -26,429 +26,130 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * Copyright 2020 Oxide Computer Company + */ + #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> -#include <machine/segments.h> -#include <machine/specialreg.h> #include <machine/vmm.h> -#include "vmm_ktr.h" - #include "vmcb.h" #include "svm.h" -#include "svm_softc.h" -/* - * The VMCB aka Virtual Machine Control Block is a 4KB aligned page - * in memory that describes the virtual machine. - * - * The VMCB contains: - * - instructions or events in the guest to intercept - * - control bits that modify execution environment of the guest - * - guest processor state (e.g. general purpose registers) - */ - -/* - * Return VMCB segment area. 
- */ -static struct vmcb_segment * +struct vmcb_segment * vmcb_segptr(struct vmcb *vmcb, int type) { - struct vmcb_state *state; - struct vmcb_segment *seg; - - state = &vmcb->state; + struct vmcb_state *state = &vmcb->state; switch (type) { case VM_REG_GUEST_CS: - seg = &state->cs; - break; - + return (&state->cs); case VM_REG_GUEST_DS: - seg = &state->ds; - break; - + return (&state->ds); case VM_REG_GUEST_ES: - seg = &state->es; - break; - + return (&state->es); case VM_REG_GUEST_FS: - seg = &state->fs; - break; - + return (&state->fs); case VM_REG_GUEST_GS: - seg = &state->gs; - break; - + return (&state->gs); case VM_REG_GUEST_SS: - seg = &state->ss; - break; - + return (&state->ss); case VM_REG_GUEST_GDTR: - seg = &state->gdt; - break; - + return (&state->gdt); case VM_REG_GUEST_IDTR: - seg = &state->idt; - break; - + return (&state->idt); case VM_REG_GUEST_LDTR: - seg = &state->ldt; - break; - + return (&state->ldt); case VM_REG_GUEST_TR: - seg = &state->tr; - break; - + return (&state->tr); default: - seg = NULL; - break; + panic("unexpected seg %d", type); } - - return (seg); } -static int -vmcb_access(struct svm_softc *softc, int vcpu, int write, int ident, - uint64_t *val) +uint64_t * +vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp) { - struct vmcb *vmcb; - int off, bytes; - char *ptr; - - vmcb = svm_get_vmcb(softc, vcpu); - off = VMCB_ACCESS_OFFSET(ident); - bytes = VMCB_ACCESS_BYTES(ident); - - if ((off + bytes) >= sizeof (struct vmcb)) - return (EINVAL); - - ptr = (char *)vmcb; - - if (!write) - *val = 0; - - switch (bytes) { - case 8: - case 4: - case 2: - if (write) - memcpy(ptr + off, val, bytes); - else - memcpy(val, ptr + off, bytes); - break; - default: - VCPU_CTR1(softc->vm, vcpu, - "Invalid size %d for VMCB access: %d", bytes); - return (EINVAL); - } - - /* Invalidate all VMCB state cached by h/w. 
*/ - if (write) - svm_set_dirty(softc, vcpu, 0xffffffff); - - return (0); -} - -/* - * Read from segment selector, control and general purpose register of VMCB. - */ -int -vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval) -{ - struct vmcb *vmcb; struct vmcb_state *state; - struct vmcb_segment *seg; - int err; + uint64_t *res = NULL; + uint32_t dirty = VMCB_CACHE_NONE; - vmcb = svm_get_vmcb(sc, vcpu); state = &vmcb->state; - err = 0; - - if (VMCB_ACCESS_OK(ident)) - return (vmcb_access(sc, vcpu, 0, ident, retval)); switch (ident) { case VM_REG_GUEST_CR0: - *retval = state->cr0; + res = &state->cr0; + dirty = VMCB_CACHE_CR; break; case VM_REG_GUEST_CR2: - *retval = state->cr2; + res = &state->cr2; + dirty = VMCB_CACHE_CR2; break; case VM_REG_GUEST_CR3: - *retval = state->cr3; + res = &state->cr3; + dirty = VMCB_CACHE_CR; break; case VM_REG_GUEST_CR4: - *retval = state->cr4; + res = &state->cr4; + dirty = VMCB_CACHE_CR; break; case VM_REG_GUEST_DR6: - *retval = state->dr6; + res = &state->dr6; + dirty = VMCB_CACHE_DR; break; case VM_REG_GUEST_DR7: - *retval = state->dr7; + res = &state->dr7; + dirty = VMCB_CACHE_DR; break; case VM_REG_GUEST_EFER: - *retval = state->efer; + res = &state->efer; + dirty = VMCB_CACHE_CR; break; case VM_REG_GUEST_RAX: - *retval = state->rax; + res = &state->rax; break; case VM_REG_GUEST_RFLAGS: - *retval = state->rflags; + res = &state->rflags; break; case VM_REG_GUEST_RIP: - *retval = state->rip; + res = &state->rip; break; case VM_REG_GUEST_RSP: - *retval = state->rsp; - break; - - case VM_REG_GUEST_CS: - case VM_REG_GUEST_DS: - case VM_REG_GUEST_ES: - case VM_REG_GUEST_FS: - case VM_REG_GUEST_GS: - case VM_REG_GUEST_SS: - case VM_REG_GUEST_LDTR: - case VM_REG_GUEST_TR: - seg = vmcb_segptr(vmcb, ident); - KASSERT(seg != NULL, ("%s: unable to get segment %d from VMCB", - __func__, ident)); - *retval = seg->selector; + res = &state->rsp; break; - case VM_REG_GUEST_GDTR: - case VM_REG_GUEST_IDTR: - /* GDTR and IDTR 
don't have segment selectors */ - err = EINVAL; - break; default: - err = EINVAL; + panic("unexpected register %d", ident); break; } - return (err); -} - -/* - * Write to segment selector, control and general purpose register of VMCB. - */ -int -vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val) -{ - struct vmcb *vmcb; - struct vmcb_state *state; - struct vmcb_segment *seg; - int err, dirtyseg; - - vmcb = svm_get_vmcb(sc, vcpu); - state = &vmcb->state; - dirtyseg = 0; - err = 0; - - if (VMCB_ACCESS_OK(ident)) - return (vmcb_access(sc, vcpu, 1, ident, &val)); - - switch (ident) { - case VM_REG_GUEST_CR0: - state->cr0 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); - break; - - case VM_REG_GUEST_CR2: - state->cr2 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR2); - break; - - case VM_REG_GUEST_CR3: - state->cr3 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); - break; - - case VM_REG_GUEST_CR4: - state->cr4 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); - break; - - case VM_REG_GUEST_DR6: - state->dr6 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_DR); - break; - - case VM_REG_GUEST_DR7: - state->dr7 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_DR); - break; - - case VM_REG_GUEST_EFER: - /* EFER_SVM must always be set when the guest is executing */ - state->efer = val | EFER_SVM; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); - break; - - case VM_REG_GUEST_RAX: - state->rax = val; - break; - - case VM_REG_GUEST_RFLAGS: - state->rflags = val; - break; - - case VM_REG_GUEST_RIP: - state->rip = val; - break; - - case VM_REG_GUEST_RSP: - state->rsp = val; - break; - - case VM_REG_GUEST_CS: - case VM_REG_GUEST_DS: - case VM_REG_GUEST_ES: - case VM_REG_GUEST_SS: - dirtyseg = 1; /* FALLTHROUGH */ - case VM_REG_GUEST_FS: - case VM_REG_GUEST_GS: - case VM_REG_GUEST_LDTR: - case VM_REG_GUEST_TR: - seg = vmcb_segptr(vmcb, ident); - KASSERT(seg != NULL, ("%s: unable to get segment %d from VMCB", - __func__, ident)); - seg->selector = val; - if (dirtyseg) - 
svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG); - break; - - case VM_REG_GUEST_GDTR: - case VM_REG_GUEST_IDTR: - /* GDTR and IDTR don't have segment selectors */ - err = EINVAL; - break; - default: - err = EINVAL; - break; - } - - return (err); -} - -int -vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg2) -{ - struct vmcb_segment *seg; - - seg = vmcb_segptr(vmcb, ident); - if (seg != NULL) { - bcopy(seg, seg2, sizeof(struct vmcb_segment)); - return (0); - } else { - return (EINVAL); - } -} - -int -vmcb_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) -{ - struct vmcb *vmcb; - struct svm_softc *sc; - struct vmcb_segment *seg; - uint16_t attrib; - - sc = arg; - vmcb = svm_get_vmcb(sc, vcpu); - - seg = vmcb_segptr(vmcb, reg); - KASSERT(seg != NULL, ("%s: invalid segment descriptor %d", - __func__, reg)); - - seg->base = desc->base; - seg->limit = desc->limit; - if (reg != VM_REG_GUEST_GDTR && reg != VM_REG_GUEST_IDTR) { - /* - * Map seg_desc access to VMCB attribute format. - * - * SVM uses the 'P' bit in the segment attributes to indicate a - * NULL segment so clear it if the segment is marked unusable. 
- */ - attrib = ((desc->access & 0xF000) >> 4) | (desc->access & 0xFF); - if (SEG_DESC_UNUSABLE(desc->access)) { - attrib &= ~0x80; - } - seg->attrib = attrib; - } - - VCPU_CTR4(sc->vm, vcpu, "Setting desc %d: base (%#lx), limit (%#x), " - "attrib (%#x)", reg, seg->base, seg->limit, seg->attrib); - - switch (reg) { - case VM_REG_GUEST_CS: - case VM_REG_GUEST_DS: - case VM_REG_GUEST_ES: - case VM_REG_GUEST_SS: - svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG); - break; - case VM_REG_GUEST_GDTR: - case VM_REG_GUEST_IDTR: - svm_set_dirty(sc, vcpu, VMCB_CACHE_DT); - break; - default: - break; + ASSERT(res != NULL); + if (dirtyp != NULL) { + *dirtyp |= dirty; } - - return (0); -} - -int -vmcb_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) -{ - struct vmcb *vmcb; - struct svm_softc *sc; - struct vmcb_segment *seg; - - sc = arg; - vmcb = svm_get_vmcb(sc, vcpu); - seg = vmcb_segptr(vmcb, reg); - KASSERT(seg != NULL, ("%s: invalid segment descriptor %d", - __func__, reg)); - - desc->base = seg->base; - desc->limit = seg->limit; - desc->access = 0; - - if (reg != VM_REG_GUEST_GDTR && reg != VM_REG_GUEST_IDTR) { - /* Map seg_desc access to VMCB attribute format */ - desc->access = ((seg->attrib & 0xF00) << 4) | - (seg->attrib & 0xFF); - - /* - * VT-x uses bit 16 to indicate a segment that has been loaded - * with a NULL selector (aka unusable). The 'desc->access' - * field is interpreted in the VT-x format by the - * processor-independent code. - * - * SVM uses the 'P' bit to convey the same information so - * convert it into the VT-x format. For more details refer to - * section "Segment State in the VMCB" in APMv2. 
- */ - if (reg != VM_REG_GUEST_CS && reg != VM_REG_GUEST_TR) { - if ((desc->access & 0x80) == 0) - desc->access |= 0x10000; /* Unusable segment */ - } - } - - return (0); + return (res); } diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h index 88f65df66a..63b088253d 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h +++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h @@ -86,8 +86,8 @@ struct svm_softc; #define VMCB_INTCPT_INVD BIT(22) #define VMCB_INTCPT_PAUSE BIT(23) #define VMCB_INTCPT_HLT BIT(24) -#define VMCB_INTCPT_INVPG BIT(25) -#define VMCB_INTCPT_INVPGA BIT(26) +#define VMCB_INTCPT_INVLPG BIT(25) +#define VMCB_INTCPT_INVLPGA BIT(26) #define VMCB_INTCPT_IO BIT(27) #define VMCB_INTCPT_MSR BIT(28) #define VMCB_INTCPT_TASK_SWITCH BIT(29) @@ -149,12 +149,21 @@ struct svm_softc; #define VMCB_EXIT_POPF 0x71 #define VMCB_EXIT_CPUID 0x72 #define VMCB_EXIT_IRET 0x74 +#define VMCB_EXIT_INVD 0x76 #define VMCB_EXIT_PAUSE 0x77 #define VMCB_EXIT_HLT 0x78 +#define VMCB_EXIT_INVLPG 0x79 +#define VMCB_EXIT_INVLPGA 0x7A #define VMCB_EXIT_IO 0x7B #define VMCB_EXIT_MSR 0x7C #define VMCB_EXIT_SHUTDOWN 0x7F +#define VMCB_EXIT_VMRUN 0x80 +#define VMCB_EXIT_VMMCALL 0x81 +#define VMCB_EXIT_VMLOAD 0x82 #define VMCB_EXIT_VMSAVE 0x83 +#define VMCB_EXIT_STGI 0x84 +#define VMCB_EXIT_CLGI 0x85 +#define VMCB_EXIT_SKINIT 0x86 #define VMCB_EXIT_MONITOR 0x8A #define VMCB_EXIT_MWAIT 0x8B #define VMCB_EXIT_NPF 0x400 @@ -212,15 +221,6 @@ struct svm_softc; #define VMCB_OFF_SYSENTER_EIP VMCB_OFF_STATE(0x238) #define VMCB_OFF_GUEST_PAT VMCB_OFF_STATE(0x268) -/* - * Encode the VMCB offset and bytes that we want to read from VMCB. 
- */ -#define VMCB_ACCESS(o, w) (0x80000000 | (((w) & 0xF) << 16) | \ - ((o) & 0xFFF)) -#define VMCB_ACCESS_OK(v) ((v) & 0x80000000 ) -#define VMCB_ACCESS_BYTES(v) (((v) >> 16) & 0xF) -#define VMCB_ACCESS_OFFSET(v) ((v) & 0xFFF) - #ifdef _KERNEL /* VMCB save state area segment format */ struct vmcb_segment { @@ -231,6 +231,10 @@ struct vmcb_segment { }; CTASSERT(sizeof(struct vmcb_segment) == 16); +/* Convert to/from vmcb segment access to generic (VMX) access */ +#define VMCB_ATTR2ACCESS(attr) ((((attr) & 0xf00) << 4) | ((attr) & 0xff)) +#define VMCB_ACCESS2ATTR(acc) ((((acc) & 0xf000) >> 4) | ((acc) & 0xff)) + /* Code segment descriptor attribute in 12 bit format as saved by VMCB. */ #define VMCB_CS_ATTRIB_L BIT(9) /* Long mode. */ #define VMCB_CS_ATTRIB_D BIT(10) /* OPerand size bit. */ @@ -360,6 +364,15 @@ struct vmcb_state { CTASSERT(sizeof(struct vmcb_state) == 0xC00); CTASSERT(offsetof(struct vmcb_state, int_to) == 0x290); +/* + * The VMCB aka Virtual Machine Control Block is a 4KB aligned page + * in memory that describes the virtual machine. + * + * The VMCB contains: + * - instructions or events in the guest to intercept + * - control bits that modify execution environment of the guest + * - guest processor state (e.g. 
general purpose registers) + */ struct vmcb { struct vmcb_ctrl ctrl; struct vmcb_state state; @@ -367,11 +380,8 @@ struct vmcb { CTASSERT(sizeof(struct vmcb) == PAGE_SIZE); CTASSERT(offsetof(struct vmcb, state) == 0x400); -int vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval); -int vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val); -int vmcb_setdesc(void *arg, int vcpu, int ident, struct seg_desc *desc); -int vmcb_getdesc(void *arg, int vcpu, int ident, struct seg_desc *desc); -int vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg); +struct vmcb_segment *vmcb_segptr(struct vmcb *vmcb, int type); +uint64_t *vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp); #endif /* _KERNEL */ #endif /* _VMCB_H_ */ diff --git a/usr/src/uts/i86pc/io/vmm/intel/ept.c b/usr/src/uts/i86pc/io/vmm/intel/ept.c index 5e5253780e..5e3bd6d309 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/ept.c +++ b/usr/src/uts/i86pc/io/vmm/intel/ept.c @@ -59,7 +59,6 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm.h> -#include "vmx_cpufunc.h" #include "ept.h" #define EPT_SUPPORTS_EXEC_ONLY(cap) ((cap) & (1UL << 0)) @@ -171,31 +170,12 @@ ept_dump(uint64_t *ptp, int nlevels) } #endif -#ifdef __FreeBSD__ -static void -invept_single_context(void *arg) -{ - struct invept_desc desc = *(struct invept_desc *)arg; - - invept(INVEPT_TYPE_SINGLE_CONTEXT, desc); -} - -void -ept_invalidate_mappings(u_long eptp) -{ - struct invept_desc invept_desc = { 0 }; - invept_desc.eptp = eptp; - - smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc); -} -#else /* __FreeBSD__ */ void ept_invalidate_mappings(u_long eptp) { hma_vmx_invept_allcpus((uintptr_t)eptp); } -#endif /* __FreeBSD__ */ static int ept_pinit(pmap_t pmap) diff --git a/usr/src/uts/i86pc/io/vmm/intel/offsets.in b/usr/src/uts/i86pc/io/vmm/intel/offsets.in index cc041eaefc..ca7f967f3b 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/offsets.in +++ b/usr/src/uts/i86pc/io/vmm/intel/offsets.in @@ -22,7 +22,6 
@@ #include <machine/pmap.h> #include <machine/vmm.h> -#include "intel/vmx_cpufunc.h" #include "intel/vmx.h" #include "vm/vm_glue.h" diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c index f1a08cc57d..36318b1b49 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c @@ -39,59 +39,24 @@ * * Copyright 2014 Pluribus Networks Inc. * Copyright 2017 Joyent, Inc. + * Copyright 2020 Oxide Computer Company */ -#ifdef __FreeBSD__ -#include "opt_ddb.h" -#endif - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include <sys/param.h> -#include <sys/sysctl.h> #include <sys/systm.h> -#include <sys/pcpu.h> #include <vm/vm.h> -#include <vm/pmap.h> -#include <machine/segments.h> #include <machine/vmm.h> -#include "vmm_host.h" -#include "vmx_cpufunc.h" -#include "vmcs.h" -#include "ept.h" #include "vmx.h" -#ifdef DDB -#include <ddb/ddb.h> -#endif - -SYSCTL_DECL(_hw_vmm_vmx); - -static int no_flush_rsb; -SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW, - &no_flush_rsb, 0, "Do not flush RSB upon vmexit"); - -static uint64_t -vmcs_fix_regval(uint32_t encoding, uint64_t val) -{ - - switch (encoding) { - case VMCS_GUEST_CR0: - val = vmx_fix_cr0(val); - break; - case VMCS_GUEST_CR4: - val = vmx_fix_cr4(val); - break; - default: - break; - } - return (val); -} +/* Bits 0-30 of VMX_BASIC MSR contain VMCS revision identifier */ +#define VMX_BASIC_REVISION(v) ((v) & 0x7fffffff) -static uint32_t +uint32_t vmcs_field_encoding(int ident) { switch (ident) { @@ -138,15 +103,13 @@ vmcs_field_encoding(int ident) case VM_REG_GUEST_ENTRY_INST_LENGTH: return (VMCS_ENTRY_INST_LENGTH); default: - return (-1); + return (VMCS_INVALID_ENCODING); } - } -static int +void vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc) { - switch (seg) { case VM_REG_GUEST_ES: *base = VMCS_GUEST_ES_BASE; @@ -199,364 +162,111 @@ vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc) *acc = 
VMCS_INVALID_ENCODING; break; default: - return (EINVAL); + panic("invalid segment register %d", seg); } - - return (0); } -int -vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval) +void +vmcs_clear(uintptr_t vmcs_pa) { - int error; - uint32_t encoding; - - /* - * If we need to get at vmx-specific state in the VMCS we can bypass - * the translation of 'ident' to 'encoding' by simply setting the - * sign bit. As it so happens the upper 16 bits are reserved (i.e - * set to 0) in the encodings for the VMCS so we are free to use the - * sign bit. - */ - if (ident < 0) - encoding = ident & 0x7fffffff; - else - encoding = vmcs_field_encoding(ident); - - if (encoding == (uint32_t)-1) - return (EINVAL); + int err; - if (!running) - VMPTRLD(vmcs); + __asm __volatile("vmclear %[addr];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (err) + : [addr] "m" (vmcs_pa) + : "memory"); - error = vmread(encoding, retval); - - if (!running) - VMCLEAR(vmcs); - - return (error); -} - -int -vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val) -{ - int error; - uint32_t encoding; - - if (ident < 0) - encoding = ident & 0x7fffffff; - else - encoding = vmcs_field_encoding(ident); - - if (encoding == (uint32_t)-1) - return (EINVAL); - - val = vmcs_fix_regval(encoding, val); - - if (!running) - VMPTRLD(vmcs); - - error = vmwrite(encoding, val); - - if (!running) - VMCLEAR(vmcs); - - return (error); -} - -int -vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) -{ - int error; - uint32_t base, limit, access; - - error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); - if (error != 0) - panic("vmcs_setdesc: invalid segment register %d", seg); - - if (!running) - VMPTRLD(vmcs); - if ((error = vmwrite(base, desc->base)) != 0) - goto done; - - if ((error = vmwrite(limit, desc->limit)) != 0) - goto done; - - if (access != VMCS_INVALID_ENCODING) { - if ((error = vmwrite(access, desc->access)) != 0) - goto done; + if (err != 0) { + 
panic("vmclear(%p) error %d", (void *)vmcs_pa, err); } -done: - if (!running) - VMCLEAR(vmcs); - return (error); -} - -int -vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) -{ - int error; - uint32_t base, limit, access; - uint64_t u64; - - error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); - if (error != 0) - panic("vmcs_getdesc: invalid segment register %d", seg); - if (!running) - VMPTRLD(vmcs); - if ((error = vmread(base, &u64)) != 0) - goto done; - desc->base = u64; - - if ((error = vmread(limit, &u64)) != 0) - goto done; - desc->limit = u64; - - if (access != VMCS_INVALID_ENCODING) { - if ((error = vmread(access, &u64)) != 0) - goto done; - desc->access = u64; - } -done: - if (!running) - VMCLEAR(vmcs); - return (error); + /* + * A call to critical_enter() was made in vmcs_load() to prevent + * preemption. Now that the VMCS is unloaded, it is safe to relax that + * restriction. + */ + critical_exit(); } -int -vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count) +void +vmcs_initialize(struct vmcs *vmcs, uintptr_t vmcs_pa) { - int error; + int err; - VMPTRLD(vmcs); + /* set to VMCS revision */ + vmcs->identifier = VMX_BASIC_REVISION(rdmsr(MSR_VMX_BASIC)); /* - * Guest MSRs are saved in the VM-exit MSR-store area. - * Guest MSRs are loaded from the VM-entry MSR-load area. - * Both areas point to the same location in memory. + * Perform a vmclear on the VMCS, but without the critical section + * manipulation as done by vmcs_clear() above. 
*/ - if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0) - goto done; - if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0) - goto done; - - if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0) - goto done; - if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0) - goto done; - - error = 0; -done: - VMCLEAR(vmcs); - return (error); + __asm __volatile("vmclear %[addr];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (err) + : [addr] "m" (vmcs_pa) + : "memory"); + + if (err != 0) { + panic("vmclear(%p) error %d", (void *)vmcs_pa, err); + } } -int -vmcs_init(struct vmcs *vmcs) +void +vmcs_load(uintptr_t vmcs_pa) { - int error, codesel, datasel, tsssel; - u_long cr0, cr4, efer; - uint64_t pat; -#ifdef __FreeBSD__ - uint64_t fsbase, idtrbase; -#endif - - codesel = vmm_get_host_codesel(); - datasel = vmm_get_host_datasel(); - tsssel = vmm_get_host_tsssel(); + int err; /* - * Make sure we have a "current" VMCS to work with. + * While the VMCS is loaded on the CPU for subsequent operations, it is + * important that the thread not be preempted. That is ensured with + * critical_enter() here, with a matching critical_exit() call in + * vmcs_clear() once the VMCS is unloaded. 
*/ - VMPTRLD(vmcs); - - /* Host state */ - - /* Initialize host IA32_PAT MSR */ - pat = vmm_get_host_pat(); - if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0) - goto done; + critical_enter(); - /* Load the IA32_EFER MSR */ - efer = vmm_get_host_efer(); - if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0) - goto done; + __asm __volatile("vmptrld %[addr];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (err) + : [addr] "m" (vmcs_pa) + : "memory"); - /* Load the control registers */ - - cr0 = vmm_get_host_cr0(); - if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0) - goto done; - - cr4 = vmm_get_host_cr4() | CR4_VMXE; - if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0) - goto done; - - /* Load the segment selectors */ - if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0) - goto done; - -#ifdef __FreeBSD__ - if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0) - goto done; -#else - if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, vmm_get_host_fssel())) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, vmm_get_host_gssel())) != 0) - goto done; -#endif - - if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0) - goto done; - -#ifdef __FreeBSD__ - /* - * Load the Base-Address for %fs and idtr. - * - * Note that we exclude %gs, tss and gdtr here because their base - * address is pcpu specific. 
- */ - fsbase = vmm_get_host_fsbase(); - if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0) - goto done; - - idtrbase = vmm_get_host_idtrbase(); - if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0) - goto done; + if (err != 0) { + panic("vmptrld(%p) error %d", (void *)vmcs_pa, err); + } +} -#else /* __FreeBSD__ */ - /* - * Configure host sysenter MSRs to be restored on VM exit. - * The thread-specific MSR_INTC_SEP_ESP value is loaded in vmx_run. - */ - if ((error = vmwrite(VMCS_HOST_IA32_SYSENTER_CS, KCS_SEL)) != 0) - goto done; - /* Natively defined as MSR_INTC_SEP_EIP */ - if ((error = vmwrite(VMCS_HOST_IA32_SYSENTER_EIP, - rdmsr(MSR_SYSENTER_EIP_MSR))) != 0) - goto done; +uint64_t +vmcs_read(uint32_t encoding) +{ + int error; + uint64_t val; -#endif /* __FreeBSD__ */ + __asm __volatile("vmread %[enc], %[val];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (error), [val] "=r" (val) + : [enc] "r" ((uint64_t)encoding) + : "memory"); - /* instruction pointer */ - if (no_flush_rsb) { - if ((error = vmwrite(VMCS_HOST_RIP, - (u_long)vmx_exit_guest)) != 0) - goto done; - } else { - if ((error = vmwrite(VMCS_HOST_RIP, - (u_long)vmx_exit_guest_flush_rsb)) != 0) - goto done; + if (error != 0) { + panic("vmread(%x) error %d", encoding, error); } - /* link pointer */ - if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0) - goto done; -done: - VMCLEAR(vmcs); - return (error); + return (val); } -#ifdef DDB -extern int vmxon_enabled[]; - -DB_SHOW_COMMAND(vmcs, db_show_vmcs) +void +vmcs_write(uint32_t encoding, uint64_t val) { - uint64_t cur_vmcs, val; - uint32_t exit; - - if (!vmxon_enabled[curcpu]) { - db_printf("VMX not enabled\n"); - return; - } + int error; - if (have_addr) { - db_printf("Only current VMCS supported\n"); - return; - } + __asm __volatile("vmwrite %[val], %[enc];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (error) + : [val] "r" (val), [enc] "r" ((uint64_t)encoding) + : "memory"); - vmptrst(&cur_vmcs); - if (cur_vmcs == VMCS_INITIAL) { - db_printf("No 
current VM context\n"); - return; - } - db_printf("VMCS: %jx\n", cur_vmcs); - db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID)); - db_printf("Activity: "); - val = vmcs_read(VMCS_GUEST_ACTIVITY); - switch (val) { - case 0: - db_printf("Active"); - break; - case 1: - db_printf("HLT"); - break; - case 2: - db_printf("Shutdown"); - break; - case 3: - db_printf("Wait for SIPI"); - break; - default: - db_printf("Unknown: %#lx", val); - } - db_printf("\n"); - exit = vmcs_read(VMCS_EXIT_REASON); - if (exit & 0x80000000) - db_printf("Entry Failure Reason: %u\n", exit & 0xffff); - else - db_printf("Exit Reason: %u\n", exit & 0xffff); - db_printf("Qualification: %#lx\n", vmcs_exit_qualification()); - db_printf("Guest Linear Address: %#lx\n", - vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)); - switch (exit & 0x8000ffff) { - case EXIT_REASON_EXCEPTION: - case EXIT_REASON_EXT_INTR: - val = vmcs_read(VMCS_EXIT_INTR_INFO); - db_printf("Interrupt Type: "); - switch (val >> 8 & 0x7) { - case 0: - db_printf("external"); - break; - case 2: - db_printf("NMI"); - break; - case 3: - db_printf("HW exception"); - break; - case 4: - db_printf("SW exception"); - break; - default: - db_printf("?? %lu", val >> 8 & 0x7); - break; - } - db_printf(" Vector: %lu", val & 0xff); - if (val & 0x800) - db_printf(" Error Code: %lx", - vmcs_read(VMCS_EXIT_INTR_ERRCODE)); - db_printf("\n"); - break; - case EXIT_REASON_EPT_FAULT: - case EXIT_REASON_EPT_MISCONFIG: - db_printf("Guest Physical Address: %#lx\n", - vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)); - break; + if (error != 0) { + panic("vmwrite(%x, %lx) error %d", encoding, val, error); } - db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error()); } -#endif diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h index edde5c6dd5..1713872556 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h @@ -30,6 +30,7 @@ /* * Copyright 2017 Joyent, Inc. 
+ * Copyright 2020 Oxide Computer Company */ #ifndef _VMCS_H_ @@ -41,125 +42,20 @@ struct vmcs { uint32_t identifier; uint32_t abort_code; char _impl_specific[PAGE_SIZE - sizeof(uint32_t) * 2]; -#ifndef __FreeBSD__ - /* - * Keep the physical address of the VMCS cached adjacent for the - * structure so it can be referenced in contexts which are too delicate - * for a call into the HAT. For the moment it means wasting a whole - * page on padding for the PA value to maintain alignment, but it - * allows the consumers of 'struct vmcs *' to easily access the value - * without a significant change to the interface. - */ - uint64_t vmcs_pa; - char _pa_pad[PAGE_SIZE - sizeof (vm_paddr_t)]; -#endif }; -#ifdef __FreeBSD__ -CTASSERT(sizeof(struct vmcs) == PAGE_SIZE); -#else -CTASSERT(sizeof(struct vmcs) == (2*PAGE_SIZE)); -#endif +CTASSERT(sizeof (struct vmcs) == PAGE_SIZE); -/* MSR save region is composed of an array of 'struct msr_entry' */ -struct msr_entry { - uint32_t index; - uint32_t reserved; - uint64_t val; +uint32_t vmcs_field_encoding(int ident); +void vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, + uint32_t *acc); -}; +void vmcs_initialize(struct vmcs *vmcs, uintptr_t vmcs_pa); -int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count); -int vmcs_init(struct vmcs *vmcs); -int vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *rv); -int vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val); -int vmcs_getdesc(struct vmcs *vmcs, int running, int ident, - struct seg_desc *desc); -int vmcs_setdesc(struct vmcs *vmcs, int running, int ident, - struct seg_desc *desc); +void vmcs_load(uintptr_t vmcs_pa); +void vmcs_clear(uintptr_t vmcs_pa); -/* - * Avoid header pollution caused by inline use of 'vtophys()' in vmx_cpufunc.h - */ -#ifdef _VMX_CPUFUNC_H_ -static __inline uint64_t -vmcs_read(uint32_t encoding) -{ - int error; - uint64_t val; - - error = vmread(encoding, &val); - KASSERT(error == 0, ("vmcs_read(%u) 
error %d", encoding, error)); - return (val); -} - -static __inline void -vmcs_write(uint32_t encoding, uint64_t val) -{ - int error; - - error = vmwrite(encoding, val); - KASSERT(error == 0, ("vmcs_write(%u) error %d", encoding, error)); -} - -#ifndef __FreeBSD__ -/* - * Due to header complexity combined with the need to cache the physical - * address for the VMCS, these must be defined here rather than vmx_cpufunc.h. - */ -static __inline int -vmclear(struct vmcs *vmcs) -{ - int error; - uint64_t addr = vmcs->vmcs_pa; - - __asm __volatile("vmclear %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - return (error); -} - -static __inline int -vmptrld(struct vmcs *vmcs) -{ - int error; - uint64_t addr = vmcs->vmcs_pa; - - __asm __volatile("vmptrld %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - return (error); -} - -static __inline void -VMCLEAR(struct vmcs *vmcs) -{ - int err; - - err = vmclear(vmcs); - if (err != 0) - panic("%s: vmclear(%p) error %d", __func__, vmcs, err); - - critical_exit(); -} - -static __inline void -VMPTRLD(struct vmcs *vmcs) -{ - int err; - - critical_enter(); - - err = vmptrld(vmcs); - if (err != 0) - panic("%s: vmptrld(%p) error %d", __func__, vmcs, err); -} -#endif /* __FreeBSD__ */ - -#endif /* _VMX_CPUFUNC_H_ */ +uint64_t vmcs_read(uint32_t encoding); +void vmcs_write(uint32_t encoding, uint64_t val); #define vmexit_instruction_length() vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH) #define vmcs_guest_rip() vmcs_read(VMCS_GUEST_RIP) @@ -177,7 +73,6 @@ VMPTRLD(struct vmcs *vmcs) #define VMCS_INITIAL 0xffffffffffffffff -#define VMCS_IDENT(encoding) ((encoding) | 0x80000000) /* * VMCS field encodings from Appendix H, Intel Architecture Manual Vol3B. 
*/ diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c index c46560948e..3e511b9f66 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c @@ -88,7 +88,6 @@ __FBSDID("$FreeBSD$"); #include "vlapic_priv.h" #include "ept.h" -#include "vmx_cpufunc.h" #include "vmcs.h" #include "vmx.h" #include "vmx_msr.h" @@ -172,11 +171,6 @@ SYSCTL_DECL(_hw_vmm); SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL); -#ifdef __FreeBSD__ -int vmxon_enabled[MAXCPU]; -static char vmxon_region[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE); -#endif /*__FreeBSD__ */ - static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2; static uint32_t exit_ctls, entry_ctls; @@ -196,10 +190,15 @@ static int vmx_initialized; SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, initialized, CTLFLAG_RD, &vmx_initialized, 0, "Intel VMX initialized"); +static int no_flush_rsb; +SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW, + &no_flush_rsb, 0, "Do not flush RSB upon vmexit"); + /* * Optional capabilities */ #ifdef __FreeBSD__ +SYSCTL_DECL(_hw_vmm_vmx); static SYSCTL_NODE(_hw_vmm_vmx, OID_AUTO, cap, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL); @@ -228,7 +227,9 @@ static int pirvec = -1; SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, posted_interrupt_vector, CTLFLAG_RD, &pirvec, 0, "APICv posted interrupt vector"); +#ifdef __FreeBSD__ static struct unrhdr *vpid_unr; +#endif /* __FreeBSD__*/ static u_int vpid_alloc_failed; SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD, &vpid_alloc_failed, 0, NULL); @@ -240,6 +241,13 @@ int guest_l1d_flush_sw; SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RD, &guest_l1d_flush_sw, 0, NULL); +/* MSR save region is composed of an array of 'struct msr_entry' */ +struct msr_entry { + uint32_t index; + uint32_t reserved; + uint64_t val; +}; + static struct msr_entry msr_load_list[1] __aligned(16); /* @@ -330,11 +338,8 @@ SDT_PROBE_DEFINE4(vmm, vmx, exit, return, static int 
vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval); -static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val); static void vmx_inject_pir(struct vlapic *vlapic); -#ifndef __FreeBSD__ -static int vmx_apply_tsc_adjust(struct vmx *, int); -#endif /* __FreeBSD__ */ +static void vmx_apply_tsc_adjust(struct vmx *, int); #ifdef KTR static const char * @@ -504,17 +509,15 @@ vmx_allow_x2apic_msrs(struct vmx *vmx) return (error); } -u_long +static u_long vmx_fix_cr0(u_long cr0) { - return ((cr0 | cr0_ones_mask) & ~cr0_zeros_mask); } -u_long +static u_long vmx_fix_cr4(u_long cr4) { - return ((cr4 | cr4_ones_mask) & ~cr4_zeros_mask); } @@ -845,45 +848,12 @@ vmx_trigger_hostintr(int vector) #endif /* __FreeBSD__ */ } -static int -vmx_setup_cr_shadow(int which, struct vmcs *vmcs, uint32_t initial) -{ - int error, mask_ident, shadow_ident; - uint64_t mask_value; - - if (which != 0 && which != 4) - panic("vmx_setup_cr_shadow: unknown cr%d", which); - - if (which == 0) { - mask_ident = VMCS_CR0_MASK; - mask_value = cr0_ones_mask | cr0_zeros_mask; - shadow_ident = VMCS_CR0_SHADOW; - } else { - mask_ident = VMCS_CR4_MASK; - mask_value = cr4_ones_mask | cr4_zeros_mask; - shadow_ident = VMCS_CR4_SHADOW; - } - - error = vmcs_setreg(vmcs, 0, VMCS_IDENT(mask_ident), mask_value); - if (error) - return (error); - - error = vmcs_setreg(vmcs, 0, VMCS_IDENT(shadow_ident), initial); - if (error) - return (error); - - return (0); -} -#define vmx_setup_cr0_shadow(vmcs,init) vmx_setup_cr_shadow(0, (vmcs), (init)) -#define vmx_setup_cr4_shadow(vmcs,init) vmx_setup_cr_shadow(4, (vmcs), (init)) - static void * vmx_vminit(struct vm *vm, pmap_t pmap) { uint16_t vpid[VM_MAXCPU]; - int i, error; + int i, error, datasel; struct vmx *vmx; - struct vmcs *vmcs; uint32_t exc_bitmap; uint16_t maxcpus; uint32_t proc_ctls, proc2_ctls, pin_ctls; @@ -972,6 +942,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap) } maxcpus = 
vm_get_maxcpus(vm); + datasel = vmm_get_host_datasel(); for (i = 0; i < maxcpus; i++) { /* * Cache physical address lookups for various components which @@ -982,31 +953,58 @@ vmx_vminit(struct vm *vm, pmap_t pmap) vm_paddr_t apic_page_pa = vtophys(&vmx->apic_page[i]); vm_paddr_t pir_desc_pa = vtophys(&vmx->pir_desc[i]); - vmcs = &vmx->vmcs[i]; - vmcs->identifier = vmx_revision(); - vmcs->vmcs_pa = (uint64_t)vtophys(vmcs); - error = vmclear(vmcs); - if (error != 0) { - panic("vmx_vminit: vmclear error %d on vcpu %d\n", - error, i); - } + vmx->vmcs_pa[i] = (uintptr_t)vtophys(&vmx->vmcs[i]); + vmcs_initialize(&vmx->vmcs[i], vmx->vmcs_pa[i]); vmx_msr_guest_init(vmx, i); - error = vmcs_init(vmcs); - KASSERT(error == 0, ("vmcs_init error %d", error)); + vmcs_load(vmx->vmcs_pa[i]); - VMPTRLD(vmcs); - error = 0; + vmcs_write(VMCS_HOST_IA32_PAT, vmm_get_host_pat()); + vmcs_write(VMCS_HOST_IA32_EFER, vmm_get_host_efer()); + + /* Load the control registers */ + vmcs_write(VMCS_HOST_CR0, vmm_get_host_cr0()); + vmcs_write(VMCS_HOST_CR4, vmm_get_host_cr4() | CR4_VMXE); + + /* Load the segment selectors */ + vmcs_write(VMCS_HOST_CS_SELECTOR, vmm_get_host_codesel()); + + vmcs_write(VMCS_HOST_ES_SELECTOR, datasel); + vmcs_write(VMCS_HOST_SS_SELECTOR, datasel); + vmcs_write(VMCS_HOST_DS_SELECTOR, datasel); - error += vmwrite(VMCS_EPTP, vmx->eptp); - error += vmwrite(VMCS_PIN_BASED_CTLS, pin_ctls); - error += vmwrite(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); - error += vmwrite(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls); - error += vmwrite(VMCS_EXIT_CTLS, exit_ctls); - error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls); - error += vmwrite(VMCS_MSR_BITMAP, msr_bitmap_pa); - error += vmwrite(VMCS_VPID, vpid[i]); + vmcs_write(VMCS_HOST_FS_SELECTOR, vmm_get_host_fssel()); + vmcs_write(VMCS_HOST_GS_SELECTOR, vmm_get_host_gssel()); + vmcs_write(VMCS_HOST_TR_SELECTOR, vmm_get_host_tsssel()); + + /* + * Configure host sysenter MSRs to be restored on VM exit. 
+ * The thread-specific MSR_INTC_SEP_ESP value is loaded in vmx_run. + */ + vmcs_write(VMCS_HOST_IA32_SYSENTER_CS, KCS_SEL); + vmcs_write(VMCS_HOST_IA32_SYSENTER_EIP, + rdmsr(MSR_SYSENTER_EIP_MSR)); + + /* instruction pointer */ + if (no_flush_rsb) { + vmcs_write(VMCS_HOST_RIP, (uint64_t)vmx_exit_guest); + } else { + vmcs_write(VMCS_HOST_RIP, + (uint64_t)vmx_exit_guest_flush_rsb); + } + + /* link pointer */ + vmcs_write(VMCS_LINK_POINTER, ~0); + + vmcs_write(VMCS_EPTP, vmx->eptp); + vmcs_write(VMCS_PIN_BASED_CTLS, pin_ctls); + vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); + vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls); + vmcs_write(VMCS_EXIT_CTLS, exit_ctls); + vmcs_write(VMCS_ENTRY_CTLS, entry_ctls); + vmcs_write(VMCS_MSR_BITMAP, msr_bitmap_pa); + vmcs_write(VMCS_VPID, vpid[i]); if (guest_l1d_flush && !guest_l1d_flush_sw) { vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract( @@ -1022,28 +1020,39 @@ vmx_vminit(struct vm *vm, pmap_t pmap) exc_bitmap = 0xffffffff; else exc_bitmap = 1 << IDT_MC; - error += vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap); + vmcs_write(VMCS_EXCEPTION_BITMAP, exc_bitmap); vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1; - error += vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1); + vmcs_write(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1); if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) { - error += vmwrite(VMCS_VIRTUAL_APIC, apic_page_pa); + vmcs_write(VMCS_VIRTUAL_APIC, apic_page_pa); } if (vmx_cap_en(vmx, VMX_CAP_APICV)) { - error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); - error += vmwrite(VMCS_EOI_EXIT0, 0); - error += vmwrite(VMCS_EOI_EXIT1, 0); - error += vmwrite(VMCS_EOI_EXIT2, 0); - error += vmwrite(VMCS_EOI_EXIT3, 0); + vmcs_write(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); + vmcs_write(VMCS_EOI_EXIT0, 0); + vmcs_write(VMCS_EOI_EXIT1, 0); + vmcs_write(VMCS_EOI_EXIT2, 0); + vmcs_write(VMCS_EOI_EXIT3, 0); } if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) { - error += vmwrite(VMCS_PIR_VECTOR, pirvec); - error += vmwrite(VMCS_PIR_DESC, pir_desc_pa); + 
vmcs_write(VMCS_PIR_VECTOR, pirvec); + vmcs_write(VMCS_PIR_DESC, pir_desc_pa); } - VMCLEAR(vmcs); - KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs")); + + /* + * Set up the CR0/4 masks and configure the read shadow state + * to the power-on register value from the Intel Sys Arch. + * CR0 - 0x60000010 + * CR4 - 0 + */ + vmcs_write(VMCS_CR0_MASK, cr0_ones_mask | cr0_zeros_mask); + vmcs_write(VMCS_CR0_SHADOW, 0x60000010); + vmcs_write(VMCS_CR4_MASK, cr4_ones_mask | cr4_zeros_mask); + vmcs_write(VMCS_CR4_SHADOW, 0); + + vmcs_clear(vmx->vmcs_pa[i]); vmx->cap[i].set = 0; vmx->cap[i].proc_ctls = proc_ctls; @@ -1054,19 +1063,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap) vmx->state[i].lastcpu = NOCPU; vmx->state[i].vpid = vpid[i]; - /* - * Set up the CR0/4 shadows, and init the read shadow - * to the power-on register value from the Intel Sys Arch. - * CR0 - 0x60000010 - * CR4 - 0 - */ - error = vmx_setup_cr0_shadow(vmcs, 0x60000010); - if (error != 0) - panic("vmx_setup_cr0_shadow %d", error); - - error = vmx_setup_cr4_shadow(vmcs, 0); - if (error != 0) - panic("vmx_setup_cr4_shadow %d", error); vmx->ctx[i].pmap = pmap; } @@ -1095,7 +1091,7 @@ static __inline void vmx_run_trace(struct vmx *vmx, int vcpu) { #ifdef KTR - VCPU_CTR1(vmx->vm, vcpu, "Resume execution at %#lx", vmcs_guest_rip()); + VCPU_CTR1(vmx->vm, vcpu, "Resume execution at %lx", vmcs_guest_rip()); #endif } @@ -1123,6 +1119,33 @@ vmx_astpending_trace(struct vmx *vmx, int vcpu, uint64_t rip) static VMM_STAT_INTEL(VCPU_INVVPID_SAVED, "Number of vpid invalidations saved"); static VMM_STAT_INTEL(VCPU_INVVPID_DONE, "Number of vpid invalidations done"); +#define INVVPID_TYPE_ADDRESS 0UL +#define INVVPID_TYPE_SINGLE_CONTEXT 1UL +#define INVVPID_TYPE_ALL_CONTEXTS 2UL + +struct invvpid_desc { + uint16_t vpid; + uint16_t _res1; + uint32_t _res2; + uint64_t linear_addr; +}; +CTASSERT(sizeof(struct invvpid_desc) == 16); + +static __inline void +invvpid(uint64_t type, struct invvpid_desc desc) +{ + int error; + 
+ __asm __volatile("invvpid %[desc], %[type];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (error) + : [desc] "m" (desc), [type] "r" (type) + : "memory"); + + if (error) + panic("invvpid error %d", error); +} + /* * Invalidate guest mappings identified by its vpid from the TLB. */ @@ -1190,7 +1213,6 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) { struct vmxstate *vmxstate; -#ifndef __FreeBSD__ /* * Regardless of whether the VM appears to have migrated between CPUs, * save the host sysenter stack pointer. As it points to the kernel @@ -1203,8 +1225,7 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) * Perform any needed TSC_OFFSET adjustment based on TSC_MSR writes or * migration between host CPUs with differing TSC values. */ - VERIFY0(vmx_apply_tsc_adjust(vmx, vcpu)); -#endif + vmx_apply_tsc_adjust(vmx, vcpu); vmxstate = &vmx->state[vcpu]; if (vmxstate->lastcpu == curcpu) @@ -1214,10 +1235,8 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1); -#ifndef __FreeBSD__ /* Load the per-CPU IDT address */ vmcs_write(VMCS_HOST_IDTR_BASE, vmm_get_host_idtrbase()); -#endif vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase()); vmcs_write(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase()); vmcs_write(VMCS_HOST_GS_BASE, vmm_get_host_gsbase()); @@ -1245,7 +1264,7 @@ vmx_clear_int_window_exiting(struct vmx *vmx, int vcpu) { KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0, - ("intr_window_exiting not set: %#x", vmx->cap[vcpu].proc_ctls)); + ("intr_window_exiting not set: %x", vmx->cap[vcpu].proc_ctls)); vmx->cap[vcpu].proc_ctls &= ~PROCBASED_INT_WINDOW_EXITING; vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); VCPU_CTR0(vmx->vm, vcpu, "Disabling interrupt window exiting"); @@ -1267,29 +1286,12 @@ vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu) { KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0, - ("nmi_window_exiting not set 
%#x", vmx->cap[vcpu].proc_ctls)); + ("nmi_window_exiting not set %x", vmx->cap[vcpu].proc_ctls)); vmx->cap[vcpu].proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING; vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting"); } -#ifdef __FreeBSD__ -int -vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset) -{ - int error; - - if ((vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET) == 0) { - vmx->cap[vcpu].proc_ctls |= PROCBASED_TSC_OFFSET; - vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); - VCPU_CTR0(vmx->vm, vcpu, "Enabling TSC offsetting"); - } - - error = vmwrite(VMCS_TSC_OFFSET, offset); - - return (error); -} -#else /* __FreeBSD__ */ /* * Set the TSC adjustment, taking into account the offsets measured between * host physical CPUs. This is required even if the guest has not set a TSC @@ -1297,24 +1299,20 @@ vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset) * migrated onto. Without this mitigation, un-synched host TSCs will convey * the appearance of TSC time-travel to the guest as its vCPUs migrate. 
*/ -static int +static void vmx_apply_tsc_adjust(struct vmx *vmx, int vcpu) { extern hrtime_t tsc_gethrtime_tick_delta(void); const uint64_t target_offset = (vcpu_tsc_offset(vmx->vm, vcpu) + (uint64_t)tsc_gethrtime_tick_delta()); - int error = 0; ASSERT(vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET); if (vmx->tsc_offset_active[vcpu] != target_offset) { - error = vmwrite(VMCS_TSC_OFFSET, target_offset); + vmcs_write(VMCS_TSC_OFFSET, target_offset); vmx->tsc_offset_active[vcpu] = target_offset; } - - return (error); } -#endif /* __FreeBSD__ */ #define NMI_BLOCKING (VMCS_INTERRUPTIBILITY_NMI_BLOCKING | \ VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING) @@ -1333,11 +1331,11 @@ vmx_inject_nmi(struct vmx *vmx, int vcpu) gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); KASSERT((gi & NMI_BLOCKING) == 0, ("vmx_inject_nmi: invalid guest " - "interruptibility-state %#x", gi)); + "interruptibility-state %x", gi)); info = vmcs_read(VMCS_ENTRY_INTR_INFO); KASSERT((info & VMCS_INTR_VALID) == 0, ("vmx_inject_nmi: invalid " - "VM-entry interruption information %#x", info)); + "VM-entry interruption information %x", info)); /* * Inject the virtual NMI. 
The vector must be the NMI IDT entry @@ -1373,7 +1371,7 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, if (vmx->state[vcpu].nextrip != guestrip && (gi & HWINTR_BLOCKING) != 0) { VCPU_CTR2(vmx->vm, vcpu, "Guest interrupt blocking " - "cleared due to rip change: %#lx/%#lx", + "cleared due to rip change: %lx/%lx", vmx->state[vcpu].nextrip, guestrip); gi &= ~HWINTR_BLOCKING; vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); @@ -1390,10 +1388,10 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) { KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry " - "intinfo is not valid: %#lx", __func__, entryinfo)); + "intinfo is not valid: %lx", __func__, entryinfo)); KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject " - "pending exception: %#lx/%#x", __func__, entryinfo, info)); + "pending exception: %lx/%x", __func__, entryinfo, info)); info = entryinfo; vector = info & 0xff; @@ -1432,11 +1430,11 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, need_nmi_exiting = 0; } else { VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI " - "due to VM-entry intr info %#x", info); + "due to VM-entry intr info %x", info); } } else { VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI due to " - "Guest Interruptibility-state %#x", gi); + "Guest Interruptibility-state %x", gi); } if (need_nmi_exiting) { @@ -1483,18 +1481,18 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, */ if ((gi & HWINTR_BLOCKING) != 0) { VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " - "Guest Interruptibility-state %#x", vector, gi); + "Guest Interruptibility-state %x", vector, gi); goto cantinject; } if ((info & VMCS_INTR_VALID) != 0) { VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " - "VM-entry intr info %#x", vector, info); + "VM-entry intr info %x", vector, info); goto cantinject; } rflags = vmcs_read(VMCS_GUEST_RFLAGS); if ((rflags & PSL_I) == 0) { 
VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " - "rflags %#lx", vector, rflags); + "rflags %lx", vector, rflags); goto cantinject; } @@ -1573,7 +1571,7 @@ vmx_assert_nmi_blocking(struct vmx *vmx, int vcpuid) gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); KASSERT(gi & VMCS_INTERRUPTIBILITY_NMI_BLOCKING, - ("NMI blocking is not in effect %#x", gi)); + ("NMI blocking is not in effect %x", gi)); } static int @@ -1949,10 +1947,11 @@ vmexit_inout(struct vm_exit *vmexit, struct vie *vie, uint64_t qual, inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO); /* - * Bits 7-9 encode the address size of ins/outs operations where - * the 0/1/2 values correspond to 16/32/64 bit sizes. + * According to the SDM, bits 9:7 encode the address size of the + * ins/outs operation, but only values 0/1/2 are expected, + * corresponding to 16/32/64 bit sizes. */ - inout->addrsize = 2 << (1 + ((inst_info >> 7) & 0x3)); + inout->addrsize = 2 << BITX(inst_info, 9, 7); VERIFY(inout->addrsize == 2 || inout->addrsize == 4 || inout->addrsize == 8); @@ -2224,9 +2223,7 @@ emulate_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) static int emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu) { - struct vmxctx *vmxctx; uint64_t result; - uint32_t eax, edx; int error; if (lapic_msr(num)) @@ -2235,14 +2232,8 @@ emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu) error = vmx_rdmsr(vmx, vcpuid, num, &result, retu); if (error == 0) { - eax = result; - vmxctx = &vmx->ctx[vcpuid]; - error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RAX, eax); - KASSERT(error == 0, ("vmxctx_setreg(rax) error %d", error)); - - edx = result >> 32; - error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RDX, edx); - KASSERT(error == 0, ("vmxctx_setreg(rdx) error %d", error)); + vmx->ctx[vcpuid].guest_rax = (uint32_t)result; + vmx->ctx[vcpuid].guest_rdx = result >> 32; } return (error); @@ -2260,7 +2251,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) struct vie *vie; 
struct vlapic *vlapic; struct vm_task_switch *ts; - uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info; + uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info; uint32_t intr_type, intr_vec, reason; uint64_t exitintinfo, qual, gpa; bool retu; @@ -2367,7 +2358,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) */ if (ts->reason == TSR_IDT_GATE) { KASSERT(idtvec_info & VMCS_IDT_VEC_VALID, - ("invalid idtvec_info %#x for IDT task switch", + ("invalid idtvec_info %x for IDT task switch", idtvec_info)); intr_type = idtvec_info & VMCS_INTR_T_MASK; if (intr_type != VMCS_INTR_T_SWINTR && @@ -2496,7 +2487,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) return (1); KASSERT((intr_info & VMCS_INTR_VALID) != 0 && (intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_HWINTR, - ("VM exit interruption info invalid: %#x", intr_info)); + ("VM exit interruption info invalid: %x", intr_info)); vmx_trigger_hostintr(intr_info & 0xff); /* @@ -2528,7 +2519,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXCEPTION, 1); intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); KASSERT((intr_info & VMCS_INTR_VALID) != 0, - ("VM exit interruption info invalid: %#x", intr_info)); + ("VM exit interruption info invalid: %x", intr_info)); intr_vec = intr_info & 0xff; intr_type = intr_info & VMCS_INTR_T_MASK; @@ -2580,9 +2571,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) } if (intr_vec == IDT_PF) { - error = vmxctx_setreg(vmxctx, VM_REG_GUEST_CR2, qual); - KASSERT(error == 0, ("%s: vmxctx_setreg(cr2) error %d", - __func__, error)); + vmxctx->guest_cr2 = qual; } /* @@ -2600,7 +2589,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) errcode_valid = 1; errcode = vmcs_read(VMCS_EXIT_INTR_ERRCODE); } - VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%#x into " + VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%x into " "the guest", intr_vec, 
errcode); SDT_PROBE5(vmm, vmx, exit, exception, vmx, vcpu, vmexit, intr_vec, errcode); @@ -2790,11 +2779,11 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); KASSERT((intr_info & VMCS_INTR_VALID) != 0, - ("VM exit interruption info invalid: %#x", intr_info)); + ("VM exit interruption info invalid: %x", intr_info)); if ((intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_NMI) { KASSERT((intr_info & 0xff) == IDT_NMI, ("VM exit due " - "to NMI has invalid vector: %#x", intr_info)); + "to NMI has invalid vector: %x", intr_info)); VCPU_CTR0(vmx->vm, vcpuid, "Vectoring to NMI handler"); #ifdef __FreeBSD__ __asm __volatile("int $2"); @@ -2807,7 +2796,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) static __inline void vmx_dr_enter_guest(struct vmxctx *vmxctx) { - register_t rflags; + uint64_t rflags; /* Save host control debug registers. */ vmxctx->host_dr7 = rdr7(); @@ -2872,14 +2861,14 @@ vmx_dr_leave_guest(struct vmxctx *vmxctx) } static int -vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, +vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap, struct vm_eventinfo *evinfo) { int rc, handled, launched; struct vmx *vmx; struct vm *vm; struct vmxctx *vmxctx; - struct vmcs *vmcs; + uintptr_t vmcs_pa; struct vm_exit *vmexit; struct vlapic *vlapic; uint32_t exit_reason; @@ -2890,7 +2879,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, vmx = arg; vm = vmx->vm; - vmcs = &vmx->vmcs[vcpu]; + vmcs_pa = vmx->vmcs_pa[vcpu]; vmxctx = &vmx->ctx[vcpu]; vlapic = vm_lapic(vm, vcpu); vmexit = vm_exitinfo(vm, vcpu); @@ -2901,7 +2890,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, vmx_msr_guest_enter(vmx, vcpu); - VMPTRLD(vmcs); + vmcs_load(vmcs_pa); #ifndef __FreeBSD__ VERIFY(vmx->vmcs_state[vcpu] == VS_NONE && curthread->t_preempt != 0); @@ -2922,7 +2911,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, vmx_set_pcpu_defaults(vmx, vcpu, pmap); 
do { KASSERT(vmcs_guest_rip() == rip, ("%s: vmcs guest rip mismatch " - "%#lx/%#lx", __func__, vmcs_guest_rip(), rip)); + "%lx/%lx", __func__, vmcs_guest_rip(), rip)); handled = UNHANDLED; /* @@ -3115,7 +3104,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, VCPU_CTR1(vm, vcpu, "returning from vmx_run: exitcode %d", vmexit->exitcode); - VMCLEAR(vmcs); + vmcs_clear(vmcs_pa); vmx_msr_guest_exit(vmx, vcpu); #ifndef __FreeBSD__ @@ -3145,10 +3134,9 @@ vmx_vmcleanup(void *arg) return; } -static register_t * +static uint64_t * vmxctx_regptr(struct vmxctx *vmxctx, int reg) { - switch (reg) { case VM_REG_GUEST_RAX: return (&vmxctx->guest_rax); @@ -3199,157 +3187,129 @@ vmxctx_regptr(struct vmxctx *vmxctx, int reg) } static int -vmxctx_getreg(struct vmxctx *vmxctx, int reg, uint64_t *retval) +vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) { - register_t *regp; - - if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) { - *retval = *regp; - return (0); - } else - return (EINVAL); -} + int running, hostcpu, err; + struct vmx *vmx = arg; + uint64_t *regp; -static int -vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val) -{ - register_t *regp; + running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu); - if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) { - *regp = val; + /* VMCS access not required for ctx reads */ + if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) { + *retval = *regp; return (0); - } else - return (EINVAL); -} - -static int -vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval) -{ - uint64_t gi; - int error; - - error = vmcs_getreg(&vmx->vmcs[vcpu], running, - VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi); - *retval = (gi & HWINTR_BLOCKING) ? 
1 : 0; - return (error); -} - -static int -vmx_modify_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t val) -{ - struct vmcs *vmcs; - uint64_t gi; - int error, ident; - - /* - * Forcing the vcpu into an interrupt shadow is not supported. - */ - if (val) { - error = EINVAL; - goto done; } - vmcs = &vmx->vmcs[vcpu]; - ident = VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY); - error = vmcs_getreg(vmcs, running, ident, &gi); - if (error == 0) { - gi &= ~HWINTR_BLOCKING; - error = vmcs_setreg(vmcs, running, ident, gi); + if (!running) { + vmcs_load(vmx->vmcs_pa[vcpu]); } -done: - VCPU_CTR2(vmx->vm, vcpu, "Setting intr_shadow to %#lx %s", val, - error ? "failed" : "succeeded"); - return (error); -} - -static int -vmx_shadow_reg(int reg) -{ - int shreg; - shreg = -1; + err = EINVAL; + if (reg == VM_REG_GUEST_INTR_SHADOW) { + uint64_t gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); + *retval = (gi & HWINTR_BLOCKING) ? 1 : 0; + err = 0; + } else { + uint32_t encoding; - switch (reg) { - case VM_REG_GUEST_CR0: - shreg = VMCS_CR0_SHADOW; - break; - case VM_REG_GUEST_CR4: - shreg = VMCS_CR4_SHADOW; - break; - default: - break; + encoding = vmcs_field_encoding(reg); + if (encoding != VMCS_INVALID_ENCODING) { + *retval = vmcs_read(encoding); + err = 0; + } } - return (shreg); -} - -static int -vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) -{ - int running, hostcpu; - struct vmx *vmx = arg; - - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); - if (running && hostcpu != curcpu) - panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu); - - if (reg == VM_REG_GUEST_INTR_SHADOW) - return (vmx_get_intr_shadow(vmx, vcpu, running, retval)); - - if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) == 0) - return (0); + if (!running) { + vmcs_clear(vmx->vmcs_pa[vcpu]); + } - return (vmcs_getreg(&vmx->vmcs[vcpu], running, reg, retval)); + return (err); } static int vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) { - int error, hostcpu, running, shadow; - uint64_t 
ctls; - pmap_t pmap; + int running, hostcpu, error; struct vmx *vmx = arg; + uint64_t *regp; running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu); - if (reg == VM_REG_GUEST_INTR_SHADOW) - return (vmx_modify_intr_shadow(vmx, vcpu, running, val)); - - if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0) + /* VMCS access not required for ctx writes */ + if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) { + *regp = val; return (0); + } - error = vmcs_setreg(&vmx->vmcs[vcpu], running, reg, val); - - if (error == 0) { - /* - * If the "load EFER" VM-entry control is 1 then the - * value of EFER.LMA must be identical to "IA-32e mode guest" - * bit in the VM-entry control. - */ - if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0 && - (reg == VM_REG_GUEST_EFER)) { - vmcs_getreg(&vmx->vmcs[vcpu], running, - VMCS_IDENT(VMCS_ENTRY_CTLS), &ctls); - if (val & EFER_LMA) - ctls |= VM_ENTRY_GUEST_LMA; - else - ctls &= ~VM_ENTRY_GUEST_LMA; - vmcs_setreg(&vmx->vmcs[vcpu], running, - VMCS_IDENT(VMCS_ENTRY_CTLS), ctls); - } + if (!running) { + vmcs_load(vmx->vmcs_pa[vcpu]); + } - shadow = vmx_shadow_reg(reg); - if (shadow > 0) { + if (reg == VM_REG_GUEST_INTR_SHADOW) { + if (val != 0) { /* - * Store the unmodified value in the shadow + * Forcing the vcpu into an interrupt shadow is not + * presently supported. */ - error = vmcs_setreg(&vmx->vmcs[vcpu], running, - VMCS_IDENT(shadow), val); + error = EINVAL; + } else { + uint64_t gi; + + gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); + gi &= ~HWINTR_BLOCKING; + vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); + error = 0; } + } else { + uint32_t encoding; - if (reg == VM_REG_GUEST_CR3) { + error = 0; + encoding = vmcs_field_encoding(reg); + switch (encoding) { + case VMCS_GUEST_IA32_EFER: + /* + * If the "load EFER" VM-entry control is 1 then the + * value of EFER.LMA must be identical to "IA-32e mode + * guest" bit in the VM-entry control. 
+ */ + if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0) { + uint64_t ctls; + + ctls = vmcs_read(VMCS_ENTRY_CTLS); + if (val & EFER_LMA) { + ctls |= VM_ENTRY_GUEST_LMA; + } else { + ctls &= ~VM_ENTRY_GUEST_LMA; + } + vmcs_write(VMCS_ENTRY_CTLS, ctls); + } + vmcs_write(encoding, val); + break; + case VMCS_GUEST_CR0: + /* + * The guest is not allowed to modify certain bits in + * %cr0 and %cr4. To maintain the illusion of full + * control, they have shadow versions which contain the + * guest-perceived (via reads from the register) values + * as opposed to the guest-effective values. + * + * This is detailed in the SDM: Vol. 3 Ch. 24.6.6. + */ + vmcs_write(VMCS_CR0_SHADOW, val); + vmcs_write(encoding, vmx_fix_cr0(val)); + break; + case VMCS_GUEST_CR4: + /* See above for detail on %cr4 shadowing */ + vmcs_write(VMCS_CR4_SHADOW, val); + vmcs_write(encoding, vmx_fix_cr4(val)); + break; + case VMCS_GUEST_CR3: + vmcs_write(encoding, val); /* * Invalidate the guest vcpu's TLB mappings to emulate * the behavior of updating %cr3. @@ -3357,38 +3317,80 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) * XXX the processor retains global mappings when %cr3 * is updated but vmx_invvpid() does not. 
*/ - pmap = vmx->ctx[vcpu].pmap; - vmx_invvpid(vmx, vcpu, pmap, running); + vmx_invvpid(vmx, vcpu, vmx->ctx[vcpu].pmap, running); + break; + case VMCS_INVALID_ENCODING: + error = EINVAL; + break; + default: + vmcs_write(encoding, val); + break; } } + if (!running) { + vmcs_clear(vmx->vmcs_pa[vcpu]); + } + return (error); } static int -vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc) { int hostcpu, running; struct vmx *vmx = arg; + uint32_t base, limit, access; running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("vmx_getdesc: %s%d is running", vm_name(vmx->vm), vcpu); - return (vmcs_getdesc(&vmx->vmcs[vcpu], running, reg, desc)); + if (!running) { + vmcs_load(vmx->vmcs_pa[vcpu]); + } + + vmcs_seg_desc_encoding(seg, &base, &limit, &access); + desc->base = vmcs_read(base); + desc->limit = vmcs_read(limit); + if (access != VMCS_INVALID_ENCODING) { + desc->access = vmcs_read(access); + } else { + desc->access = 0; + } + + if (!running) { + vmcs_clear(vmx->vmcs_pa[vcpu]); + } + return (0); } static int -vmx_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +vmx_setdesc(void *arg, int vcpu, int seg, struct seg_desc *desc) { int hostcpu, running; struct vmx *vmx = arg; + uint32_t base, limit, access; running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), vcpu); - return (vmcs_setdesc(&vmx->vmcs[vcpu], running, reg, desc)); + if (!running) { + vmcs_load(vmx->vmcs_pa[vcpu]); + } + + vmcs_seg_desc_encoding(seg, &base, &limit, &access); + vmcs_write(base, desc->base); + vmcs_write(limit, desc->limit); + if (access != VMCS_INVALID_ENCODING) { + vmcs_write(access, desc->access); + } + + if (!running) { + vmcs_clear(vmx->vmcs_pa[vcpu]); + } + return (0); } static int @@ -3436,21 +3438,17 @@ static int vmx_setcap(void *arg, int vcpu, int type, int val) { struct vmx 
*vmx = arg; - struct vmcs *vmcs = &vmx->vmcs[vcpu]; - uint32_t baseval; + uint32_t baseval, reg, flag; uint32_t *pptr; int error; - int flag; - int reg; - int retval; - retval = ENOENT; + error = ENOENT; pptr = NULL; switch (type) { case VM_CAP_HALT_EXIT: if (cap_halt_exit) { - retval = 0; + error = 0; pptr = &vmx->cap[vcpu].proc_ctls; baseval = *pptr; flag = PROCBASED_HLT_EXITING; @@ -3459,7 +3457,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) break; case VM_CAP_MTRAP_EXIT: if (cap_monitor_trap) { - retval = 0; + error = 0; pptr = &vmx->cap[vcpu].proc_ctls; baseval = *pptr; flag = PROCBASED_MTF; @@ -3468,7 +3466,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) break; case VM_CAP_PAUSE_EXIT: if (cap_pause_exit) { - retval = 0; + error = 0; pptr = &vmx->cap[vcpu].proc_ctls; baseval = *pptr; flag = PROCBASED_PAUSE_EXITING; @@ -3477,7 +3475,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) break; case VM_CAP_ENABLE_INVPCID: if (cap_invpcid) { - retval = 0; + error = 0; pptr = &vmx->cap[vcpu].proc_ctls2; baseval = *pptr; flag = PROCBASED2_ENABLE_INVPCID; @@ -3485,7 +3483,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) } break; case VM_CAP_BPT_EXIT: - retval = 0; + error = 0; /* Don't change the bitmap if we are tracing all exceptions. 
*/ if (vmx->cap[vcpu].exc_bitmap != 0xffffffff) { @@ -3499,8 +3497,9 @@ vmx_setcap(void *arg, int vcpu, int type, int val) break; } - if (retval) - return (retval); + if (error != 0) { + return (error); + } if (pptr != NULL) { if (val) { @@ -3508,12 +3507,9 @@ vmx_setcap(void *arg, int vcpu, int type, int val) } else { baseval &= ~flag; } - VMPTRLD(vmcs); - error = vmwrite(reg, baseval); - VMCLEAR(vmcs); - - if (error) - return (error); + vmcs_load(vmx->vmcs_pa[vcpu]); + vmcs_write(reg, baseval); + vmcs_clear(vmx->vmcs_pa[vcpu]); /* * Update optional stored flags, and record @@ -3715,13 +3711,11 @@ static void vmx_enable_x2apic_mode_ts(struct vlapic *vlapic) { struct vmx *vmx; - struct vmcs *vmcs; uint32_t proc_ctls; int vcpuid; vcpuid = vlapic->vcpuid; vmx = ((struct vlapic_vtx *)vlapic)->vmx; - vmcs = &vmx->vmcs[vcpuid]; proc_ctls = vmx->cap[vcpuid].proc_ctls; proc_ctls &= ~PROCBASED_USE_TPR_SHADOW; @@ -3729,34 +3723,32 @@ vmx_enable_x2apic_mode_ts(struct vlapic *vlapic) proc_ctls |= PROCBASED_CR8_STORE_EXITING; vmx->cap[vcpuid].proc_ctls = proc_ctls; - VMPTRLD(vmcs); + vmcs_load(vmx->vmcs_pa[vcpuid]); vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); - VMCLEAR(vmcs); + vmcs_clear(vmx->vmcs_pa[vcpuid]); } static void vmx_enable_x2apic_mode_vid(struct vlapic *vlapic) { struct vmx *vmx; - struct vmcs *vmcs; uint32_t proc_ctls2; int vcpuid, error; vcpuid = vlapic->vcpuid; vmx = ((struct vlapic_vtx *)vlapic)->vmx; - vmcs = &vmx->vmcs[vcpuid]; proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; KASSERT((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) != 0, - ("%s: invalid proc_ctls2 %#x", __func__, proc_ctls2)); + ("%s: invalid proc_ctls2 %x", __func__, proc_ctls2)); proc_ctls2 &= ~PROCBASED2_VIRTUALIZE_APIC_ACCESSES; proc_ctls2 |= PROCBASED2_VIRTUALIZE_X2APIC_MODE; vmx->cap[vcpuid].proc_ctls2 = proc_ctls2; - VMPTRLD(vmcs); + vmcs_load(vmx->vmcs_pa[vcpuid]); vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc_ctls2); - VMCLEAR(vmcs); + vmcs_clear(vmx->vmcs_pa[vcpuid]); if 
(vlapic->vcpuid == 0) { /* @@ -3932,10 +3924,9 @@ static void vmx_savectx(void *arg, int vcpu) { struct vmx *vmx = arg; - struct vmcs *vmcs = &vmx->vmcs[vcpu]; if ((vmx->vmcs_state[vcpu] & VS_LOADED) != 0) { - VERIFY3U(vmclear(vmcs), ==, 0); + vmcs_clear(vmx->vmcs_pa[vcpu]); vmx_msr_guest_exit(vmx, vcpu); /* * Having VMCLEARed the VMCS, it can no longer be re-entered @@ -3951,13 +3942,12 @@ static void vmx_restorectx(void *arg, int vcpu) { struct vmx *vmx = arg; - struct vmcs *vmcs = &vmx->vmcs[vcpu]; ASSERT0(vmx->vmcs_state[vcpu] & VS_LAUNCHED); if ((vmx->vmcs_state[vcpu] & VS_LOADED) != 0) { vmx_msr_guest_enter(vmx, vcpu); - VERIFY3U(vmptrld(vmcs), ==, 0); + vmcs_load(vmx->vmcs_pa[vcpu]); } } #endif /* __FreeBSD__ */ diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.h b/usr/src/uts/i86pc/io/vmm/intel/vmx.h index 0fd723f9c9..7943c1fd0e 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.h @@ -50,44 +50,34 @@ struct pmap; struct vmxctx { - register_t guest_rdi; /* Guest state */ - register_t guest_rsi; - register_t guest_rdx; - register_t guest_rcx; - register_t guest_r8; - register_t guest_r9; - register_t guest_rax; - register_t guest_rbx; - register_t guest_rbp; - register_t guest_r10; - register_t guest_r11; - register_t guest_r12; - register_t guest_r13; - register_t guest_r14; - register_t guest_r15; - register_t guest_cr2; - register_t guest_dr0; - register_t guest_dr1; - register_t guest_dr2; - register_t guest_dr3; - register_t guest_dr6; - -#ifdef __FreeBSD__ - register_t host_r15; /* Host state */ - register_t host_r14; - register_t host_r13; - register_t host_r12; - register_t host_rbp; - register_t host_rsp; - register_t host_rbx; -#endif /* __FreeBSD__ */ - - register_t host_dr0; - register_t host_dr1; - register_t host_dr2; - register_t host_dr3; - register_t host_dr6; - register_t host_dr7; + uint64_t guest_rdi; /* Guest state */ + uint64_t guest_rsi; + uint64_t guest_rdx; + uint64_t guest_rcx; + uint64_t 
guest_r8; + uint64_t guest_r9; + uint64_t guest_rax; + uint64_t guest_rbx; + uint64_t guest_rbp; + uint64_t guest_r10; + uint64_t guest_r11; + uint64_t guest_r12; + uint64_t guest_r13; + uint64_t guest_r14; + uint64_t guest_r15; + uint64_t guest_cr2; + uint64_t guest_dr0; + uint64_t guest_dr1; + uint64_t guest_dr2; + uint64_t guest_dr3; + uint64_t guest_dr6; + + uint64_t host_dr0; + uint64_t host_dr1; + uint64_t host_dr2; + uint64_t host_dr3; + uint64_t host_dr6; + uint64_t host_dr7; uint64_t host_debugctl; int host_tf; @@ -156,6 +146,7 @@ struct vmx { uint64_t host_msrs[VM_MAXCPU][GUEST_MSR_NUM]; uint64_t tsc_offset_active[VM_MAXCPU]; vmcs_state_t vmcs_state[VM_MAXCPU]; + uintptr_t vmcs_pa[VM_MAXCPU]; #endif struct vmxctx ctx[VM_MAXCPU]; struct vmxcap cap[VM_MAXCPU]; @@ -175,17 +166,38 @@ vmx_cap_en(const struct vmx *vmx, enum vmx_caps cap) return ((vmx->vmx_caps & cap) == cap); } + +/* + * Section 5.2 "Conventions" from Intel Architecture Manual 2B. + * + * error + * VMsucceed 0 + * VMFailInvalid 1 + * VMFailValid 2 see also VMCS VM-Instruction Error Field + */ +#define VM_SUCCESS 0 +#define VM_FAIL_INVALID 1 +#define VM_FAIL_VALID 2 +#define VMX_SET_ERROR_CODE_ASM \ + " jnc 1f;" \ + " mov $1, %[error];" /* CF: error = 1 */ \ + " jmp 3f;" \ + "1: jnz 2f;" \ + " mov $2, %[error];" /* ZF: error = 2 */ \ + " jmp 3f;" \ + "2: mov $0, %[error];" \ + "3:" + + #define VMX_GUEST_VMEXIT 0 #define VMX_VMRESUME_ERROR 1 #define VMX_VMLAUNCH_ERROR 2 #define VMX_INVEPT_ERROR 3 #define VMX_VMWRITE_ERROR 4 + int vmx_enter_guest(struct vmxctx *ctx, struct vmx *vmx, int launched); void vmx_call_isr(uintptr_t entry); -u_long vmx_fix_cr0(u_long cr0); -u_long vmx_fix_cr4(u_long cr4); - int vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset); extern char vmx_exit_guest[]; diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h deleted file mode 100644 index f0c5ba7691..0000000000 --- 
a/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h +++ /dev/null @@ -1,244 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - * - * Copyright 2014 Pluribus Networks Inc. - * Copyright 2017 Joyent, Inc. 
- */ - -#ifndef _VMX_CPUFUNC_H_ -#define _VMX_CPUFUNC_H_ - -struct vmcs; - -/* - * Section 5.2 "Conventions" from Intel Architecture Manual 2B. - * - * error - * VMsucceed 0 - * VMFailInvalid 1 - * VMFailValid 2 see also VMCS VM-Instruction Error Field - */ -#define VM_SUCCESS 0 -#define VM_FAIL_INVALID 1 -#define VM_FAIL_VALID 2 -#define VMX_SET_ERROR_CODE \ - " jnc 1f;" \ - " mov $1, %[error];" /* CF: error = 1 */ \ - " jmp 3f;" \ - "1: jnz 2f;" \ - " mov $2, %[error];" /* ZF: error = 2 */ \ - " jmp 3f;" \ - "2: mov $0, %[error];" \ - "3:" - -/* returns 0 on success and non-zero on failure */ -static __inline int -vmxon(char *region) -{ - int error; - uint64_t addr; - -#ifdef __FreeBSD__ - addr = vtophys(region); -#else - /* This is pre-translated in illumos */ - addr = (uint64_t)region; -#endif - __asm __volatile("vmxon %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - - return (error); -} - -#ifdef __FreeBSD__ -/* returns 0 on success and non-zero on failure */ -static __inline int -vmclear(struct vmcs *vmcs) -{ - int error; - uint64_t addr; - - addr = vtophys(vmcs); - __asm __volatile("vmclear %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - return (error); -} -#endif /* __FreeBSD__ */ - -static __inline void -vmxoff(void) -{ - - __asm __volatile("vmxoff"); -} - -static __inline void -vmptrst(uint64_t *addr) -{ - - __asm __volatile("vmptrst %[addr]" :: [addr]"m" (*addr) : "memory"); -} - -#ifdef __FreeBSD__ -static __inline int -vmptrld(struct vmcs *vmcs) -{ - int error; - uint64_t addr; - - addr = vtophys(vmcs); - __asm __volatile("vmptrld %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - return (error); -} -#endif /* __FreeBSD__ */ - -static __inline int -vmwrite(uint64_t reg, uint64_t val) -{ - int error; - - __asm __volatile("vmwrite %[val], %[reg];" - VMX_SET_ERROR_CODE - : 
[error] "=r" (error) - : [val] "r" (val), [reg] "r" (reg) - : "memory"); - - return (error); -} - -static __inline int -vmread(uint64_t r, uint64_t *addr) -{ - int error; - - __asm __volatile("vmread %[r], %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [r] "r" (r), [addr] "m" (*addr) - : "memory"); - - return (error); -} - -#ifdef __FreeBSD__ -static __inline void -VMCLEAR(struct vmcs *vmcs) -{ - int err; - - err = vmclear(vmcs); - if (err != 0) - panic("%s: vmclear(%p) error %d", __func__, vmcs, err); - - critical_exit(); -} - -static __inline void -VMPTRLD(struct vmcs *vmcs) -{ - int err; - - critical_enter(); - - err = vmptrld(vmcs); - if (err != 0) - panic("%s: vmptrld(%p) error %d", __func__, vmcs, err); -} -#endif /* __FreeBSD__ */ - -#define INVVPID_TYPE_ADDRESS 0UL -#define INVVPID_TYPE_SINGLE_CONTEXT 1UL -#define INVVPID_TYPE_ALL_CONTEXTS 2UL - -struct invvpid_desc { - uint16_t vpid; - uint16_t _res1; - uint32_t _res2; - uint64_t linear_addr; -}; -CTASSERT(sizeof(struct invvpid_desc) == 16); - -static __inline void -invvpid(uint64_t type, struct invvpid_desc desc) -{ - int error; - - __asm __volatile("invvpid %[desc], %[type];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [desc] "m" (desc), [type] "r" (type) - : "memory"); - - if (error) - panic("invvpid error %d", error); -} - -#define INVEPT_TYPE_SINGLE_CONTEXT 1UL -#define INVEPT_TYPE_ALL_CONTEXTS 2UL -struct invept_desc { - uint64_t eptp; - uint64_t _res; -}; -CTASSERT(sizeof(struct invept_desc) == 16); - -static __inline void -invept(uint64_t type, struct invept_desc desc) -{ - int error; - - __asm __volatile("invept %[desc], %[type];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [desc] "m" (desc), [type] "r" (type) - : "memory"); - - if (error) - panic("invept error %d", error); -} -#endif diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c index 6c37c9c234..cfdf2bfe05 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c +++ 
b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c @@ -62,13 +62,6 @@ vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos) return ((msr_val & (1UL << bitpos)) == 0); } -uint32_t -vmx_revision(void) -{ - - return (rdmsr(MSR_VMX_BASIC) & 0xffffffff); -} - /* * Generate a bitmask to be used for the VMCS execution control fields. * diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h index ac2adb0dd1..848cdea26b 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h @@ -40,8 +40,6 @@ void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid); int vmx_rdmsr(struct vmx *, int vcpuid, u_int num, uint64_t *val, bool *retu); int vmx_wrmsr(struct vmx *, int vcpuid, u_int num, uint64_t val, bool *retu); -uint32_t vmx_revision(void); - int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask, uint32_t zeros_mask, uint32_t *retval); diff --git a/usr/src/uts/i86pc/io/vmm/intel/vtd.c b/usr/src/uts/i86pc/io/vmm/intel/vtd.c index 50c0934ace..79524220b5 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vtd.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vtd.c @@ -611,10 +611,10 @@ vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len, ptpindex = 0; ptpshift = 0; - KASSERT(gpa + len > gpa, ("%s: invalid gpa range %#lx/%#lx", __func__, + KASSERT(gpa + len > gpa, ("%s: invalid gpa range %lx/%lx", __func__, gpa, len)); - KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %#lx/%#lx beyond " - "domain maxaddr %#lx", __func__, gpa, len, dom->maxaddr)); + KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %lx/%lx beyond " + "domain maxaddr %lx", __func__, gpa, len, dom->maxaddr)); if (gpa & PAGE_MASK) panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa); diff --git a/usr/src/uts/i86pc/io/vmm/io/iommu.c b/usr/src/uts/i86pc/io/vmm/io/iommu.c index 918a9ec3e4..2e5fc9df32 100644 --- a/usr/src/uts/i86pc/io/vmm/io/iommu.c +++ b/usr/src/uts/i86pc/io/vmm/io/iommu.c @@ -204,12 +204,8 @@ 
iommu_find_device(dev_info_t *dip, void *arg) static void iommu_init(void) { - int error, bus, slot, func; + int error; vm_paddr_t maxaddr; -#ifdef __FreeBSD__ - devclass_t dc; -#endif - device_t dev; if (!iommu_enable) return; @@ -246,35 +242,7 @@ iommu_init(void) */ iommu_create_mapping(host_domain, 0, 0, maxaddr); -#ifdef __FreeBSD__ - add_tag = EVENTHANDLER_REGISTER(pci_add_device, iommu_pci_add, NULL, 0); - delete_tag = EVENTHANDLER_REGISTER(pci_delete_device, iommu_pci_delete, - NULL, 0); - dc = devclass_find("ppt"); - for (bus = 0; bus <= PCI_BUSMAX; bus++) { - for (slot = 0; slot <= PCI_SLOTMAX; slot++) { - for (func = 0; func <= PCI_FUNCMAX; func++) { - dev = pci_find_dbsf(0, bus, slot, func); - if (dev == NULL) - continue; - - /* Skip passthrough devices. */ - if (dc != NULL && - device_get_devclass(dev) == dc) - continue; - - /* - * Everything else belongs to the host - * domain. - */ - iommu_add_device(host_domain, - pci_get_rid(dev)); - } - } - } -#else ddi_walk_devs(ddi_root_node(), iommu_find_device, (void *)B_TRUE); -#endif IOMMU_ENABLE(); } diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c index c1825f4264..f7a05254ec 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c @@ -96,6 +96,12 @@ __FBSDID("$FreeBSD$"); static void vlapic_set_error(struct vlapic *, uint32_t, bool); static void vlapic_tmr_reset(struct vlapic *); +#ifdef __ISRVEC_DEBUG +static void vlapic_isrstk_accept(struct vlapic *, int); +static void vlapic_isrstk_eoi(struct vlapic *, int); +static void vlapic_isrstk_verify(const struct vlapic *); +#endif /* __ISRVEC_DEBUG */ + static __inline uint32_t vlapic_get_id(struct vlapic *vlapic) { @@ -134,12 +140,14 @@ vlapic_dfr_write_handler(struct vlapic *vlapic) lapic->dfr &= APIC_DFR_MODEL_MASK; lapic->dfr |= APIC_DFR_RESERVED; +#ifdef __FreeBSD__ if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model"); else 
if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model"); else VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr); +#endif } void @@ -495,21 +503,33 @@ vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt) return (1); } -#if 1 -static void -dump_isrvec_stk(struct vlapic *vlapic) +static uint_t +vlapic_active_isr(struct vlapic *vlapic) { int i; - uint32_t *isrptr; + uint32_t *isrp; - isrptr = &vlapic->apic_page->isr0; - for (i = 0; i < 8; i++) - printf("ISR%d 0x%08x\n", i, isrptr[i * 4]); + isrp = &vlapic->apic_page->isr7; - for (i = 0; i <= vlapic->isrvec_stk_top; i++) - printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]); + for (i = 7; i >= 0; i--, isrp -= 4) { + uint32_t reg = *isrp; + + if (reg != 0) { + uint_t vec = (i * 32) + bsrl(reg); + + if (vec < 16) { + /* + * Truncate the illegal low vectors to value of + * 0, indicating that no active ISR was found. + */ + return (0); + } + return (vec); + } + } + + return (0); } -#endif /* * Algorithm adopted from section "Interrupt, Task and Processor Priority" @@ -520,55 +540,11 @@ vlapic_update_ppr(struct vlapic *vlapic) { int isrvec, tpr, ppr; - /* - * Note that the value on the stack at index 0 is always 0. - * - * This is a placeholder for the value of ISRV when none of the - * bits is set in the ISRx registers. - */ - isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top]; + isrvec = vlapic_active_isr(vlapic); tpr = vlapic->apic_page->tpr; -#if 1 - { - int i, lastprio, curprio, vector, idx; - uint32_t *isrptr; - - if (vlapic->isrvec_stk_top == 0 && isrvec != 0) - panic("isrvec_stk is corrupted: %d", isrvec); - - /* - * Make sure that the priority of the nested interrupts is - * always increasing. 
- */ - lastprio = -1; - for (i = 1; i <= vlapic->isrvec_stk_top; i++) { - curprio = PRIO(vlapic->isrvec_stk[i]); - if (curprio <= lastprio) { - dump_isrvec_stk(vlapic); - panic("isrvec_stk does not satisfy invariant"); - } - lastprio = curprio; - } - - /* - * Make sure that each bit set in the ISRx registers has a - * corresponding entry on the isrvec stack. - */ - i = 1; - isrptr = &vlapic->apic_page->isr0; - for (vector = 0; vector < 256; vector++) { - idx = (vector / 32) * 4; - if (isrptr[idx] & (1 << (vector % 32))) { - if (i > vlapic->isrvec_stk_top || - vlapic->isrvec_stk[i] != vector) { - dump_isrvec_stk(vlapic); - panic("ISR and isrvec_stk out of sync"); - } - i++; - } - } - } +#ifdef __ISRVEC_DEBUG + vlapic_isrstk_verify(vlapic); #endif if (PRIO(tpr) >= PRIO(isrvec)) @@ -593,25 +569,25 @@ vlapic_process_eoi(struct vlapic *vlapic) { struct LAPIC *lapic = vlapic->apic_page; uint32_t *isrptr, *tmrptr; - int i, idx, bitpos, vector; + int i; + uint_t idx, bitpos, vector; isrptr = &lapic->isr0; tmrptr = &lapic->tmr0; for (i = 7; i >= 0; i--) { idx = i * 4; - bitpos = fls(isrptr[idx]); - if (bitpos-- != 0) { - if (vlapic->isrvec_stk_top <= 0) { - panic("invalid vlapic isrvec_stk_top %d", - vlapic->isrvec_stk_top); - } - isrptr[idx] &= ~(1 << bitpos); + if (isrptr[idx] != 0) { + bitpos = bsrl(isrptr[idx]); vector = i * 32 + bitpos; + + isrptr[idx] &= ~(1 << bitpos); VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d", vector); VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi"); - vlapic->isrvec_stk_top--; +#ifdef __ISRVEC_DEBUG + vlapic_isrstk_eoi(vlapic, vector); +#endif vlapic_update_ppr(vlapic); if ((tmrptr[idx] & (1 << bitpos)) != 0) { vioapic_process_eoi(vlapic->vm, vlapic->vcpuid, @@ -1143,7 +1119,7 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector) { struct LAPIC *lapic = vlapic->apic_page; uint32_t *irrptr, *isrptr; - int idx, stk_top; + int idx; if (vlapic->ops.intr_accepted) return ((*vlapic->ops.intr_accepted)(vlapic, vector)); @@ -1162,16 +1138,9 
@@ vlapic_intr_accepted(struct vlapic *vlapic, int vector) isrptr[idx] |= 1 << (vector % 32); VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted"); - /* - * Update the PPR - */ - vlapic->isrvec_stk_top++; - - stk_top = vlapic->isrvec_stk_top; - if (stk_top >= ISRVEC_STK_SIZE) - panic("isrvec_stk_top overflow %d", stk_top); - - vlapic->isrvec_stk[stk_top] = vector; +#ifdef __ISRVEC_DEBUG + vlapic_isrstk_accept(vlapic, vector); +#endif } void @@ -1708,3 +1677,92 @@ vlapic_localize_resources(struct vlapic *vlapic) vmm_glue_callout_localize(&vlapic->callout); } #endif /* __FreeBSD */ + +#ifdef __ISRVEC_DEBUG +static void +vlapic_isrstk_eoi(struct vlapic *vlapic, int vector) +{ + if (vlapic->isrvec_stk_top <= 0) { + panic("invalid vlapic isrvec_stk_top %d", + vlapic->isrvec_stk_top); + } + vlapic->isrvec_stk_top--; +} + +static void +vlapic_isrstk_accept(struct vlapic *vlapic, int vector) +{ + int stk_top; + + vlapic->isrvec_stk_top++; + + stk_top = vlapic->isrvec_stk_top; + if (stk_top >= ISRVEC_STK_SIZE) + panic("isrvec_stk_top overflow %d", stk_top); + + vlapic->isrvec_stk[stk_top] = vector; +} + +static void +vlapic_isrstk_dump(const struct vlapic *vlapic) +{ + int i; + uint32_t *isrptr; + + isrptr = &vlapic->apic_page->isr0; + for (i = 0; i < 8; i++) + printf("ISR%d 0x%08x\n", i, isrptr[i * 4]); + + for (i = 0; i <= vlapic->isrvec_stk_top; i++) + printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]); +} + +static void +vlapic_isrstk_verify(const struct vlapic *vlapic) +{ + int i, lastprio, curprio, vector, idx; + uint32_t *isrptr; + + /* + * Note: The value at index 0 in isrvec_stk is always 0. + * + * It is a placeholder for the value of ISR vector when no bits are set + * in the ISRx registers. + */ + if (vlapic->isrvec_stk_top == 0 && vlapic->isrvec_stk[0] != 0) { + panic("isrvec_stk is corrupted: %d", vlapic->isrvec_stk[0]); + } + + /* + * Make sure that the priority of the nested interrupts is + * always increasing. 
+ */ + lastprio = -1; + for (i = 1; i <= vlapic->isrvec_stk_top; i++) { + curprio = PRIO(vlapic->isrvec_stk[i]); + if (curprio <= lastprio) { + vlapic_isrstk_dump(vlapic); + panic("isrvec_stk does not satisfy invariant"); + } + lastprio = curprio; + } + + /* + * Make sure that each bit set in the ISRx registers has a + * corresponding entry on the isrvec stack. + */ + i = 1; + isrptr = &vlapic->apic_page->isr0; + for (vector = 0; vector < 256; vector++) { + idx = (vector / 32) * 4; + if (isrptr[idx] & (1 << (vector % 32))) { + if (i > vlapic->isrvec_stk_top || + vlapic->isrvec_stk[i] != vector) { + vlapic_isrstk_dump(vlapic); + panic("ISR and isrvec_stk out of sync"); + } + i++; + } + } +} +#endif diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h index 5795d48d52..8a0d594de3 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h @@ -27,6 +27,18 @@ * * $FreeBSD$ */ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * Copyright 2020 Oxide Computer Company + */ #ifndef _VLAPIC_PRIV_H_ #define _VLAPIC_PRIV_H_ @@ -140,6 +152,10 @@ enum boot_state { #define VLAPIC_TMR_CNT 8 +#ifdef DEBUG +#define __ISRVEC_DEBUG +#endif + struct vlapic; struct vlapic_ops { @@ -166,15 +182,6 @@ struct vlapic { struct bintime timer_period_bt; /* timer period */ struct mtx timer_mtx; - /* - * The 'isrvec_stk' is a stack of vectors injected by the local apic. - * A vector is popped from the stack when the processor does an EOI. 
- * The vector on the top of the stack is used to compute the - * Processor Priority in conjunction with the TPR. - */ - uint8_t isrvec_stk[ISRVEC_STK_SIZE]; - int isrvec_stk_top; - uint64_t msr_apicbase; enum boot_state boot_state; @@ -199,6 +206,19 @@ struct vlapic { */ uint32_t tmr_vec_deassert[VLAPIC_TMR_CNT]; uint32_t tmr_vec_assert[VLAPIC_TMR_CNT]; + +#ifdef __ISRVEC_DEBUG + /* + * The 'isrvec_stk' is a stack of vectors injected by the local APIC. + * It is used as a debugging method to double-check the behavior of the + * emulation. Vectors are pushed to the stack when they are accepted + * for injection and popped from the stack when the processor performs + * an EOI. The vector on the top of the stack is used to verify the + * computed Processor Priority. + */ + uint8_t isrvec_stk[ISRVEC_STK_SIZE]; + int isrvec_stk_top; +#endif }; void vlapic_init(struct vlapic *vlapic); diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c index 0dce2b0a1f..6664cb06e7 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c +++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c @@ -98,7 +98,7 @@ vpmtmr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, uint8_t bytes, now = sbinuptime(); delta = now - vpmtmr->baseuptime; KASSERT(delta >= 0, ("vpmtmr_handler: uptime went backwards: " - "%#lx to %#lx", vpmtmr->baseuptime, now)); + "%lx to %lx", vpmtmr->baseuptime, now)); *val = vpmtmr->baseval + delta / vpmtmr->freq_sbt; return (0); diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.c b/usr/src/uts/i86pc/io/vmm/io/vrtc.c index 343ad9c37a..e560ce9b7f 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vrtc.c +++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.c @@ -161,7 +161,7 @@ vrtc_curtime(struct vrtc *vrtc, sbintime_t *basetime) now = sbinuptime(); delta = now - vrtc->base_uptime; KASSERT(delta >= 0, ("vrtc_curtime: uptime went backwards: " - "%#lx to %#lx", vrtc->base_uptime, now)); + "%lx to %lx", vrtc->base_uptime, now)); secs = delta / SBT_1S; t += secs; *basetime += secs 
* SBT_1S; @@ -191,7 +191,7 @@ secs_to_rtc(time_t rtctime, struct vrtc *vrtc, int force_update) if (rtctime < 0) { KASSERT(rtctime == VRTC_BROKEN_TIME, - ("%s: invalid vrtc time %#lx", __func__, rtctime)); + ("%s: invalid vrtc time %lx", __func__, rtctime)); return; } @@ -286,33 +286,23 @@ rtc_to_secs(struct vrtc *vrtc) struct clocktime ct; struct timespec ts; struct rtcdev *rtc; -#ifdef __FreeBSD__ - struct vm *vm; -#endif int century, error, hour, pm, year; KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); -#ifdef __FreeBSD__ - vm = vrtc->vm; -#endif rtc = &vrtc->rtcdev; bzero(&ct, sizeof(struct clocktime)); error = rtcget(rtc, rtc->sec, &ct.sec); if (error || ct.sec < 0 || ct.sec > 59) { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC sec %#x/%d", rtc->sec, ct.sec); -#endif + /* invalid RTC seconds */ goto fail; } error = rtcget(rtc, rtc->min, &ct.min); if (error || ct.min < 0 || ct.min > 59) { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC min %#x/%d", rtc->min, ct.min); -#endif + /* invalid RTC minutes */ goto fail; } @@ -342,18 +332,13 @@ rtc_to_secs(struct vrtc *vrtc) if (pm) ct.hour += 12; } else { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC 12-hour format %#x/%d", - rtc->hour, ct.hour); -#endif + /* invalid RTC 12-hour format */ goto fail; } } if (error || ct.hour < 0 || ct.hour > 23) { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC hour %#x/%d", rtc->hour, ct.hour); -#endif + /* invalid RTC hour */ goto fail; } @@ -367,47 +352,32 @@ rtc_to_secs(struct vrtc *vrtc) error = rtcget(rtc, rtc->day_of_month, &ct.day); if (error || ct.day < 1 || ct.day > 31) { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC mday %#x/%d", rtc->day_of_month, - ct.day); -#endif + /* invalid RTC mday */ goto fail; } error = rtcget(rtc, rtc->month, &ct.mon); if (error || ct.mon < 1 || ct.mon > 12) { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC month %#x/%d", rtc->month, ct.mon); -#endif + /* invalid RTC month */ goto fail; } error = rtcget(rtc, rtc->year, &year); if 
(error || year < 0 || year > 99) { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC year %#x/%d", rtc->year, year); -#endif + /* invalid RTC year */ goto fail; } error = rtcget(rtc, rtc->century, &century); ct.year = century * 100 + year; if (error || ct.year < POSIX_BASE_YEAR) { -#ifdef __FreeBSD__ - VM_CTR2(vm, "Invalid RTC century %#x/%d", rtc->century, - ct.year); -#endif + /* invalid RTC century */ goto fail; } error = clock_ct_to_ts(&ct, &ts); if (error || ts.tv_sec < 0) { -#ifdef __FreeBSD__ - VM_CTR3(vm, "Invalid RTC clocktime.date %04d-%02d-%02d", - ct.year, ct.mon, ct.day); - VM_CTR3(vm, "Invalid RTC clocktime.time %02d:%02d:%02d", - ct.hour, ct.min, ct.sec); -#endif + /* invalid RTC clocktime */ goto fail; } return (ts.tv_sec); /* success */ @@ -416,9 +386,6 @@ fail: * Stop updating the RTC if the date/time fields programmed by * the guest are invalid. */ -#ifdef __FreeBSD__ - VM_CTR0(vrtc->vm, "Invalid RTC date/time programming detected"); -#endif return (VRTC_BROKEN_TIME); } @@ -426,9 +393,6 @@ static int vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase) { struct rtcdev *rtc; -#ifdef __FreeBSD__ - sbintime_t oldbase; -#endif time_t oldtime; uint8_t alarm_sec, alarm_min, alarm_hour; @@ -440,14 +404,9 @@ vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase) alarm_hour = rtc->alarm_hour; oldtime = vrtc->base_rtctime; - VM_CTR2(vrtc->vm, "Updating RTC secs from %#lx to %#lx", + VM_CTR2(vrtc->vm, "Updating RTC secs from %lx to %lx", oldtime, newtime); -#ifdef __FreeBSD__ - oldbase = vrtc->base_uptime; - VM_CTR2(vrtc->vm, "Updating RTC base uptime from %#lx to %#lx", - oldbase, newbase); -#endif vrtc->base_uptime = newbase; if (newtime == oldtime) @@ -614,7 +573,7 @@ vrtc_callout_check(struct vrtc *vrtc, sbintime_t freq) active = callout_active(&vrtc->callout) ? 1 : 0; KASSERT((freq == 0 && !active) || (freq != 0 && active), - ("vrtc callout %s with frequency %#lx", + ("vrtc callout %s with frequency %lx", active ? 
"active" : "inactive", freq)); } @@ -643,7 +602,7 @@ vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval) rtc->reg_c = newirqf | newval; changed = oldval ^ rtc->reg_c; if (changed) { - VM_CTR2(vrtc->vm, "RTC reg_c changed from %#x to %#x", + VM_CTR2(vrtc->vm, "RTC reg_c changed from %x to %x", oldval, rtc->reg_c); } @@ -674,7 +633,7 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval) rtc->reg_b = newval; changed = oldval ^ newval; if (changed) { - VM_CTR2(vrtc->vm, "RTC reg_b changed from %#x to %#x", + VM_CTR2(vrtc->vm, "RTC reg_b changed from %x to %x", oldval, newval); } @@ -689,7 +648,7 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval) } else { curtime = vrtc_curtime(vrtc, &basetime); KASSERT(curtime == vrtc->base_rtctime, ("%s: mismatch " - "between vrtc basetime (%#lx) and curtime (%#lx)", + "between vrtc basetime (%lx) and curtime (%lx)", __func__, vrtc->base_rtctime, curtime)); /* @@ -745,7 +704,7 @@ vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval) oldfreq = vrtc_freq(vrtc); if (divider_enabled(oldval) && !divider_enabled(newval)) { - VM_CTR2(vrtc->vm, "RTC divider held in reset at %#lx/%#lx", + VM_CTR2(vrtc->vm, "RTC divider held in reset at %lx/%lx", vrtc->base_rtctime, vrtc->base_uptime); } else if (!divider_enabled(oldval) && divider_enabled(newval)) { /* @@ -755,7 +714,7 @@ vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval) * while the dividers were disabled. 
*/ vrtc->base_uptime = sbinuptime(); - VM_CTR2(vrtc->vm, "RTC divider out of reset at %#lx/%#lx", + VM_CTR2(vrtc->vm, "RTC divider out of reset at %lx/%lx", vrtc->base_rtctime, vrtc->base_uptime); } else { /* NOTHING */ @@ -764,7 +723,7 @@ vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval) vrtc->rtcdev.reg_a = newval; changed = oldval ^ newval; if (changed) { - VM_CTR2(vrtc->vm, "RTC reg_a changed from %#x to %#x", + VM_CTR2(vrtc->vm, "RTC reg_a changed from %x to %x", oldval, newval); } @@ -790,10 +749,10 @@ vrtc_set_time(struct vm *vm, time_t secs) VRTC_UNLOCK(vrtc); if (error) { - VM_CTR2(vrtc->vm, "Error %d setting RTC time to %#lx", error, + VM_CTR2(vrtc->vm, "Error %d setting RTC time to %lx", error, secs); } else { - VM_CTR1(vrtc->vm, "RTC time set to %#lx", secs); + VM_CTR1(vrtc->vm, "RTC time set to %lx", secs); } return (error); @@ -835,7 +794,7 @@ vrtc_nvram_write(struct vm *vm, int offset, uint8_t value) VRTC_LOCK(vrtc); ptr = (uint8_t *)(&vrtc->rtcdev); ptr[offset] = value; - VM_CTR2(vrtc->vm, "RTC nvram write %#x to offset %#x", value, offset); + VM_CTR2(vrtc->vm, "RTC nvram write %x to offset %x", value, offset); VRTC_UNLOCK(vrtc); return (0); @@ -945,24 +904,24 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, } else { *val = *((uint8_t *)rtc + offset); } - VCPU_CTR2(vm, vcpuid, "Read value %#x from RTC offset %#x", + VCPU_CTR2(vm, vcpuid, "Read value %x from RTC offset %x", *val, offset); } else { switch (offset) { case 10: - VCPU_CTR1(vm, vcpuid, "RTC reg_a set to %#x", *val); + VCPU_CTR1(vm, vcpuid, "RTC reg_a set to %x", *val); vrtc_set_reg_a(vrtc, *val); break; case 11: - VCPU_CTR1(vm, vcpuid, "RTC reg_b set to %#x", *val); + VCPU_CTR1(vm, vcpuid, "RTC reg_b set to %x", *val); error = vrtc_set_reg_b(vrtc, *val); break; case 12: - VCPU_CTR1(vm, vcpuid, "RTC reg_c set to %#x (ignored)", + VCPU_CTR1(vm, vcpuid, "RTC reg_c set to %x (ignored)", *val); break; case 13: - VCPU_CTR1(vm, vcpuid, "RTC reg_d set to %#x (ignored)", + 
VCPU_CTR1(vm, vcpuid, "RTC reg_d set to %x (ignored)", *val); break; case 0: @@ -972,7 +931,7 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, *val &= 0x7f; /* FALLTHRU */ default: - VCPU_CTR2(vm, vcpuid, "RTC offset %#x set to %#x", + VCPU_CTR2(vm, vcpuid, "RTC offset %x set to %x", offset, *val); *((uint8_t *)rtc + offset) = *val; break; diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h index fbd2884b84..9501850dfc 100644 --- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h +++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h @@ -74,7 +74,7 @@ typedef int (*vmm_init_func_t)(int ipinum); typedef int (*vmm_cleanup_func_t)(void); typedef void (*vmm_resume_func_t)(void); typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap); -typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip, +typedef int (*vmi_run_func_t)(void *vmi, int vcpu, uint64_t rip, struct pmap *pmap, struct vm_eventinfo *info); typedef void (*vmi_cleanup_func_t)(void *vmi); typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num, diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c index 7a47cd0cd1..4eb967fd89 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c @@ -1014,7 +1014,7 @@ vm_iommu_modify(struct vm *vm, bool map) if (map) { KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0, - ("iommu map found invalid memmap %#lx/%#lx/%#x", + ("iommu map found invalid memmap %lx/%lx/%x", mm->gpa, mm->len, mm->flags)); if ((mm->flags & VM_MEMMAP_F_WIRED) == 0) continue; @@ -1024,7 +1024,7 @@ vm_iommu_modify(struct vm *vm, bool map) continue; mm->flags &= ~VM_MEMMAP_F_IOMMU; KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0, - ("iommu unmap found invalid memmap %#lx/%#lx/%#x", + ("iommu unmap found invalid memmap %lx/%lx/%x", mm->gpa, mm->len, mm->flags)); } @@ -1032,7 +1032,7 @@ vm_iommu_modify(struct vm *vm, bool map) while (gpa < mm->gpa + mm->len) { vp = vm_gpa_hold(vm, -1, 
gpa, PAGE_SIZE, VM_PROT_WRITE, &cookie); - KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", + KASSERT(vp != NULL, ("vm(%s) could not map gpa %lx", vm_name(vm), gpa)); vm_gpa_release(cookie); @@ -1213,7 +1213,7 @@ vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) return (error); /* Set 'nextrip' to match the value of %rip */ - VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val); + VCPU_CTR1(vm, vcpuid, "Setting nextrip to %lx", val); vcpu = &vm->vcpu[vcpuid]; vcpu->nextrip = val; return (0); @@ -1561,7 +1561,7 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), vme->u.paging.gpa, ftype); if (rv == 0) { - VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx", + VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %lx", ftype == VM_PROT_READ ? "accessed" : "dirty", vme->u.paging.gpa); goto done; @@ -1571,7 +1571,7 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) map = &vm->vmspace->vm_map; rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL); - VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, " + VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %lx, " "ftype = %d", rv, vme->u.paging.gpa, ftype); if (rv != KERN_SUCCESS) @@ -1635,7 +1635,7 @@ vm_handle_mmio_emul(struct vm *vm, int vcpuid, bool *retu) inst_addr = vme->rip + vme->u.mmio_emul.cs_base; cs_d = vme->u.mmio_emul.cs_d; - VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", + VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %lx", vme->u.mmio_emul.gpa); /* Fetch the faulting instruction */ @@ -1655,7 +1655,7 @@ vm_handle_mmio_emul(struct vm *vm, int vcpuid, bool *retu) } if (vie_decode_instruction(vie, vm, vcpuid, cs_d) != 0) { - VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx", + VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %lx", inst_addr); /* Dump (unrecognized) instruction bytes in userspace */ vie_fallback_exitinfo(vie, vme); @@ -1915,7 +1915,7 @@ 
vm_suspend(struct vm *vm, enum vm_suspend_how how) if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) return (EINVAL); - if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { + if (atomic_cmpset_int((uint_t *)&vm->suspend, 0, how) == 0) { VM_CTR2(vm, "virtual machine already suspended %d/%d", vm->suspend, how); return (EALREADY); @@ -2406,7 +2406,7 @@ vm_restart_instruction(void *arg, int vcpuid) * instruction to be restarted. */ vcpu->exitinfo.inst_length = 0; - VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by " + VCPU_CTR1(vm, vcpuid, "restarting instruction at %lx by " "setting inst_length to zero", vcpu->exitinfo.rip); } else if (state == VCPU_FROZEN) { /* @@ -2418,7 +2418,7 @@ vm_restart_instruction(void *arg, int vcpuid) error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip); KASSERT(!error, ("%s: error %d getting rip", __func__, error)); VCPU_CTR2(vm, vcpuid, "restarting instruction by updating " - "nextrip from %#lx to %#lx", vcpu->nextrip, rip); + "nextrip from %lx to %lx", vcpu->nextrip, rip); vcpu->nextrip = rip; } else { panic("%s: invalid state %d", __func__, state); @@ -2449,7 +2449,7 @@ vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) } else { info = 0; } - VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info); + VCPU_CTR2(vm, vcpuid, "%s: info1(%lx)", __func__, info); vcpu->exitintinfo = info; return (0); } @@ -2467,11 +2467,7 @@ exception_class(uint64_t info) { int type, vector; -#ifdef __FreeBSD__ - KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); -#else KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %lx", info)); -#endif type = info & VM_INTINFO_TYPE; vector = info & 0xff; @@ -2519,13 +2515,8 @@ nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, enum exc_class exc1, exc2; int type1, vector1; -#ifdef __FreeBSD__ - KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); - KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); -#else 
KASSERT(info1 & VM_INTINFO_VALID, ("info1 %lx is not valid", info1)); KASSERT(info2 & VM_INTINFO_VALID, ("info2 %lx is not valid", info2)); -#endif /* * If an exception occurs while attempting to call the double-fault @@ -2534,7 +2525,7 @@ nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, type1 = info1 & VM_INTINFO_TYPE; vector1 = info1 & 0xff; if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { - VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)", + VCPU_CTR2(vm, vcpuid, "triple fault: info1(%lx), info2(%lx)", info1, info2); vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT); *retinfo = 0; @@ -2594,7 +2585,7 @@ vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) if (vcpu->exception_pending) { info2 = vcpu_exception_intinfo(vcpu); vcpu->exception_pending = 0; - VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx", + VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %lx", vcpu->exc_vector, info2); } @@ -2611,8 +2602,8 @@ vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) } if (valid) { - VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), " - "retinfo(%#lx)", __func__, info1, info2, *retinfo); + VCPU_CTR4(vm, vcpuid, "%s: info1(%lx), info2(%lx), " + "retinfo(%lx)", __func__, info1, info2, *retinfo); } return (valid); @@ -2735,7 +2726,7 @@ vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2) int error; vm = vmarg; - VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx", + VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %x, cr2 %lx", error_code, cr2); error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2); diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c index 4dcaba8a82..696052d7d6 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c @@ -181,7 +181,7 @@ vmm_alloc_check(mod_hash_key_t key, mod_hash_val_t *val, void *unused) { struct kmem_item *i = (struct kmem_item *)val; - 
cmn_err(CE_PANIC, "!vmm_alloc_check: hash not empty: %p, %d", i->addr, + cmn_err(CE_PANIC, "!vmm_alloc_check: hash not empty: %p, %lu", i->addr, i->size); return (MH_WALK_TERMINATE); diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c index fc0cf6622f..ae450f1d9b 100644 --- a/usr/src/uts/i86pc/os/cpuid.c +++ b/usr/src/uts/i86pc/os/cpuid.c @@ -1431,7 +1431,7 @@ static char *x86_feature_names[NUM_X86_FEATURES] = { "tbm", "avx512_vnni", "amd_pcec", - "mb_clear", + "md_clear", "mds_no", "core_thermal", "pkg_thermal", diff --git a/usr/src/uts/i86pc/os/gipt.c b/usr/src/uts/i86pc/os/gipt.c index ace7e03438..7bff5c3897 100644 --- a/usr/src/uts/i86pc/os/gipt.c +++ b/usr/src/uts/i86pc/os/gipt.c @@ -355,7 +355,8 @@ gipt_map_next_page(gipt_map_t *map, uint64_t va, uint64_t max_va, gipt_t **ptp) ASSERT3P(pt, !=, NULL); break; } else { - panic("unexpected PTE type %x @ va %p", ptet, cur_va); + panic("unexpected PTE type %x @ va %p", ptet, + (void *)cur_va); } } @@ -387,7 +388,8 @@ gipt_map_next_page(gipt_map_t *map, uint64_t va, uint64_t max_va, gipt_t **ptp) pt = gipt_map_lookup(map, cur_va, pt->gipt_level - 1); ASSERT3P(pt, !=, NULL); } else { - panic("unexpected PTE type %x @ va %p", ptet, cur_va); + panic("unexpected PTE type %x @ va %p", ptet, + (void *)cur_va); } } diff --git a/usr/src/uts/i86pc/os/hma.c b/usr/src/uts/i86pc/os/hma.c index a41ff3e0d1..0e84030ac1 100644 --- a/usr/src/uts/i86pc/os/hma.c +++ b/usr/src/uts/i86pc/os/hma.c @@ -11,6 +11,7 @@ /* * Copyright 2019 Joyent, Inc. + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. 
*/ #include <sys/cpuvar.h> @@ -33,6 +34,7 @@ struct hma_reg { static kmutex_t hma_lock; static list_t hma_registrations; static boolean_t hma_exclusive = B_FALSE; +int hma_disable = 0; static boolean_t hma_vmx_ready = B_FALSE; static const char *hma_vmx_error = NULL; @@ -89,6 +91,11 @@ hma_init(void) list_create(&hma_registrations, sizeof (struct hma_reg), offsetof(struct hma_reg, hr_node)); + if (hma_disable != 0) { + cmn_err(CE_CONT, "?hma_init: disabled"); + return; + } + switch (cpuid_getvendor(CPU)) { case X86_VENDOR_Intel: (void) hma_vmx_init(); diff --git a/usr/src/uts/i86pc/os/mp_implfuncs.c b/usr/src/uts/i86pc/os/mp_implfuncs.c index 2d0bd3eb53..c61e6216b0 100644 --- a/usr/src/uts/i86pc/os/mp_implfuncs.c +++ b/usr/src/uts/i86pc/os/mp_implfuncs.c @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2020 Oxide Computer Company */ #define PSMI_1_7 @@ -32,6 +33,7 @@ #include <sys/psm_modctl.h> #include <sys/smp_impldefs.h> #include <sys/reboot.h> +#include <sys/prom_debug.h> #if defined(__xpv) #include <sys/hypervisor.h> #include <vm/kboot_mmu.h> @@ -390,12 +392,6 @@ psm_modload(void) close_mach_list(); } -#if defined(__xpv) -#define NOTSUP_MSG "This version of Solaris xVM does not support this hardware" -#else -#define NOTSUP_MSG "This version of Solaris does not support this hardware" -#endif /* __xpv */ - void psm_install(void) { @@ -406,14 +402,18 @@ psm_install(void) mutex_enter(&psmsw_lock); for (swp = psmsw->psw_forw; swp != psmsw; ) { + PRM_DEBUGS(swp->psw_infop->p_mach_idstring); opsp = swp->psw_infop->p_ops; if (opsp->psm_probe) { + PRM_POINT("psm_probe()"); if ((*opsp->psm_probe)() == PSM_SUCCESS) { + PRM_POINT("psm_probe() PSM_SUCCESS"); psmcnt++; swp->psw_flag |= PSM_MOD_IDENTIFY; swp = swp->psw_forw; continue; } + PRM_POINT("psm_probe() FAILURE"); } /* remove the unsuccessful psm modules */ cswp = swp; @@ -429,7 +429,8 @@ psm_install(void) } mutex_exit(&psmsw_lock); if 
(psmcnt == 0) - halt(NOTSUP_MSG); + halt("the operating system does not yet support this hardware"); + PRM_POINT("psminitf()"); (*psminitf)(); } diff --git a/usr/src/uts/i86pc/os/mp_machdep.c b/usr/src/uts/i86pc/os/mp_machdep.c index f36f5f052d..f017995ac8 100644 --- a/usr/src/uts/i86pc/os/mp_machdep.c +++ b/usr/src/uts/i86pc/os/mp_machdep.c @@ -26,6 +26,7 @@ * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. * Copyright 2018 Joyent, Inc. + * Copyright 2020 Oxide Computer Company */ #define PSMI_1_7 @@ -63,6 +64,8 @@ #include <sys/sunddi.h> #include <sys/sunndi.h> #include <sys/cpc_pcbe.h> +#include <sys/prom_debug.h> + #define OFFSETOF(s, m) (size_t)(&(((s *)0)->m)) @@ -978,6 +981,7 @@ mach_init() { struct psm_ops *pops; + PRM_POINT("mach_construct_info()"); mach_construct_info(); pops = mach_set[0]; @@ -1017,6 +1021,7 @@ mach_init() notify_error = pops->psm_notify_error; } + PRM_POINT("psm_softinit()"); (*pops->psm_softinit)(); /* @@ -1034,6 +1039,7 @@ mach_init() #ifndef __xpv non_deep_idle_disp_enq_thread = disp_enq_thread; #endif + PRM_DEBUG(idle_cpu_use_hlt); if (idle_cpu_use_hlt) { idle_cpu = cpu_idle_adaptive; CPU->cpu_m.mcpu_idle_cpu = cpu_idle; @@ -1068,6 +1074,7 @@ mach_init() #endif } + PRM_POINT("mach_smpinit()"); mach_smpinit(); } diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c index 636e58280a..dd2b5d703b 100644 --- a/usr/src/uts/i86pc/os/startup.c +++ b/usr/src/uts/i86pc/os/startup.c @@ -25,6 +25,7 @@ * Copyright 2017 Nexenta Systems, Inc. * Copyright (c) 2018 Joyent, Inc. * Copyright (c) 2015 by Delphix. All rights reserved. + * Copyright 2020 Oxide Computer Company */ /* * Copyright (c) 2010, Intel Corporation. 
@@ -74,6 +75,7 @@ #include <sys/memlist_plat.h> #include <sys/varargs.h> #include <sys/promif.h> +#include <sys/prom_debug.h> #include <sys/modctl.h> #include <sys/sunddi.h> @@ -464,7 +466,7 @@ static pgcnt_t kphysm_init(page_t *, pgcnt_t); * | | * 0xFFFFFXXX.XXX00000 |-----------------------|- segkvmm_base (floating) * | segkp | - * |-----------------------|- segkp_base (floating) + * |-----------------------|- segkp_base (floating) * | page_t structures | valloc_base + valloc_sz * | memsegs, memlists, | * | page hash, etc. | @@ -623,21 +625,8 @@ size_t toxic_bit_map_len = 0; /* in bits */ #endif /* __i386 */ -/* - * Simple boot time debug facilities - */ -static char *prm_dbg_str[] = { - "%s:%d: '%s' is 0x%x\n", - "%s:%d: '%s' is 0x%llx\n" -}; - int prom_debug; -#define PRM_DEBUG(q) if (prom_debug) \ - prom_printf(prm_dbg_str[sizeof (q) >> 3], "startup.c", __LINE__, #q, q); -#define PRM_POINT(q) if (prom_debug) \ - prom_printf("%s:%d: %s\n", "startup.c", __LINE__, q); - /* * This structure is used to keep track of the intial allocations * done in startup_memlist(). The value of NUM_ALLOCATIONS needs to @@ -2273,6 +2262,7 @@ startup_end(void) * We can now setup for XSAVE because fpu_probe is done in configure(). */ if (fp_save_mech == FP_XSAVE) { + PRM_POINT("xsave_setup_msr()"); xsave_setup_msr(CPU); } @@ -2281,7 +2271,9 @@ startup_end(void) * support. */ setx86isalist(); + PRM_POINT("cpu_intr_alloc()"); cpu_intr_alloc(CPU, NINTR_THREADS); + PRM_POINT("psm_install()"); psm_install(); /* diff --git a/usr/src/uts/i86pc/sys/hpet_acpi.h b/usr/src/uts/i86pc/sys/hpet_acpi.h index e60ebe4bba..81304674b5 100644 --- a/usr/src/uts/i86pc/sys/hpet_acpi.h +++ b/usr/src/uts/i86pc/sys/hpet_acpi.h @@ -36,7 +36,7 @@ extern "C" { #endif /* - * Solaris uses an HPET Timer to generate interrupts for CPUs in Deep C-state + * illumos uses an HPET Timer to generate interrupts for CPUs in Deep C-state * with stalled LAPIC Timers. All CPUs use one HPET timer. 
The timer's * interrupt targets one CPU (via the I/O APIC). The one CPU that receives * the HPET's interrupt wakes up other CPUs as needed during the HPET Interrupt @@ -46,7 +46,7 @@ extern "C" { * Please see the Intel Programmer's guides. Interrupts are disabled before * a CPU Halts into Deep C-state. (This allows CPU-hardware-specific cleanup * before servicing interrupts.) When a Deep C-state CPU wakes up (due to - * an externally generated interrupt), it resume execution where it halted. + * an externally generated interrupt), it resumes execution where it halted. * The CPU returning from Deep C-state must enable interrupts before it will * handle the pending interrupt that woke it from Deep C-state. * @@ -72,7 +72,7 @@ extern "C" { * } timers[32]; * } * - * There are 32 possible timers in an hpet. Only the first 3 timers are + * There are 32 possible timers in an HPET. Only the first 3 timers are * required. The other 29 timers are optional. * * HPETs can have 64-bit or 32-bit timers. Timers/compare registers can @@ -80,7 +80,7 @@ extern "C" { * The first two timers are not used. The HPET spec intends the first two * timers to be used as "legacy replacement" for the PIT and RTC timers. * - * Solaris uses the first available non-legacy replacement timer as a proxy + * illumos uses the first available non-legacy replacement timer as a proxy * timer for processor Local APIC Timers that stop in deep idle C-states. */ @@ -97,7 +97,7 @@ extern "C" { #define HPET_SIZE (1024) /* - * Offsets of hpet registers and macros to access them from HPET base address. + * Offsets of HPET registers and macros to access them from HPET base address. 
*/ #define HPET_GEN_CAP_OFFSET (0) #define HPET_GEN_CONFIG_OFFSET (0x10) diff --git a/usr/src/uts/i86pc/sys/prom_debug.h b/usr/src/uts/i86pc/sys/prom_debug.h new file mode 100644 index 0000000000..ae64d91711 --- /dev/null +++ b/usr/src/uts/i86pc/sys/prom_debug.h @@ -0,0 +1,72 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Oxide Computer Company + */ + +#ifndef _SYS_PROM_DEBUG_H +#define _SYS_PROM_DEBUG_H + +#include <sys/promif.h> + +/* + * These macros are used to emit coarse-grained early boot debugging + * information when the user sets "prom_debug" in the boot environment. They + * should only be used for information that we cannot easily obtain through a + * richer mechanism because the machine hangs or crashes before other debugging + * tools are available. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +extern int prom_debug; + +/* + * Print a string message, used to signal that we have at least reached a + * particular point in the code: + */ +#define PRM_POINT(q) do { \ + if (prom_debug) { \ + prom_printf("%s:%d: %s\n", \ + __FILE__, __LINE__, (q)); \ + } \ + } while (0) + +/* + * Print the name and value of an integer variable: + */ +#define PRM_DEBUG(q) do { \ + if (prom_debug) { \ + prom_printf("%s:%d: '%s' is 0x%llx\n", \ + __FILE__, __LINE__, #q, (long long)(q)); \ + } \ + } while (0) + +/* + * Print the name and value of a string (char *) variable (which may be NULL): + */ +#define PRM_DEBUGS(q) do { \ + if (prom_debug) { \ + const char *qq = q; \ + prom_printf("%s:%d: '%s' is '%s'\n", \ + __FILE__, __LINE__, #q, \ + qq != NULL ? 
qq : "<NULL>"); \ + } \ + } while (0) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_PROM_DEBUG_H */ diff --git a/usr/src/uts/i86pc/vmm/Makefile b/usr/src/uts/i86pc/vmm/Makefile index e7f07c4c4e..0106dd0a0f 100644 --- a/usr/src/uts/i86pc/vmm/Makefile +++ b/usr/src/uts/i86pc/vmm/Makefile @@ -43,7 +43,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) # Overrides and additions # -CERRWARN += -_gcc=-Wno-empty-body # 3rd party code SMOFF += all_func_returns @@ -51,9 +50,6 @@ SMOFF += all_func_returns # needs work $(OBJS_DIR)/vmm_sol_dev.o := SMOFF += signed_integer_overflow_check -# a can't happen: vmx_setcap() warn: variable dereferenced before check 'pptr' -$(OBJS_DIR)/vmx.o := SMOFF += deref_check - ALL_BUILDS = $(ALL_BUILDSONLY64) DEF_BUILDS = $(DEF_BUILDSONLY64) PRE_INC_PATH = -I$(COMPAT)/bhyve -I$(COMPAT)/bhyve/amd64 \ @@ -61,17 +57,9 @@ PRE_INC_PATH = -I$(COMPAT)/bhyve -I$(COMPAT)/bhyve/amd64 \ INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(UTSBASE)/i86pc/io/vmm/io AS_INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(OBJS_DIR) -CFLAGS += -_gcc=-Wimplicit-function-declaration -# The FreeBSD %# notation makes gcc gripe -CFLAGS += -_gcc=-Wno-format # enable collection of VMM statistics CFLAGS += -DVMM_KEEP_STATS -$(OBJS_DIR)/vmm.o := CERRWARN += -_gcc=-Wno-pointer-sign -_gcc=-Wno-type-limits -$(OBJS_DIR)/svm.o := CERRWARN += -_gcc=-Wno-pointer-sign -_gcc=-Wno-type-limits -$(OBJS_DIR)/vmx.o := CERRWARN += -_gcc=-Wno-unused-variable -$(OBJS_DIR)/iommu.o := CERRWARN += -_gcc=-Wno-unused-variable - LDFLAGS += -N misc/acpica -N misc/pcie -N fs/dev LDFLAGS += -z type=kmod -M $(MAPFILE) diff --git a/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c b/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c index 6b7da6a99a..7be8a4a9f8 100644 --- a/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c +++ b/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c @@ -684,13 +684,14 @@ amdf17nbdf_ioctl_kind(intptr_t arg, int mode) } static int -amdf17nbdf_ioctl_temp(amdf17nbdf_t *nbdf, minor_t minor, 
intptr_t arg, int mode) +amdf17nbdf_ioctl_scalar(amdf17nbdf_t *nbdf, minor_t minor, intptr_t arg, + int mode) { amdf17nb_t *nb; hrtime_t diff; - sensor_ioctl_temperature_t temp; + sensor_ioctl_scalar_t scalar; - bzero(&temp, sizeof (temp)); + bzero(&scalar, sizeof (scalar)); mutex_enter(&nbdf->amd_nbdf_lock); nb = amdf17nbdf_lookup_nb(nbdf, minor); @@ -710,12 +711,12 @@ amdf17nbdf_ioctl_temp(amdf17nbdf_t *nbdf, minor_t minor, intptr_t arg, int mode) } } - temp.sit_unit = SENSOR_UNIT_CELSIUS; - temp.sit_temp = nb->amd_nb_temp; - temp.sit_gran = AMDF17_THERMAL_GRANULARITY; + scalar.sis_unit = SENSOR_UNIT_CELSIUS; + scalar.sis_value = nb->amd_nb_temp; + scalar.sis_gran = AMDF17_THERMAL_GRANULARITY; mutex_exit(&nbdf->amd_nbdf_lock); - if (ddi_copyout(&temp, (void *)arg, sizeof (temp), + if (ddi_copyout(&scalar, (void *)arg, sizeof (scalar), mode & FKIOCTL) != 0) { return (EFAULT); } @@ -737,10 +738,10 @@ amdf17nbdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, m = getminor(dev); switch (cmd) { - case SENSOR_IOCTL_TYPE: + case SENSOR_IOCTL_KIND: return (amdf17nbdf_ioctl_kind(arg, mode)); - case SENSOR_IOCTL_TEMPERATURE: - return (amdf17nbdf_ioctl_temp(nbdf, m, arg, mode)); + case SENSOR_IOCTL_SCALAR: + return (amdf17nbdf_ioctl_scalar(nbdf, m, arg, mode)); default: return (ENOTTY); } diff --git a/usr/src/uts/intel/io/amdnbtemp/amdnbtemp.c b/usr/src/uts/intel/io/amdnbtemp/amdnbtemp.c index 1330f8563f..17934520fd 100644 --- a/usr/src/uts/intel/io/amdnbtemp/amdnbtemp.c +++ b/usr/src/uts/intel/io/amdnbtemp/amdnbtemp.c @@ -103,7 +103,7 @@ typedef struct amdnbtemp { static void *amdnbtemp_state; static int -amdnbtemp_read(void *arg, sensor_ioctl_temperature_t *temp) +amdnbtemp_read(void *arg, sensor_ioctl_scalar_t *scalar) { amdnbtemp_t *at = arg; @@ -120,9 +120,9 @@ amdnbtemp_read(void *arg, sensor_ioctl_temperature_t *temp) at->at_temp -= AMDNBTEMP_TEMP_ADJUST; } - temp->sit_unit = SENSOR_UNIT_CELSIUS; - temp->sit_gran = AMDNBTEMP_GRANULARITY; - 
temp->sit_temp = at->at_temp; + scalar->sis_unit = SENSOR_UNIT_CELSIUS; + scalar->sis_gran = AMDNBTEMP_GRANULARITY; + scalar->sis_value = at->at_temp; mutex_exit(&at->at_mutex); return (0); @@ -130,7 +130,7 @@ amdnbtemp_read(void *arg, sensor_ioctl_temperature_t *temp) static const ksensor_ops_t amdnbtemp_temp_ops = { .kso_kind = ksensor_kind_temperature, - .kso_temp = amdnbtemp_read + .kso_scalar = amdnbtemp_read }; static void diff --git a/usr/src/uts/intel/io/coretemp/coretemp.c b/usr/src/uts/intel/io/coretemp/coretemp.c index ee2d143554..bea8078002 100644 --- a/usr/src/uts/intel/io/coretemp/coretemp.c +++ b/usr/src/uts/intel/io/coretemp/coretemp.c @@ -259,7 +259,7 @@ coretemp_update(coretemp_t *ct, coretemp_sensor_t *sensor, cmi_hdl_t hdl) } static int -coretemp_read(void *arg, sensor_ioctl_temperature_t *sit) +coretemp_read(void *arg, sensor_ioctl_scalar_t *scalar) { coretemp_sensor_t *sensor = arg; coretemp_t *ct = sensor->cs_coretemp; @@ -313,10 +313,10 @@ coretemp_read(void *arg, sensor_ioctl_temperature_t *sit) sensor->cs_temperature = sensor->cs_tjmax - reading; sensor->cs_resolution = resolution; - sit->sit_unit = SENSOR_UNIT_CELSIUS; - sit->sit_temp = sensor->cs_temperature; - sit->sit_gran = CORETEMP_GRANULARITY; - sit->sit_prec = sensor->cs_resolution; + scalar->sis_unit = SENSOR_UNIT_CELSIUS; + scalar->sis_value = sensor->cs_temperature; + scalar->sis_gran = CORETEMP_GRANULARITY; + scalar->sis_prec = sensor->cs_resolution; mutex_exit(&ct->coretemp_mutex); return (0); @@ -324,7 +324,7 @@ coretemp_read(void *arg, sensor_ioctl_temperature_t *sit) static const ksensor_ops_t coretemp_temp_ops = { .kso_kind = ksensor_kind_temperature, - .kso_temp = coretemp_read + .kso_scalar = coretemp_read }; static void diff --git a/usr/src/uts/intel/io/pchtemp/pchtemp.c b/usr/src/uts/intel/io/pchtemp/pchtemp.c index 4aeb098112..2cfd7ae806 100644 --- a/usr/src/uts/intel/io/pchtemp/pchtemp.c +++ b/usr/src/uts/intel/io/pchtemp/pchtemp.c @@ -137,7 +137,7 @@ 
pchtemp_read_check(pchtemp_t *pch) } static int -pchtemp_read(void *arg, sensor_ioctl_temperature_t *sit) +pchtemp_read(void *arg, sensor_ioctl_scalar_t *scalar) { uint16_t temp, ctt, tahv, talv; uint8_t tsel; @@ -175,9 +175,9 @@ pchtemp_read(void *arg, sensor_ioctl_temperature_t *sit) } pch->pcht_temp = (temp & PCHTEMP_REG_TEMP_TSR) - PCHTEMP_TEMP_OFFSET; - sit->sit_unit = SENSOR_UNIT_CELSIUS; - sit->sit_gran = PCHTEMP_TEMP_RESOLUTION; - sit->sit_temp = pch->pcht_temp; + scalar->sis_unit = SENSOR_UNIT_CELSIUS; + scalar->sis_gran = PCHTEMP_TEMP_RESOLUTION; + scalar->sis_value = pch->pcht_temp; mutex_exit(&pch->pcht_mutex); return (0); @@ -185,7 +185,7 @@ pchtemp_read(void *arg, sensor_ioctl_temperature_t *sit) static const ksensor_ops_t pchtemp_temp_ops = { .kso_kind = ksensor_kind_temperature, - .kso_temp = pchtemp_read + .kso_scalar = pchtemp_read }; static void diff --git a/usr/src/uts/intel/tem/Makefile b/usr/src/uts/intel/tem/Makefile index 9eca2e7d98..1165cf3264 100644 --- a/usr/src/uts/intel/tem/Makefile +++ b/usr/src/uts/intel/tem/Makefile @@ -38,7 +38,6 @@ UTSBASE = ../.. # MODULE = tem OBJECTS = $(TEM_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(TEM_OBJS:%.o=$(LINTS_DIR)/%.ln) ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) # @@ -50,22 +49,11 @@ include $(UTSBASE)/intel/Makefile.intel # Define targets # ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) LDFLAGS += -dy -Ndacf/consconfig_dacf # -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_STATIC_UNUSED - -CERRWARN += -_gcc=-Wno-unused-function -CERRWARN += $(CNOWARN_UNINIT) - -# # Default build targets. 
# .KEEP_STATE: @@ -78,12 +66,6 @@ clean: $(CLEAN_DEPS) clobber: $(CLOBBER_DEPS) -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - install: $(INSTALL_DEPS) # diff --git a/usr/src/uts/sparc/tem/Makefile b/usr/src/uts/sparc/tem/Makefile index 12d9741c56..ee46e5852d 100644 --- a/usr/src/uts/sparc/tem/Makefile +++ b/usr/src/uts/sparc/tem/Makefile @@ -40,9 +40,6 @@ UTSBASE = ../.. # MODULE = tem OBJECTS = $(TEM_OBJS:%=$(OBJS_DIR)/%) $(FONT_OBJS:%=$(OBJS_DIR)/%) - -LINTS = $(TEM_OBJS:%.o=$(LINTS_DIR)/%.ln) -LINTS += $(FONT_OBJS:%.o=$(LINTS_DIR)/%.ln) ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) # @@ -54,24 +51,11 @@ include $(UTSBASE)/sparc/Makefile.sparc # Define targets # ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) -CFLAGS += $(CCVERBOSE) - LDFLAGS += -dy -Ndacf/consconfig_dacf # -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_STATIC_UNUSED - -CERRWARN += -_gcc=-Wno-unused-function -CERRWARN += $(CNOWARN_UNINIT) - -# # Default build targets. # .KEEP_STATE: @@ -84,19 +68,9 @@ clean: $(CLEAN_DEPS) clobber: $(CLOBBER_DEPS) -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - install: $(INSTALL_DEPS) # # Include common targets. 
# include $(UTSBASE)/sparc/Makefile.targ - -CLOBBERFILES += \ - $(OBJS_DIR)/$(VGATEXT_FONT).o \ - $(OBJS_DIR)/$(VGATEXT_FONT).c diff --git a/usr/src/uts/sun/sys/ser_async.h b/usr/src/uts/sun/sys/ser_async.h index 8e8a573829..0f89bce4c5 100644 --- a/usr/src/uts/sun/sys/ser_async.h +++ b/usr/src/uts/sun/sys/ser_async.h @@ -27,8 +27,6 @@ #ifndef _SYS_SER_ASYNC_H #define _SYS_SER_ASYNC_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Initial port setup parameters for async lines */ @@ -57,9 +55,9 @@ extern "C" { #define ZFIFOSZ 3 /* - * this macro needs a constant Hertz, but we can now have a hires_tick. + * This macro needs a constant 100 Hz, but hires_tick or hz may change that. * ztdelay in zs_async.c converts to a true delay based on hz so we - * can use 100 for Hertz here. + * can use 100 Hz here. */ #define ZDELAY(n) ZSDelayConst(100, ZFIFOSZ, NBBY, n) @@ -166,9 +164,9 @@ struct asyncline { * and the second byte is the actual data. The ring buffer * needs to be defined as ushort_t to accomodate this. */ - ushort_t za_ring[RINGSIZE]; + ushort_t za_ring[RINGSIZE]; timeout_id_t za_kick_rcv_id; - int za_kick_rcv_count; + int za_kick_rcv_count; timeout_id_t za_zsa_restart_id; bufcall_id_t za_bufcid; mblk_t *za_rstandby[ZSA_MAX_RSTANDBY]; diff --git a/usr/src/uts/sun4u/chicago/io/fpc/fpc-impl-4u.c b/usr/src/uts/sun4u/chicago/io/fpc/fpc-impl-4u.c index aa96f19079..a8ceea0344 100644 --- a/usr/src/uts/sun4u/chicago/io/fpc/fpc-impl-4u.c +++ b/usr/src/uts/sun4u/chicago/io/fpc/fpc-impl-4u.c @@ -24,6 +24,10 @@ * Use is subject to license terms. */ +/* + * Copyright 2020 Nexenta by DDN, Inc. All rights reserved. + */ + #include <sys/file.h> #include <sys/sunndi.h> #include <sys/sunddi.h> @@ -101,7 +105,6 @@ static uint64_t counter_reg_offsets[] = { static ldi_ident_t ldi_identifier; static boolean_t ldi_identifier_valid = B_FALSE; -static cred_t *credentials = NULL; /* Called by _init to determine if it is OK to install driver. 
*/ int @@ -116,7 +119,6 @@ fpc_platform_module_init(dev_info_t *dip) { int status; - credentials = crget(); status = ldi_ident_from_dip(dip, &ldi_identifier); if (status == 0) ldi_identifier_valid = B_TRUE; @@ -211,8 +213,6 @@ fpc_platform_module_fini(dev_info_t *dip) { if (ldi_identifier_valid) ldi_ident_release(ldi_identifier); - if (credentials) - crfree(credentials); } fire_perfreg_handle_t @@ -226,7 +226,7 @@ fpc_get_perfreg_handle(int devnum) if ((handle_impl->devspec = fpc_get_platform_data_by_number(devnum)) != NULL) { rval = ldi_open_by_name(handle_impl->devspec->nodename, - OPEN_FLAGS, credentials, &handle_impl->devhandle, + OPEN_FLAGS, kcred, &handle_impl->devhandle, ldi_identifier); } @@ -243,7 +243,7 @@ fpc_free_counter_handle(fire_perfreg_handle_t handle) { fire_counter_handle_impl_t *handle_impl = (fire_counter_handle_impl_t *)handle; - (void) ldi_close(handle_impl->devhandle, OPEN_FLAGS, credentials); + (void) ldi_close(handle_impl->devhandle, OPEN_FLAGS, kcred); kmem_free(handle_impl, sizeof (fire_counter_handle_impl_t)); return (SUCCESS); } @@ -281,7 +281,7 @@ fpc_event_io(fire_perfreg_handle_t handle, fire_perfcnt_t group, /* Read original value. 
*/ if (((rval = ldi_ioctl(handle_impl->devhandle, cmd, (intptr_t)&prg, - FKIOCTL, credentials, &ioctl_rval)) == SUCCESS) && (!is_write)) { + FKIOCTL, kcred, &ioctl_rval)) == SUCCESS) && (!is_write)) { *reg_data = prg.data; } @@ -322,7 +322,7 @@ fpc_counter_io(fire_perfreg_handle_t handle, fire_perfcnt_t group, prg.data = *value; if (((rval = ldi_ioctl(handle_impl->devhandle, command, (intptr_t)&prg, - FKIOCTL, credentials, &ioctl_rval)) == SUCCESS) && (!is_write)) { + FKIOCTL, kcred, &ioctl_rval)) == SUCCESS) && (!is_write)) { *value = prg.data; } diff --git a/usr/src/uts/sun4v/ontario/io/tsalarm.c b/usr/src/uts/sun4v/ontario/io/tsalarm.c index 7fb9577028..6f80db8e88 100644 --- a/usr/src/uts/sun4v/ontario/io/tsalarm.c +++ b/usr/src/uts/sun4v/ontario/io/tsalarm.c @@ -544,7 +544,7 @@ FAIL: if (rv != 0) { if (softc->flags & TSAL_OPENED) (void) ldi_close(softc->lh, FREAD|FWRITE, credp); - if (softc->flags * TSAL_IDENTED) + if (softc->flags & TSAL_IDENTED) (void) ldi_ident_release(softc->li); softc->flags &= ~(TSAL_OPENED | TSAL_IDENTED); if (softc->req_ptr != NULL) |