summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorDan McDonald <danmcd@joyent.com>2022-02-18 14:26:58 -0500
committerDan McDonald <danmcd@joyent.com>2022-02-18 14:26:58 -0500
commit6c5a782db588047b641eaac2971cdd95add02bdf (patch)
tree483a44da347ac8ec1f256591389ad5cb09556dee /usr/src
parentc14dafef3b7615cc4991d02e1a9cdf506fdbc735 (diff)
parent59b827862fcc03b4da50df402eeb6288a75ac015 (diff)
downloadillumos-joyent-6c5a782db588047b641eaac2971cdd95add02bdf.tar.gz
[illumos-gate merge]
commit c2cd3a449cfa117e3a164f66931fa6c26c762945 14022 zpool online -e breaks access to pool commit 957246c9e6c47389c40079995d73eebcc659fb29 14456 bhyve needs fpu import/export commit 4dde95dacc64b35aa9882fcbd0a847355d130734 14501 pcieadm could decode rcld commit b302a2007db5ab3847583f9a046d41c11789c092 14512 iwn: suspicious concatenation of string literals commit 6f0e4dc91b854250fff5c24de2d27aed3375ac69 14469 nvme could raise dynamic lun expansion sysevents commit cd0d4b4073e62fa22997078b1595f399434a1047 14450 Want PCI platform resource discovery module commit 55855f50d61b53851853bf1fdcdb04d4b63a1734 14488 lex: clean up warnings Conflicts: manifest
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/bhyvectl/Makefile3
-rw-r--r--usr/src/cmd/bhyvectl/bhyvectl.c100
-rw-r--r--usr/src/cmd/pcieadm/pcieadm_cfgspace.c88
-rw-r--r--usr/src/cmd/sgs/lex/Makefile.com3
-rw-r--r--usr/src/cmd/sgs/lex/common/sub1.c19
-rw-r--r--usr/src/cmd/sgs/lex/common/sub2.c6
-rw-r--r--usr/src/cmd/sgs/lex/common/sub3.c7
-rw-r--r--usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c31
-rw-r--r--usr/src/compat/bhyve/amd64/machine/fpu.h28
-rw-r--r--usr/src/compat/bhyve/amd64/machine/pcb.h21
-rw-r--r--usr/src/lib/libzfs/common/libzfs_pool.c18
-rw-r--r--usr/src/pkg/manifests/system-bhyve-tests.p5m1
-rw-r--r--usr/src/pkg/manifests/system-kernel-platform.p5m2
-rw-r--r--usr/src/test/bhyve-tests/runfiles/default.run2
-rw-r--r--usr/src/test/bhyve-tests/tests/vmm/Makefile3
-rw-r--r--usr/src/test/bhyve-tests/tests/vmm/common.c5
-rw-r--r--usr/src/test/bhyve-tests/tests/vmm/common.h2
-rw-r--r--usr/src/test/bhyve-tests/tests/vmm/fpu_getset.c333
-rw-r--r--usr/src/test/bhyve-tests/tests/vmm/mem_partial.c5
-rw-r--r--usr/src/test/bhyve-tests/tests/vmm/mem_seg_map.c5
-rw-r--r--usr/src/uts/common/io/blkdev/blkdev.c127
-rw-r--r--usr/src/uts/common/io/iwn/if_iwnreg.h14
-rw-r--r--usr/src/uts/common/io/nvme/nvme.c43
-rw-r--r--usr/src/uts/common/sys/Makefile8
-rw-r--r--usr/src/uts/common/sys/plat/pci_prd.h130
-rw-r--r--usr/src/uts/i86pc/Makefile.files4
-rw-r--r--usr/src/uts/i86pc/Makefile.i86pc2
-rw-r--r--usr/src/uts/i86pc/io/pci/mps_table.h (renamed from usr/src/uts/intel/io/pci/mps_table.h)8
-rw-r--r--usr/src/uts/i86pc/io/pci/pci_prd_i86pc.c (renamed from usr/src/uts/intel/io/pci/pci_resource.c)451
-rw-r--r--usr/src/uts/i86pc/io/pci/pcihrt.h (renamed from usr/src/uts/intel/io/pci/pcihrt.h)8
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c1
-rw-r--r--usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h4
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm.c69
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c95
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c62
-rw-r--r--usr/src/uts/i86pc/os/hma_fpu.c264
-rw-r--r--usr/src/uts/i86pc/os/pci_bios.c21
-rw-r--r--usr/src/uts/i86pc/pci_prd/Makefile66
-rw-r--r--usr/src/uts/i86pc/sys/hma.h38
-rw-r--r--usr/src/uts/i86pc/sys/pci_cfgspace_impl.h11
-rw-r--r--usr/src/uts/i86pc/sys/vmm_dev.h22
-rw-r--r--usr/src/uts/intel/Makefile.files2
-rw-r--r--usr/src/uts/intel/io/pci/pci_autoconfig.c27
-rw-r--r--usr/src/uts/intel/io/pci/pci_boot.c380
-rw-r--r--usr/src/uts/intel/io/pci/pci_memlist.c4
-rw-r--r--usr/src/uts/intel/os/archdep.c2
-rw-r--r--usr/src/uts/intel/os/fpu.c41
-rw-r--r--usr/src/uts/intel/pci_autoconfig/Makefile4
-rw-r--r--usr/src/uts/intel/sys/fp.h45
49 files changed, 1919 insertions, 716 deletions
diff --git a/usr/src/cmd/bhyvectl/Makefile b/usr/src/cmd/bhyvectl/Makefile
index 01d331c823..486f39da31 100644
--- a/usr/src/cmd/bhyvectl/Makefile
+++ b/usr/src/cmd/bhyvectl/Makefile
@@ -35,6 +35,9 @@ CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \
-I$(SRC)/uts/i86pc
LDLIBS += -lvmmapi
+# Force c99 for everything
+CSTD= $(CSTD_GNU99)
+
CERRWARN += -_gcc=-Wno-uninitialized
# main() is too hairy for smatch
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c
index 4fc6ddc251..cbe779a4ea 100644
--- a/usr/src/cmd/bhyvectl/bhyvectl.c
+++ b/usr/src/cmd/bhyvectl/bhyvectl.c
@@ -39,7 +39,7 @@
*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2019 Joyent, Inc.
- * Copyright 2021 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
*/
#include <sys/cdefs.h>
@@ -51,6 +51,9 @@ __FBSDID("$FreeBSD$");
#include <sys/errno.h>
#include <sys/mman.h>
#include <sys/cpuset.h>
+#ifndef __FreeBSD__
+#include <sys/fp.h>
+#endif /* __FreeBSD__ */
#include <stdio.h>
#include <stdlib.h>
@@ -312,6 +315,7 @@ static int get_cpu_topology;
#ifndef __FreeBSD__
static int pmtmr_port;
static int wrlock_cycle;
+static int get_fpu;
#endif
/*
@@ -1534,6 +1538,7 @@ setup_options(bool cpu_intel)
#ifndef __FreeBSD__
{ "pmtmr-port", REQ_ARG, 0, PMTMR_PORT },
{ "wrlock-cycle", NO_ARG, &wrlock_cycle, 1 },
+ { "get-fpu", NO_ARG, &get_fpu, 1 },
#endif
};
@@ -1752,6 +1757,93 @@ show_memseg(struct vmctx *ctx)
}
}
+#ifndef __FreeBSD__
+/*
+ * Fetch the FPU state of a vCPU and print it to stdout.
+ *
+ * The layout of the save area is first queried with VM_DESC_FPU_AREA so that
+ * a buffer of the required size (vfd_req_size) can be allocated.  The buffer
+ * is later interpreted as a struct xsave_state, so if an AVX entry is
+ * described it must have the size (0x100) and offset (0x240) checked for
+ * below; any other placement is rejected rather than misprinted.
+ *
+ * Returns 0 on success, EINVAL for an unexpected AVX layout, ENOMEM if the
+ * buffer cannot be allocated, or the errno left by a failed ioctl.
+ */
+static int
+show_fpu(struct vmctx *ctx, int vcpu)
+{
+	int res, fd;
+
+	struct vm_fpu_desc_entry entries[64];
+	struct vm_fpu_desc desc = {
+		.vfd_entry_data = entries,
+		.vfd_num_entries = 64,
+	};
+	fd = vm_get_device_fd(ctx);
+	res = ioctl(fd, VM_DESC_FPU_AREA, &desc);
+	if (res != 0) {
+		return (errno);
+	}
+	for (uint_t i = 0; i < desc.vfd_num_entries; i++) {
+		const struct vm_fpu_desc_entry *entry = &entries[i];
+
+		/* confirm that AVX fields are where we expect */
+		if (entry->vfde_feature == XFEATURE_AVX) {
+			if (entry->vfde_size != 0x100 ||
+			    entry->vfde_off != 0x240) {
+				(void) fprintf(stderr,
+				    "show_fpu: unexpected AVX size/placement "
+				    "- size:%x off:%x\n",
+				    entry->vfde_size, entry->vfde_off);
+				return (EINVAL);
+			}
+		}
+	}
+	void *buf = malloc(desc.vfd_req_size);
+	if (buf == NULL) {
+		return (ENOMEM);
+	}
+	struct vm_fpu_state req = {
+		.vcpuid = vcpu,
+		.buf = buf,
+		.len = desc.vfd_req_size,
+	};
+	res = ioctl(fd, VM_GET_FPU, &req);
+	if (res != 0) {
+		/* save errno before free() has a chance to disturb it */
+		res = errno;
+		free(buf);
+		return (res);
+	}
+
+	const struct xsave_state *state = buf;
+	const struct fxsave_state *fx = &state->xs_fxsave;
+	(void) printf("fpu_fcw[%d]\t\t0x%04x\n", vcpu, fx->fx_fcw);
+	(void) printf("fpu_fsw[%d]\t\t0x%04x\n", vcpu, fx->fx_fsw);
+	(void) printf("fpu_ftw[%d]\t\t0x%04x\n", vcpu, fx->fx_fctw);
+	(void) printf("fpu_fop[%d]\t\t0x%04x\n", vcpu, fx->fx_fop);
+	(void) printf("fpu_rip[%d]\t\t0x%016lx\n", vcpu, fx->fx_rip);
+	(void) printf("fpu_rdp[%d]\t\t0x%016lx\n", vcpu, fx->fx_rdp);
+	(void) printf("fpu_mxcsr[%d]\t\t0x%08x\n", vcpu, fx->fx_mxcsr);
+	(void) printf("fpu_mxcsr_mask[%d]\t0x%08x\n", vcpu,
+	    fx->fx_mxcsr_mask);
+	/* ST/MMX regs */
+	for (uint_t i = 0; i < 8; i++) {
+		/*
+		 * Register index first, then the vCPU id, matching both the
+		 * format string and the xmm/ymm loops below.
+		 */
+		(void) printf("fpu_st%u[%d]\t\t0x%08x%08x%08x%08x\n", i, vcpu,
+		    fx->fx_st[i].__fpr_pad[0], fx->fx_st[i].__fpr_pad[1],
+		    fx->fx_st[i].__fpr_pad[2], fx->fx_st[i].__fpr_pad[3]);
+	}
+	/* SSE regs */
+	for (uint_t i = 0; i < 16; i++) {
+		(void) printf("fpu_xmm%u[%d]\t\t0x%08x%08x%08x%08x\n",
+		    i, vcpu,
+		    fx->fx_xmm[i]._l[0], fx->fx_xmm[i]._l[1],
+		    fx->fx_xmm[i]._l[2], fx->fx_xmm[i]._l[3]);
+	}
+
+	/* Only print the AVX component if it is marked valid in the header */
+	if (state->xs_header.xsh_xstate_bv & XFEATURE_AVX) {
+		/* AVX regs */
+		for (uint_t i = 0; i < 16; i++) {
+			(void) printf("fpu_ymm%u[%d]\t\t0x%08x%08x%08x%08x\n",
+			    i, vcpu,
+			    state->xs_ymm[i]._l[0], state->xs_ymm[i]._l[1],
+			    state->xs_ymm[i]._l[2], state->xs_ymm[i]._l[3]);
+		}
+	}
+
+	free(buf);
+	return (0);
+}
+#endif /* __FreeBSD__ */
+
int
main(int argc, char *argv[])
{
@@ -2150,6 +2242,12 @@ main(int argc, char *argv[])
if (!error)
error = get_all_segments(ctx, vcpu);
+#ifndef __FreeBSD__
+ if (!error && (get_fpu || get_all)) {
+ error = show_fpu(ctx, vcpu);
+ }
+#endif /* __FreeBSD__ */
+
if (!error) {
if (cpu_intel)
error = get_misc_vmcs(ctx, vcpu);
diff --git a/usr/src/cmd/pcieadm/pcieadm_cfgspace.c b/usr/src/cmd/pcieadm/pcieadm_cfgspace.c
index 420613da75..73841d4c23 100644
--- a/usr/src/cmd/pcieadm/pcieadm_cfgspace.c
+++ b/usr/src/cmd/pcieadm/pcieadm_cfgspace.c
@@ -4267,6 +4267,91 @@ pcieadm_cap_info_ht(pcieadm_cfgspace_walk_t *walkp,
}
}
+/*
+ * Root Complex Link Declaration
+ *
+ * Field definitions for the 32-bit self description register, which
+ * pcieadm_cap_rcld prints as "desc" at capability offset 0x4.  The first two
+ * values of each entry appear to be the low and high bit of the field
+ * (NOTE(review): confirm against pcieadm_regdef_t).  The table ends with a
+ * { -1, -1, NULL } sentinel.
+ */
+static pcieadm_regdef_t pcieadm_regdef_rcld_desc[] = {
+ { 0, 3, "type", "Element Type", PRDV_STRVAL,
+ .prd_val = { .prdv_strval = { "Configuration Space Element",
+ "System Egress Port or internal sink",
+ "Internal Root Complex Link" } } },
+ { 8, 15, "num", "Number of Entries", PRDV_HEX },
+ { 16, 23, "id", "Component ID", PRDV_HEX },
+ { 24, 31, "port", "Port Number", PRDV_HEX },
+ { -1, -1, NULL }
+};
+
+/*
+ * Field definitions for the 32-bit description word that starts each Root
+ * Complex Link entry.  The "type" field (bit 1) is also what
+ * pcieadm_cfgspace_print_rcld() tests to decide how the link address that
+ * follows is printed.
+ */
+static pcieadm_regdef_t pcieadm_regdef_rcld_link[] = {
+	{ 0, 0, "valid", "Link Valid", PRDV_STRVAL,
+	    .prd_val = { .prdv_strval = { "no", "yes" } } },
+	{ 1, 1, "type", "Link Type", PRDV_STRVAL,
+	    .prd_val = { .prdv_strval = { "RCRB", "Configuration Space" } } },
+	{ 2, 2, "rcrb", "Associate RCRB", PRDV_STRVAL,
+	    .prd_val = { .prdv_strval = { "no", "yes" } } },
+	{ 16, 23, "tid", "Target Component ID", PRDV_HEX },
+	{ 24, 31, "tport", "Target Port Number", PRDV_HEX },
+	{ -1, -1, NULL }
+};
+
+/*
+ * Print a variable number of Root Complex Links.
+ *
+ * The number of link entries is read from the byte at offset 5 of the
+ * capability, i.e. the "Number of Entries" field of the self description.
+ * Entries are 0x10 bytes apart, starting at print->pcp_off.  For each entry
+ * the 4-byte description word is decoded and then, when the link type bit is
+ * clear (RCRB), the 8-byte link address found 8 bytes into the entry is
+ * printed as hex.  Configuration Space style link addresses are not decoded;
+ * a warning is issued for them instead.  The 'arg' parameter is unused.
+ */
+static void
+pcieadm_cfgspace_print_rcld(pcieadm_cfgspace_walk_t *walkp,
+    pcieadm_cfgspace_print_t *print, void *arg)
+{
+	uint_t nlinks = walkp->pcw_data->pcb_u8[walkp->pcw_capoff + 5];
+
+	for (uint_t i = 0; i < nlinks; i++) {
+		char mshort[32], mhuman[128];
+		pcieadm_cfgspace_print_t p;
+		uint16_t off = print->pcp_off + i * 0x10;
+		uint8_t type = walkp->pcw_data->pcb_u8[walkp->pcw_capoff + off];
+
+		(void) snprintf(mshort, sizeof (mshort), "link%udesc", i);
+		(void) snprintf(mhuman, sizeof (mhuman),
+		    "Link %u Description", i);
+
+		p.pcp_off = off;
+		p.pcp_len = 4;
+		p.pcp_short = mshort;
+		p.pcp_human = mhuman;
+		p.pcp_print = pcieadm_cfgspace_print_regdef;
+		p.pcp_arg = pcieadm_regdef_rcld_link;
+
+		p.pcp_print(walkp, &p, p.pcp_arg);
+
+		/*
+		 * The way that we print the link depends on the actual type of
+		 * link which is in bit 1 of the link description.
+		 */
+		p.pcp_off += 8;
+
+		if ((type & (1 << 1)) == 0) {
+			(void) snprintf(mshort, sizeof (mshort),
+			    "link%uaddr", i);
+			(void) snprintf(mhuman, sizeof (mhuman),
+			    "Link %u Address", i);
+			p.pcp_len = 8;
+			p.pcp_print = pcieadm_cfgspace_print_hex;
+			p.pcp_arg = NULL;
+
+			p.pcp_print(walkp, &p, p.pcp_arg);
+		} else {
+			warnx("encountered unsupported RCLD Link Address");
+		}
+	}
+}
+
+/*
+ * Layout of the Root Complex Link Declaration capability: the capability
+ * header at offset 0x0, the self description at 0x4, and the link entries
+ * starting at 0x10.  The link entries are handled by
+ * pcieadm_cfgspace_print_rcld(), which walks as many 0x10-byte entries as the
+ * self description reports.
+ */
+static pcieadm_cfgspace_print_t pcieadm_cap_rcld[] = {
+ { 0x0, 4, "caphdr", "Capability Header",
+ pcieadm_cfgspace_print_regdef, pcieadm_regdef_pcie_caphdr },
+ { 0x4, 4, "desc", "Self Description",
+ pcieadm_cfgspace_print_regdef, pcieadm_regdef_rcld_desc },
+ { 0x10, 0x10, "link", "Link Entry", pcieadm_cfgspace_print_rcld },
+ { -1, -1, NULL }
+};
+
+
pcieadm_pci_cap_t pcieadm_pci_caps[] = {
{ PCI_CAP_ID_PM, "pcipm", "PCI Power Management",
pcieadm_cap_info_pcipm, { { 2, 8, pcieadm_cap_pcipm_v3 },
@@ -4319,7 +4404,8 @@ pcieadm_pci_cap_t pcieadm_pcie_caps[] = {
{ PCIE_EXT_CAP_ID_PWR_BUDGET, "powbudg", "Power Budgeting",
pcieadm_cap_info_vers, { { 1, 0x10, pcieadm_cap_powbudg } } },
{ PCIE_EXT_CAP_ID_RC_LINK_DECL, "rcld",
- "Root Complex Link Declaration" },
+ "Root Complex Link Declaration", pcieadm_cap_info_vers,
+ { { 1, 0x1c, pcieadm_cap_rcld } } },
{ PCIE_EXT_CAP_ID_RC_INT_LINKCTRL, "rcilc",
"Root Complex Internal Link Control" },
{ PCIE_EXT_CAP_ID_RC_EVNT_CEA, "rcecea",
diff --git a/usr/src/cmd/sgs/lex/Makefile.com b/usr/src/cmd/sgs/lex/Makefile.com
index c8100b1e3f..1b40639314 100644
--- a/usr/src/cmd/sgs/lex/Makefile.com
+++ b/usr/src/cmd/sgs/lex/Makefile.com
@@ -55,9 +55,8 @@ SRCDIR = ../common
CSTD= $(CSTD_GNU99)
+# unused labels in yaccpar
CERRWARN += -_gcc=-Wno-unused-label
-CERRWARN += $(CNOWARN_UNINIT)
-CERRWARN += -_gcc=-Wno-parentheses
# Override default source file derivation rule (in Makefile.lib)
# from objects
diff --git a/usr/src/cmd/sgs/lex/common/sub1.c b/usr/src/cmd/sgs/lex/common/sub1.c
index f1d3fa601b..e63a55e34b 100644
--- a/usr/src/cmd/sgs/lex/common/sub1.c
+++ b/usr/src/cmd/sgs/lex/common/sub1.c
@@ -131,15 +131,16 @@ warning(char *s, ...)
{
va_list ap;
- if (!eof)
- if (!yyline)
+ if (!eof) {
+ if (!yyline) {
(void) fprintf(errorf, "Command line: ");
- else {
+ } else {
(void) fprintf(errorf,
!no_input ? "" : "\"%s\":", sargv[optind]);
(void) fprintf(errorf,
"line %d: ", yyline);
}
+ }
(void) fprintf(errorf, "Warning: ");
va_start(ap, s);
(void) vfprintf(errorf, s, ap);
@@ -171,8 +172,8 @@ index(int a, CHR *s)
int
alpha(int c)
{
- return ('a' <= c && c <= 'z' ||
- 'A' <= c && c <= 'Z');
+ return (('a' <= c && c <= 'z') ||
+ ('A' <= c && c <= 'Z'));
}
int
@@ -209,7 +210,7 @@ scopy(CHR *s, CHR *t)
{
CHR *i;
i = t;
- while (*i++ = *s++)
+ while ((*i++ = *s++) != 0)
;
}
@@ -494,7 +495,7 @@ cpycom(CHR *p)
(void) putc(*t++, fout);
}
(void) putc('\n', fout);
- while (c = gch()) {
+ while ((c = gch()) != 0) {
while (c == '*') {
(void) putc((char)c, fout);
if ((c = gch()) == '/') {
@@ -570,7 +571,7 @@ cpyact(void)
goto swt;
(void) putwc(c, fout);
savline = yyline;
- while (c = gch()) {
+ while ((c = gch()) != 0) {
while (c == '*') {
(void) putwc(c, fout);
if ((c = gch()) == '/') {
@@ -591,7 +592,7 @@ cpyact(void)
case '"': /* character string */
mth = c;
(void) putwc(c, fout);
- while (c = gch()) {
+ while ((c = gch()) != 0) {
if (c == '\\') {
(void) putwc(c, fout);
c = gch();
diff --git a/usr/src/cmd/sgs/lex/common/sub2.c b/usr/src/cmd/sgs/lex/common/sub2.c
index 84ff4e4699..399503d7d9 100644
--- a/usr/src/cmd/sgs/lex/common/sub2.c
+++ b/usr/src/cmd/sgs/lex/common/sub2.c
@@ -462,9 +462,9 @@ nextstate(int s, int c)
for (i = 0; i < num; i++) {
curpos = *pos++;
j = name[curpos];
- if ((!ISOPERATOR(j)) && j == c ||
- j == RSTR && c == right[curpos] ||
- j == RCCL && member(c, (CHR *) left[curpos])) {
+ if ((!ISOPERATOR(j) && j == c) ||
+ (j == RSTR && c == right[curpos]) ||
+ (j == RCCL && member(c, (CHR *) left[curpos]))) {
f = foll[curpos];
number = *f;
newpos = f+1;
diff --git a/usr/src/cmd/sgs/lex/common/sub3.c b/usr/src/cmd/sgs/lex/common/sub3.c
index 107881958d..4b9cea94ab 100644
--- a/usr/src/cmd/sgs/lex/common/sub3.c
+++ b/usr/src/cmd/sgs/lex/common/sub3.c
@@ -134,12 +134,13 @@ remch(wchar_t c)
* Make sure no EUC chars are used in reg. exp.
*/
if (!handleeuc) {
- if (!isascii(c))
+ if (!isascii(c)) {
if (iswprint(c))
warning(
"Non-ASCII character '%wc' in pattern; use -w or -e lex option.", c);
else warning(
"Non-ASCII character of value %#x in pattern; use -w or -e lex option.", c);
+ }
/* In any case, we don't need to construct ncgidtbl[]. */
return;
}
@@ -301,7 +302,7 @@ repbycgid(void)
symbol[j] = FALSE;
s = (CHR *) left[i];
- while (cc = *s++) {
+ while ((cc = *s++) != 0) {
if (cc == RANGE) {
int low, high, i;
/*
@@ -388,7 +389,7 @@ repbycgid(void)
static void
setsymbol(int i)
{
- if (i > sizeof (symbol))
+ if (i > (int)sizeof (symbol))
error("setsymbol: (SYSERR) %d out of range", i);
symbol[i] = TRUE;
}
diff --git a/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c b/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c
index 4697128c90..82c296a669 100644
--- a/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c
+++ b/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c
@@ -22,6 +22,7 @@
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
*/
/*
@@ -32,15 +33,15 @@
*
* When a device is added to the system:
*
- * 1. Search for any vdevs whose devid matches that of the newly added
+ * 1. Search for any vdevs whose devid matches that of the newly added
* device.
*
- * 2. If no vdevs are found, then search for any vdevs whose devfs path
+ * 2. If no vdevs are found, then search for any vdevs whose devfs path
* matches that of the new device.
*
* 3. If no vdevs match by either method, then ignore the event.
*
- * 4. Attempt to online the device with a flag to indicate that it should
+ * 4. Attempt to online the device with a flag to indicate that it should
* be unspared when resilvering completes. If this succeeds, then the
* same device was inserted and we should continue normally.
*
@@ -319,11 +320,11 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
* string. However, we allow substring matches in the following
* cases:
*
- * <path>: This is a devpath, and the target is one
- * of its children.
+ * <path>: This is a devpath, and the target is one
+ * of its children.
*
- * <path/> This is a devid for a whole disk, and
- * the target is one of its children.
+ * <path/> This is a devid for a whole disk, and
+ * the target is one of its children.
*/
if (path[len] != '\0' && path[len] != ':' &&
path[len - 1] != '/')
@@ -555,7 +556,7 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
vdev_state_t newstate;
nvlist_t *tgt;
- syseventd_print(9, "zfsdle_vdev_online: searching for %s in pool %s\n",
+ syseventd_print(9, "%s: searching for %s in pool %s\n", __func__,
devname, zpool_get_name(zhp));
if ((tgt = zpool_find_vdev_by_physpath(zhp, devname,
@@ -568,6 +569,11 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
&wholedisk) == 0);
+ syseventd_print(9, "%s: "
+ "found %s in pool %s (wholedisk: %s)\n", __func__,
+ path, zpool_get_name(zhp),
+ wholedisk != 0 ? "true" : "false");
+
(void) strlcpy(fullpath, path, sizeof (fullpath));
if (wholedisk) {
fullpath[strlen(fullpath) - 2] = '\0';
@@ -581,12 +587,13 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
}
if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
- syseventd_print(9, "zfsdle_vdev_online: setting device"
- " device %s to ONLINE state in pool %s.\n",
- fullpath, zpool_get_name(zhp));
- if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL)
+ syseventd_print(9, "%s: "
+ "setting device %s to ONLINE state in pool %s.\n",
+ __func__, fullpath, zpool_get_name(zhp));
+ if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
(void) zpool_vdev_online(zhp, fullpath, 0,
&newstate);
+ }
}
zpool_close(zhp);
return (1);
diff --git a/usr/src/compat/bhyve/amd64/machine/fpu.h b/usr/src/compat/bhyve/amd64/machine/fpu.h
deleted file mode 100644
index 6bc651d996..0000000000
--- a/usr/src/compat/bhyve/amd64/machine/fpu.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source. A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- */
-
-/*
- * Copyright 2014 Pluribus Networks Inc.
- * Copyright (c) 2018, Joyent, Inc.
- */
-
-#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_FPU_H_
-#define _COMPAT_FREEBSD_AMD64_MACHINE_FPU_H_
-
-void fpuexit(kthread_t *td);
-void fpurestore(void *);
-void fpusave(void *);
-
-struct savefpu *fpu_save_area_alloc(void);
-void fpu_save_area_free(struct savefpu *fsa);
-void fpu_save_area_reset(struct savefpu *fsa);
-
-#endif /* _COMPAT_FREEBSD_AMD64_MACHINE_FPU_H_ */
diff --git a/usr/src/compat/bhyve/amd64/machine/pcb.h b/usr/src/compat/bhyve/amd64/machine/pcb.h
deleted file mode 100644
index 75b5de640c..0000000000
--- a/usr/src/compat/bhyve/amd64/machine/pcb.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source. A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- */
-
-/*
- * Copyright 2014 Pluribus Networks Inc.
- */
-
-#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_PCB_H_
-#define _COMPAT_FREEBSD_AMD64_MACHINE_PCB_H_
-
-#include <machine/fpu.h>
-
-#endif /* _COMPAT_FREEBSD_AMD64_MACHINE_PCB_H_ */
diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c
index f539ef1f76..beae63a0ca 100644
--- a/usr/src/lib/libzfs/common/libzfs_pool.c
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c
@@ -27,6 +27,7 @@
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
*/
#include <ctype.h>
@@ -39,6 +40,7 @@
#include <strings.h>
#include <unistd.h>
#include <libgen.h>
+#include <sys/dkio.h>
#include <sys/efi_partition.h>
#include <sys/vtoc.h>
#include <sys/zfs_ioctl.h>
@@ -2801,6 +2803,7 @@ static int
zpool_relabel_disk(libzfs_handle_t *hdl, const char *name, const char *msg)
{
char path[MAXPATHLEN];
+ enum dkio_state st;
int fd, error;
int (*_efi_use_whole_disk)(int);
@@ -2822,12 +2825,25 @@ zpool_relabel_disk(libzfs_handle_t *hdl, const char *name, const char *msg)
* ignore that error and continue on.
*/
error = _efi_use_whole_disk(fd);
- (void) close(fd);
if (error && error != VT_ENOSPC) {
+ (void) close(fd);
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
"relabel '%s': unable to read disk capacity"), name);
return (zfs_error(hdl, EZFS_NOCAP, msg));
}
+
+ /*
+ * Writing a new EFI partition table to the disk will have marked
+ * the geometry as needing re-validation. Before returning, force
+ * it to be checked by querying the device state, otherwise the
+ * subsequent vdev_reopen() will very likely fail to read the device
+ * size, faulting the pool.
+ */
+ st = DKIO_NONE;
+ (void) ioctl(fd, DKIOCSTATE, (caddr_t)&st);
+
+ (void) close(fd);
+
return (0);
}
diff --git a/usr/src/pkg/manifests/system-bhyve-tests.p5m b/usr/src/pkg/manifests/system-bhyve-tests.p5m
index 5b4a7351c4..823ed69a60 100644
--- a/usr/src/pkg/manifests/system-bhyve-tests.p5m
+++ b/usr/src/pkg/manifests/system-bhyve-tests.p5m
@@ -38,6 +38,7 @@ file path=opt/bhyve-tests/tests/mevent/read_requeue mode=0555
file path=opt/bhyve-tests/tests/mevent/vnode_file mode=0555
file path=opt/bhyve-tests/tests/mevent/vnode_zvol mode=0555
dir path=opt/bhyve-tests/tests/vmm
+file path=opt/bhyve-tests/tests/vmm/fpu_getset mode=0555
file path=opt/bhyve-tests/tests/vmm/mem_partial mode=0555
file path=opt/bhyve-tests/tests/vmm/mem_seg_map mode=0555
license lic_CDDL license=lic_CDDL
diff --git a/usr/src/pkg/manifests/system-kernel-platform.p5m b/usr/src/pkg/manifests/system-kernel-platform.p5m
index a7318e04e4..094eb360db 100644
--- a/usr/src/pkg/manifests/system-kernel-platform.p5m
+++ b/usr/src/pkg/manifests/system-kernel-platform.p5m
@@ -559,6 +559,8 @@ $(i386_ONLY)file path=platform/i86pc/kernel/misc/$(ARCH64)/acpidev group=sys \
mode=0755
$(i386_ONLY)file path=platform/i86pc/kernel/misc/$(ARCH64)/gfx_private \
group=sys mode=0755
+$(i386_ONLY)file path=platform/i86pc/kernel/misc/$(ARCH64)/pci_prd group=sys \
+ mode=0755
$(i386_ONLY)dir path=platform/i86pc/ucode group=sys
$(i386_ONLY)dir path=platform/i86xpv group=sys
$(i386_ONLY)dir path=platform/i86xpv/kernel group=sys
diff --git a/usr/src/test/bhyve-tests/runfiles/default.run b/usr/src/test/bhyve-tests/runfiles/default.run
index babfa0f7e9..3055f3e2d8 100644
--- a/usr/src/test/bhyve-tests/runfiles/default.run
+++ b/usr/src/test/bhyve-tests/runfiles/default.run
@@ -20,7 +20,7 @@ post =
outputdir = /var/tmp/test_results
[/opt/bhyve-tests/tests/vmm]
-tests = ['mem_partial', 'mem_seg_map']
+tests = ['mem_partial', 'mem_seg_map', 'fpu_getset']
# Tests of userspace mevent system, built from cmd/bhyve
[/opt/bhyve-tests/tests/mevent]
diff --git a/usr/src/test/bhyve-tests/tests/vmm/Makefile b/usr/src/test/bhyve-tests/tests/vmm/Makefile
index c91ed9a7e4..30d06a0f6b 100644
--- a/usr/src/test/bhyve-tests/tests/vmm/Makefile
+++ b/usr/src/test/bhyve-tests/tests/vmm/Makefile
@@ -16,7 +16,8 @@ include $(SRC)/cmd/Makefile.cmd.64
include $(SRC)/test/Makefile.com
PROG = mem_partial \
- mem_seg_map
+ mem_seg_map \
+ fpu_getset
COMMON_OBJS = common.o
CLEAN_OBJS = $(PROG:%=%.o)
diff --git a/usr/src/test/bhyve-tests/tests/vmm/common.c b/usr/src/test/bhyve-tests/tests/vmm/common.c
index b7f0a30ed0..622a14c61f 100644
--- a/usr/src/test/bhyve-tests/tests/vmm/common.c
+++ b/usr/src/test/bhyve-tests/tests/vmm/common.c
@@ -23,12 +23,13 @@
#include <vmmapi.h>
struct vmctx *
-create_test_vm(void)
+create_test_vm(const char *test_suite_name)
{
char name[VM_MAX_NAMELEN];
int res;
- (void) snprintf(name, sizeof (name), "bhyve-test-memmap-%d", getpid());
+ (void) snprintf(name, sizeof (name), "bhyve-test-%s-%d",
+ test_suite_name, getpid());
res = vm_create(name, 0);
if (res != 0) {
diff --git a/usr/src/test/bhyve-tests/tests/vmm/common.h b/usr/src/test/bhyve-tests/tests/vmm/common.h
index 7b64574cf2..f210408b71 100644
--- a/usr/src/test/bhyve-tests/tests/vmm/common.h
+++ b/usr/src/test/bhyve-tests/tests/vmm/common.h
@@ -16,7 +16,7 @@
#ifndef _COMMON_H_
#define _COMMON_H_
-struct vmctx *create_test_vm(void);
+struct vmctx *create_test_vm(const char *);
int alloc_memseg(struct vmctx *, int, size_t, const char *);
#define PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC)
diff --git a/usr/src/test/bhyve-tests/tests/vmm/fpu_getset.c b/usr/src/test/bhyve-tests/tests/vmm/fpu_getset.c
new file mode 100644
index 0000000000..814e15dec3
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/vmm/fpu_getset.c
@@ -0,0 +1,333 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stropts.h>
+#include <strings.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <libgen.h>
+#include <sys/debug.h>
+#include <sys/fp.h>
+
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <sys/x86_archext.h>
+#include <vmmapi.h>
+
+#include "common.h"
+
+/*
+ * Minimal xsave state area (sans any AVX storage): the legacy fxsave region
+ * followed by the xsave header.
+ */
+struct xsave_min {
+ struct fxsave_state legacy;
+ struct xsave_header header;
+};
+
+/*
+ * The struct above must be exactly MIN_XSAVE_SIZE bytes, the smallest
+ * save-area size main() accepts from VM_DESC_FPU_AREA.
+ */
+CTASSERT(sizeof (struct xsave_min) == MIN_XSAVE_SIZE);
+
+/*
+ * Contents of the AVX component of the save area; check_avx() overlays this
+ * on the buffer at the offset reported by the XFEATURE_AVX descriptor entry.
+ */
+struct avx_state {
+ /* 16 x 128-bit: high portions of the ymm registers */
+ uint64_t ymm[32];
+};
+
+/*
+ * Issue VM_GET_FPU on the vmm device 'fd' using the request in 'req'.
+ * Returns true on success; otherwise reports the error via perror() and
+ * returns false.
+ */
+static bool
+get_fpu(int fd, struct vm_fpu_state *req)
+{
+	if (ioctl(fd, VM_GET_FPU, req) == 0) {
+		return (true);
+	}
+
+	perror("could not read FPU for vCPU");
+	return (false);
+}
+
+/*
+ * Issue VM_SET_FPU on the vmm device 'fd' using the request in 'req'.
+ * Returns true on success; otherwise reports the error via perror() and
+ * returns false.
+ */
+static bool
+set_fpu(int fd, struct vm_fpu_state *req)
+{
+	if (ioctl(fd, VM_SET_FPU, req) == 0) {
+		return (true);
+	}
+
+	perror("could not write FPU for vCPU");
+	return (false);
+}
+
+/*
+ * Exercise the legacy x87/SSE portion of the FPU state of vCPU 0.
+ *
+ * Checks, in order, that:
+ *  - the descriptor lists both XFEATURE_LEGACY_FP and XFEATURE_SSE, each at
+ *    offset 0 and sized as a struct fxsave_state;
+ *  - the state read back has exactly those two features set in xstate_bv;
+ *  - values written into the xmm area survive a set/get round trip;
+ *  - a VM_SET_FPU carrying an MXCSR of UINT32_MAX is refused.
+ *
+ * 'fpu_area' is a caller-supplied scratch buffer of 'fpu_size' bytes and is
+ * overwritten.  Returns true if every check passes; otherwise a diagnostic is
+ * written to stderr and false is returned.
+ */
+static bool
+check_sse(int fd, const struct vm_fpu_desc *desc, void *fpu_area,
+    size_t fpu_size)
+{
+	/* Make sure the x87/MMX/SSE state is described as present */
+	bool found_fp = false, found_sse = false;
+	for (uint_t i = 0; i < desc->vfd_num_entries; i++) {
+		const struct vm_fpu_desc_entry *ent = &desc->vfd_entry_data[i];
+
+		switch (ent->vfde_feature) {
+		case XFEATURE_LEGACY_FP:
+			found_fp = true;
+			if (ent->vfde_off != 0 ||
+			    ent->vfde_size != sizeof (struct fxsave_state)) {
+				(void) fprintf(stderr,
+				    "unexpected entity for %x: "
+				    "size=%x off=%x\n", ent->vfde_feature,
+				    ent->vfde_size, ent->vfde_off);
+				return (false);
+			}
+			break;
+		case XFEATURE_SSE:
+			found_sse = true;
+			if (ent->vfde_off != 0 ||
+			    ent->vfde_size != sizeof (struct fxsave_state)) {
+				(void) fprintf(stderr,
+				    "unexpected entity for %x: "
+				    "size=%x off=%x\n", ent->vfde_feature,
+				    ent->vfde_size, ent->vfde_off);
+				return (false);
+			}
+			break;
+		}
+	}
+
+	if (!found_fp || !found_sse) {
+		(void) fprintf(stderr, "did not find x87 and SSE area "
+		    "descriptors as expected in initial FPU\n");
+		return (false);
+	}
+
+	struct vm_fpu_state req = {
+		.vcpuid = 0,
+		.buf = fpu_area,
+		.len = fpu_size,
+	};
+
+	if (!get_fpu(fd, &req)) {
+		return (false);
+	}
+
+	struct xsave_min *xs = fpu_area;
+	/*
+	 * Executing this test on a freshly-created instance, we expect the FPU
+	 * to only have the legacy and SSE features present in its active state.
+	 */
+	if (xs->header.xsh_xstate_bv != (XFEATURE_LEGACY_FP | XFEATURE_SSE)) {
+		/*
+		 * The expected value is cast so that its width matches %lx
+		 * however the XFEATURE_* constants happen to be typed.
+		 */
+		(void) fprintf(stderr, "bad xstate_bv %lx, expected %lx\n",
+		    xs->header.xsh_xstate_bv,
+		    (uint64_t)(XFEATURE_LEGACY_FP | XFEATURE_SSE));
+		return (false);
+	}
+
+	/* load some SSE values to check for a get/set cycle */
+	uint64_t *xmm = (void *)&xs->legacy.fx_xmm[0];
+	xmm[0] = UINT64_MAX;
+	xmm[2] = 1;
+
+	if (!set_fpu(fd, &req)) {
+		return (false);
+	}
+
+	/* check that those values made it in/out of the guest FPU */
+	bzero(fpu_area, fpu_size);
+	if (!get_fpu(fd, &req)) {
+		return (false);
+	}
+	if (xmm[0] != UINT64_MAX || xmm[2] != 1) {
+		(void) fprintf(stderr, "SSE test registers not saved\n");
+		return (false);
+	}
+
+	/*
+	 * Make sure that a bogus MXCSR value is rejected.  The ioctl is
+	 * issued directly rather than through set_fpu() because failure is
+	 * the expected outcome and should not be reported via perror().
+	 */
+	xs->legacy.fx_mxcsr = UINT32_MAX;
+	int res = ioctl(fd, VM_SET_FPU, &req);
+	if (res == 0) {
+		(void) fprintf(stderr,
+		    "write of invalid MXCSR erroneously allowed\n");
+		return (false);
+	}
+
+	return (true);
+}
+
+/*
+ * Exercise the AVX portion of the FPU state of vCPU 0.
+ *
+ * If the descriptor has no XFEATURE_AVX entry the test is skipped and true is
+ * returned.  Otherwise this checks that:
+ *  - the AVX entry is sized as a struct avx_state and lies within fpu_size;
+ *  - the current state has only the legacy and SSE bits set in xstate_bv;
+ *  - YMM data written without XFEATURE_AVX set in xstate_bv is not retained;
+ *  - YMM data written with XFEATURE_AVX set survives a set/get round trip.
+ *
+ * 'fpu_area' is a caller-supplied scratch buffer of 'fpu_size' bytes and is
+ * overwritten.  Returns true if every check passes (or the test is skipped);
+ * otherwise a diagnostic is written to stderr and false is returned.
+ */
+static bool
+check_avx(int fd, const struct vm_fpu_desc *desc, void *fpu_area,
+    size_t fpu_size)
+{
+	bool found_avx = false;
+	size_t avx_size = 0, avx_off = 0;
+	for (uint_t i = 0; i < desc->vfd_num_entries; i++) {
+		const struct vm_fpu_desc_entry *ent = &desc->vfd_entry_data[i];
+
+		if (ent->vfde_feature == XFEATURE_AVX) {
+			found_avx = true;
+			avx_size = ent->vfde_size;
+			avx_off = ent->vfde_off;
+			break;
+		}
+	}
+
+	if (!found_avx) {
+		(void) printf("AVX capability not found on host CPU, "
+		    "skipping related tests\n");
+		return (true);
+	}
+
+	/* size_t values are printed with the 'z' length modifier */
+	if (avx_size != sizeof (struct avx_state)) {
+		(void) fprintf(stderr, "unexpected AVX state size: %zx, "
+		    "expected %zx\n", avx_size, sizeof (struct avx_state));
+		return (false);
+	}
+	if ((avx_off + avx_size) > fpu_size) {
+		(void) fprintf(stderr, "AVX data falls outside fpu size: "
+		    "%zx > %zx\n", avx_off + avx_size, fpu_size);
+		return (false);
+	}
+
+	struct xsave_min *xs = fpu_area;
+	/* Byte-offset into the buffer without doing arithmetic on a void * */
+	struct avx_state *avx = (void *)((uintptr_t)fpu_area + avx_off);
+
+	/* do a simple data round-trip */
+	struct vm_fpu_state req = {
+		.vcpuid = 0,
+		.buf = fpu_area,
+		.len = fpu_size,
+	};
+	if (!get_fpu(fd, &req)) {
+		return (false);
+	}
+
+	/* With AVX unused so far, we expect it to be absent from the BV */
+	if (xs->header.xsh_xstate_bv != (XFEATURE_LEGACY_FP | XFEATURE_SSE)) {
+		(void) fprintf(stderr, "bad xstate_bv %lx, expected %lx\n",
+		    xs->header.xsh_xstate_bv,
+		    (uint64_t)(XFEATURE_LEGACY_FP | XFEATURE_SSE));
+		return (false);
+	}
+
+	avx->ymm[0] = UINT64_MAX;
+	avx->ymm[2] = 2;
+
+	/* first write without asserting AVX in BV */
+	if (!set_fpu(fd, &req)) {
+		return (false);
+	}
+
+	/* And check that the AVX state stays empty */
+	bzero(fpu_area, fpu_size);
+	if (!get_fpu(fd, &req)) {
+		return (false);
+	}
+	if (xs->header.xsh_xstate_bv != (XFEATURE_LEGACY_FP | XFEATURE_SSE)) {
+		(void) fprintf(stderr, "xstate_bv changed unexpectedly %lx\n",
+		    xs->header.xsh_xstate_bv);
+		return (false);
+	}
+	if (avx->ymm[0] != 0 || avx->ymm[2] != 0) {
+		(void) fprintf(stderr, "YMM state changed unexpectedly "
+		    "%lx %lx\n", avx->ymm[0], avx->ymm[2]);
+		return (false);
+	}
+
+	/* Now write YMM and set the appropriate AVX BV state */
+	avx->ymm[0] = UINT64_MAX;
+	avx->ymm[2] = 2;
+	xs->header.xsh_xstate_bv |= XFEATURE_AVX;
+	if (!set_fpu(fd, &req)) {
+		return (false);
+	}
+
+	/* ... and now check that it stuck */
+	bzero(fpu_area, fpu_size);
+	if (!get_fpu(fd, &req)) {
+		return (false);
+	}
+	if ((xs->header.xsh_xstate_bv & XFEATURE_AVX) == 0) {
+		(void) fprintf(stderr, "AVX missing from xstate_bv %lx\n",
+		    xs->header.xsh_xstate_bv);
+		return (false);
+	}
+	if (avx->ymm[0] != UINT64_MAX || avx->ymm[2] != 2) {
+		/* expected values are cast to match the %lx conversions */
+		(void) fprintf(stderr, "YMM state not preserved "
+		    "%lx != %lx | %lx != %lx\n",
+		    avx->ymm[0], (uint64_t)UINT64_MAX, avx->ymm[2],
+		    (uint64_t)2);
+		return (false);
+	}
+
+
+	return (true);
+}
+
+/*
+ * Test entry point: create a scratch VM named after this program, query the
+ * FPU save-area description, sanity-check it, and then run the SSE and AVX
+ * get/set checks against a zeroed buffer of the size the kernel asked for.
+ * Prints "<name>\tPASS" or "<name>\tFAIL" and exits accordingly.  If the VM
+ * cannot be created at all, only a perror() message is emitted.
+ */
+int
+main(int argc, char *argv[])
+{
+	const char *suite_name = basename(argv[0]);
+	struct vm_fpu_desc_entry entries[64];
+	struct vm_fpu_desc desc = {
+		.vfd_entry_data = entries,
+		.vfd_num_entries = 64,
+	};
+	struct vmctx *ctx;
+	size_t fpu_size;
+	void *fpu_area;
+	bool passed = false;
+	int fd;
+
+	ctx = create_test_vm(suite_name);
+	if (ctx == NULL) {
+		perror("could not open test VM");
+		return (EXIT_FAILURE);
+	}
+	fd = vm_get_device_fd(ctx);
+
+	if (ioctl(fd, VM_DESC_FPU_AREA, &desc) != 0) {
+		perror("could not query fpu area description");
+		goto done;
+	}
+
+	/* Make sure the XSAVE area described for this machine is reasonable */
+	if (desc.vfd_num_entries == 0) {
+		(void) fprintf(stderr, "no FPU description entries found\n");
+		goto done;
+	}
+	if (desc.vfd_req_size < MIN_XSAVE_SIZE) {
+		(void) fprintf(stderr, "required XSAVE size %lu < "
+		    "expected %lu\n", desc.vfd_req_size, MIN_XSAVE_SIZE);
+		goto done;
+	}
+
+	/* Scratch buffer shared by both checks; start from all zeroes */
+	fpu_size = desc.vfd_req_size;
+	fpu_area = malloc(fpu_size);
+	if (fpu_area == NULL) {
+		perror("could not allocate fpu area");
+		goto done;
+	}
+	bzero(fpu_area, fpu_size);
+
+	/* The AVX check only runs once the SSE check has passed */
+	if (check_sse(fd, &desc, fpu_area, fpu_size) &&
+	    check_avx(fd, &desc, fpu_area, fpu_size)) {
+		passed = true;
+	}
+
+done:
+	/* The VM is torn down before the verdict is printed, pass or fail */
+	vm_destroy(ctx);
+	if (!passed) {
+		(void) printf("%s\tFAIL\n", suite_name);
+		return (EXIT_FAILURE);
+	}
+
+	/* mission accomplished */
+	(void) printf("%s\tPASS\n", suite_name);
+	return (EXIT_SUCCESS);
+}
diff --git a/usr/src/test/bhyve-tests/tests/vmm/mem_partial.c b/usr/src/test/bhyve-tests/tests/vmm/mem_partial.c
index b410c673ab..964fdf95c5 100644
--- a/usr/src/test/bhyve-tests/tests/vmm/mem_partial.c
+++ b/usr/src/test/bhyve-tests/tests/vmm/mem_partial.c
@@ -57,8 +57,9 @@ main(int argc, char *argv[])
struct vmctx *ctx;
int res, fd;
void *guest_mem;
+ const char *suite_name = basename(argv[0]);
- ctx = create_test_vm();
+ ctx = create_test_vm(suite_name);
if (ctx == NULL) {
perror("could open test VM");
return (1);
@@ -192,7 +193,7 @@ main(int argc, char *argv[])
}
/* mission accomplished */
- (void) printf("%s\tPASS\n", basename(argv[0]));
+ (void) printf("%s\tPASS\n", suite_name);
vm_destroy(ctx);
return (0);
diff --git a/usr/src/test/bhyve-tests/tests/vmm/mem_seg_map.c b/usr/src/test/bhyve-tests/tests/vmm/mem_seg_map.c
index e80f18547e..92d90bbf28 100644
--- a/usr/src/test/bhyve-tests/tests/vmm/mem_seg_map.c
+++ b/usr/src/test/bhyve-tests/tests/vmm/mem_seg_map.c
@@ -40,8 +40,9 @@ main(int argc, char *argv[])
struct vmctx *ctx;
int res, fd;
void *seg_obj, *guest_mem;
+ const char *suite_name = basename(argv[0]);
- ctx = create_test_vm();
+ ctx = create_test_vm(suite_name);
if (ctx == NULL) {
perror("could open test VM");
return (1);
@@ -129,7 +130,7 @@ main(int argc, char *argv[])
/* mission accomplished */
vm_destroy(ctx);
- (void) printf("%s\tPASS\n", basename(argv[0]));
+ (void) printf("%s\tPASS\n", suite_name);
return (0);
bail:
diff --git a/usr/src/uts/common/io/blkdev/blkdev.c b/usr/src/uts/common/io/blkdev/blkdev.c
index c0bdb3dab2..611666b0a1 100644
--- a/usr/src/uts/common/io/blkdev/blkdev.c
+++ b/usr/src/uts/common/io/blkdev/blkdev.c
@@ -26,6 +26,7 @@
* Copyright 2017 The MathWorks, Inc. All rights reserved.
* Copyright 2019 Western Digital Corporation.
* Copyright 2020 Joyent, Inc.
+ * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
*/
#include <sys/types.h>
@@ -55,6 +56,11 @@
#include <sys/note.h>
#include <sys/blkdev.h>
#include <sys/scsi/impl/inquiry.h>
+#include <sys/taskq.h>
+#include <sys/taskq_impl.h>
+#include <sys/disp.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/dev.h>
/*
* blkdev is a driver which provides a lot of the common functionality
@@ -122,8 +128,8 @@
*
* Locks
* -----
- * There are 4 instance global locks d_ocmutex, d_ksmutex, d_errmutex and
- * d_statemutex. As well a q_iomutex per waitq/runq pair.
+ * There are 5 instance global locks: d_ocmutex, d_ksmutex, d_errmutex,
+ * d_statemutex and d_dle_mutex, as well as a q_iomutex per waitq/runq pair.
*
* Lock Hierarchy
* --------------
@@ -139,11 +145,16 @@ typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;
typedef struct bd_queue bd_queue_t;
+/*
+ * State bits for the per-instance dynamic LUN expansion (DLE) sysevent task.
+ * They live in the d_dle_state member of struct bd and are updated while
+ * holding d_dle_mutex.
+ */
+typedef enum {
+	/* an event was requested and the task has not consumed it yet */
+	BD_DLE_PENDING = 0x1,
+	/* bd_dle_sysevent_task() is currently executing */
+	BD_DLE_RUNNING = 0x2
+} bd_dle_state_t;
+
struct bd {
void *d_private;
dev_info_t *d_dip;
- kmutex_t d_ocmutex;
- kmutex_t d_ksmutex;
+ kmutex_t d_ocmutex; /* open/close */
+ kmutex_t d_ksmutex; /* kstat */
kmutex_t d_errmutex;
kmutex_t d_statemutex;
kcondvar_t d_statecv;
@@ -183,6 +194,10 @@ struct bd {
ddi_dma_attr_t d_dma;
bd_ops_t d_ops;
bd_handle_t d_handle;
+
+ kmutex_t d_dle_mutex;
+ taskq_ent_t d_dle_ent;
+ bd_dle_state_t d_dle_state;
};
struct bd_handle {
@@ -328,20 +343,34 @@ static struct modlinkage modlinkage = {
static void *bd_state;
static krwlock_t bd_lock;
+static taskq_t *bd_taskq;
int
_init(void)
{
- int rv;
+ char taskq_name[TASKQ_NAMELEN];
+ const char *name;
+ int rv;
rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
- if (rv != DDI_SUCCESS) {
+ if (rv != DDI_SUCCESS)
return (rv);
+
+ name = mod_modname(&modlinkage);
+ (void) snprintf(taskq_name, sizeof (taskq_name), "%s_taskq", name);
+ bd_taskq = taskq_create(taskq_name, 1, minclsyspri, 0, 0, 0);
+ if (bd_taskq == NULL) {
+ cmn_err(CE_WARN, "%s: unable to create %s", name, taskq_name);
+ ddi_soft_state_fini(&bd_state);
+ return (DDI_FAILURE);
}
+
rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
+
rv = mod_install(&modlinkage);
if (rv != DDI_SUCCESS) {
rw_destroy(&bd_lock);
+ taskq_destroy(bd_taskq);
ddi_soft_state_fini(&bd_state);
}
return (rv);
@@ -355,6 +384,7 @@ _fini(void)
rv = mod_remove(&modlinkage);
if (rv == DDI_SUCCESS) {
rw_destroy(&bd_lock);
+ taskq_destroy(bd_taskq);
ddi_soft_state_fini(&bd_state);
}
return (rv);
@@ -696,6 +726,8 @@ bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);
+ mutex_init(&bd->d_dle_mutex, NULL, MUTEX_DRIVER, NULL);
+ bd->d_dle_state = 0;
bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);
@@ -853,6 +885,7 @@ fail_drive_info:
mutex_destroy(&bd->d_statemutex);
mutex_destroy(&bd->d_ocmutex);
mutex_destroy(&bd->d_ksmutex);
+ mutex_destroy(&bd->d_dle_mutex);
ddi_soft_state_free(bd_state, inst);
return (DDI_FAILURE);
}
@@ -891,6 +924,7 @@ bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
mutex_destroy(&bd->d_ocmutex);
mutex_destroy(&bd->d_statemutex);
cv_destroy(&bd->d_statecv);
+ mutex_destroy(&bd->d_dle_mutex);
bd_queues_free(bd);
ddi_soft_state_free(bd_state, ddi_get_instance(dip));
return (DDI_SUCCESS);
@@ -1890,6 +1924,69 @@ bd_runq_exit(bd_xfer_impl_t *xi, int err)
}
+/*
+ * Taskq callback that announces a dynamic LUN expansion (DLE) for the blkdev
+ * instance passed in arg by logging ESC_DEV_DLE sysevents that carry the
+ * device's physical path.
+ *
+ * bd_update_state() dispatches this on bd_taskq. Requests that arrive while
+ * the task is running are recorded in BD_DLE_PENDING rather than dispatched
+ * again; each one observed here causes another pass through the outer loop.
+ */
static void
+bd_dle_sysevent_task(void *arg)
+{
+ nvlist_t *attr = NULL;
+ char *path = NULL;
+ bd_t *bd = arg;
+ dev_info_t *dip = bd->d_dip;
+ size_t n;
+
+ /* Consume the request that led to this dispatch and mark the task active */
+ mutex_enter(&bd->d_dle_mutex);
+ bd->d_dle_state &= ~BD_DLE_PENDING;
+ bd->d_dle_state |= BD_DLE_RUNNING;
+ mutex_exit(&bd->d_dle_mutex);
+
+ dev_err(dip, CE_NOTE, "!dynamic LUN expansion");
+
+ if (nvlist_alloc(&attr, NV_UNIQUE_NAME_TYPE, KM_SLEEP) != 0) {
+ /*
+ * Give up, clearing both flags so that a later state change
+ * is able to dispatch the task afresh.
+ */
+ mutex_enter(&bd->d_dle_mutex);
+ bd->d_dle_state &= ~(BD_DLE_RUNNING|BD_DLE_PENDING);
+ mutex_exit(&bd->d_dle_mutex);
+ return;
+ }
+
+ path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+
+ /*
+ * Build "/devices<device path>:x". The trailing 'x' is a placeholder
+ * that is overwritten with the minor node letter in the loop below, so
+ * path[n - 1] is always the character after the colon.
+ */
+ n = snprintf(path, MAXPATHLEN, "/devices");
+ (void) ddi_pathname(dip, path + n);
+ n = strlen(path);
+ n += snprintf(path + n, MAXPATHLEN - n, ":x");
+
+ for (;;) {
+ /*
+ * On receipt of this event, the ZFS sysevent module will scan
+ * active zpools for child vdevs matching this physical path.
+ * In order to catch both whole disk pools and those with an
+ * EFI boot partition, generate separate sysevents for minor
+ * node 'a' and 'b'. (By comparison, io/scsi/targets/sd.c sends
+ * events for just 'a')
+ */
+ for (char c = 'a'; c < 'c'; c++) {
+ path[n - 1] = c;
+
+ /* Without the path attribute, stop sending for this pass */
+ if (nvlist_add_string(attr, DEV_PHYS_PATH, path) != 0)
+ break;
+
+ (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW,
+ EC_DEV_STATUS, ESC_DEV_DLE, attr, NULL, DDI_SLEEP);
+ }
+
+ /*
+ * If no further request arrived while the events were being
+ * sent, mark the task idle and finish. Otherwise consume that
+ * request and go around again.
+ */
+ mutex_enter(&bd->d_dle_mutex);
+ if ((bd->d_dle_state & BD_DLE_PENDING) == 0) {
+ bd->d_dle_state &= ~BD_DLE_RUNNING;
+ mutex_exit(&bd->d_dle_mutex);
+ break;
+ }
+ bd->d_dle_state &= ~BD_DLE_PENDING;
+ mutex_exit(&bd->d_dle_mutex);
+ }
+
+ nvlist_free(attr);
+ kmem_free(path, MAXPATHLEN);
+}
+
+static void
bd_update_state(bd_t *bd)
{
enum dkio_state state = DKIO_INSERTED;
@@ -1908,8 +2005,7 @@ bd_update_state(bd_t *bd)
if ((media.m_blksize < 512) ||
(!ISP2(media.m_blksize)) ||
(P2PHASE(bd->d_maxxfer, media.m_blksize))) {
- cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
- ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
+ dev_err(bd->d_dip, CE_WARN, "Invalid media block size (%d)",
media.m_blksize);
/*
* We can't use the media, treat it as not present.
@@ -1954,6 +2050,21 @@ done:
if (docmlb) {
if (state == DKIO_INSERTED) {
(void) cmlb_validate(bd->d_cmlbh, 0, 0);
+
+ mutex_enter(&bd->d_dle_mutex);
+ /*
+ * If there is already an event pending, there's
+ * nothing to do; we coalesce multiple events.
+ */
+ if ((bd->d_dle_state & BD_DLE_PENDING) == 0) {
+ if ((bd->d_dle_state & BD_DLE_RUNNING) == 0) {
+ taskq_dispatch_ent(bd_taskq,
+ bd_dle_sysevent_task, bd, 0,
+ &bd->d_dle_ent);
+ }
+ bd->d_dle_state |= BD_DLE_PENDING;
+ }
+ mutex_exit(&bd->d_dle_mutex);
} else {
cmlb_invalidate(bd->d_cmlbh, 0);
}
diff --git a/usr/src/uts/common/io/iwn/if_iwnreg.h b/usr/src/uts/common/io/iwn/if_iwnreg.h
index 78bfc3088f..e6d6d6d4b8 100644
--- a/usr/src/uts/common/io/iwn/if_iwnreg.h
+++ b/usr/src/uts/common/io/iwn/if_iwnreg.h
@@ -100,10 +100,10 @@
#define IWN_MEM_WADDR 0x410
#define IWN_MEM_WDATA 0x418
#define IWN_MEM_RDATA 0x41c
-#define IWN_PRPH_WADDR 0x444
-#define IWN_PRPH_RADDR 0x448
-#define IWN_PRPH_WDATA 0x44c
-#define IWN_PRPH_RDATA 0x450
+#define IWN_PRPH_WADDR 0x444
+#define IWN_PRPH_RADDR 0x448
+#define IWN_PRPH_WDATA 0x44c
+#define IWN_PRPH_RDATA 0x450
#define IWN_HBUS_TARG_WRPTR 0x460
/*
@@ -1694,8 +1694,8 @@ static const struct iwn_chan_band {
{ 11, { 36, 44, 52, 60, 100, 108, 116, 124, 132, 149, 157 } }
};
-#define IWN1000_OTP_NBLOCKS 3
-#define IWN6000_OTP_NBLOCKS 4
+#define IWN1000_OTP_NBLOCKS 3
+#define IWN6000_OTP_NBLOCKS 4
#define IWN6050_OTP_NBLOCKS 7
/* HW rate indices. */
@@ -1971,7 +1971,7 @@ static const char * const iwn_fw_errmsg[] = {
"NMI_INTERRUPT_DATA_ACTION_PT",
"NMI_TRM_HW_ER",
"NMI_INTERRUPT_TRM",
- "NMI_INTERRUPT_BREAKPOINT"
+ "NMI_INTERRUPT_BREAKPOINT",
"DEBUG_0",
"DEBUG_1",
"DEBUG_2",
diff --git a/usr/src/uts/common/io/nvme/nvme.c b/usr/src/uts/common/io/nvme/nvme.c
index ad076201e0..e9c779e323 100644
--- a/usr/src/uts/common/io/nvme/nvme.c
+++ b/usr/src/uts/common/io/nvme/nvme.c
@@ -445,6 +445,8 @@ static int nvme_open(dev_t *, int, int, cred_t *);
static int nvme_close(dev_t, int, int, cred_t *);
static int nvme_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+static void nvme_changed_ns(nvme_t *, int);
+
static ddi_ufm_ops_t nvme_ufm_ops = {
NULL,
nvme_ufm_fill_image,
@@ -1955,11 +1957,7 @@ nvme_async_event_task(void *arg)
if (nsid == 0) /* end of list */
break;
-
- dev_err(nvme->n_dip, CE_CONT,
- "namespace %u (%s) has changed.\n",
- nsid, nvme->n_ns[nsid - 1].ns_name);
- /* TODO: handle namespace resize. */
+ nvme_changed_ns(nvme, nsid);
}
break;
@@ -2693,6 +2691,41 @@ nvme_prepare_devid(nvme_t *nvme, uint32_t nsid)
nvme->n_idctl->id_vid, model, serial, nsid);
}
+/*
+ * Handle a change notification for namespace nsid, which is 1-based and
+ * selects nvme->n_ns[nsid - 1].
+ *
+ * The change is always logged. Unless the namespace is being ignored, it is
+ * then re-identified: the cached identify data is replaced (the old copy is
+ * freed), the block count and block sizes are recomputed from the new data,
+ * and blkdev is notified through bd_state_change(). If the identify command
+ * fails, a warning is logged and the cached state is left untouched.
+ */
+static void
+nvme_changed_ns(nvme_t *nvme, int nsid)
+{
+	nvme_namespace_t *ns = &nvme->n_ns[nsid - 1];
+	nvme_identify_nsid_t *idns, *oidns;
+
+	/* nsid is an int, so format it with %d as the warning below does */
+	dev_err(nvme->n_dip, CE_NOTE, "!namespace %d (%s) has changed.",
+	    nsid, ns->ns_name);
+
+	if (ns->ns_ignore)
+		return;
+
+	/*
+	 * The namespace has changed in some way. At present, we only update
+	 * the device capacity and trigger blkdev to check the device state.
+	 */
+
+	if (nvme_identify(nvme, B_FALSE, nsid, (void **)&idns) != 0) {
+		dev_err(nvme->n_dip, CE_WARN,
+		    "!failed to identify namespace %d", nsid);
+		return;
+	}
+
+	/* Install the new identify data before releasing the old copy */
+	oidns = ns->ns_idns;
+	ns->ns_idns = idns;
+	kmem_free(oidns, sizeof (nvme_identify_nsid_t));
+
+	/* The block size is 2^lbads of the currently formatted LBA format */
+	ns->ns_block_count = idns->id_nsize;
+	ns->ns_block_size =
+	    1 << idns->id_lbaf[idns->id_flbas.lba_format].lbaf_lbads;
+	ns->ns_best_block_size = ns->ns_block_size;
+
+	bd_state_change(ns->ns_bd_hdl);
+}
+
static int
nvme_init_ns(nvme_t *nvme, int nsid)
{
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index 24fdd94c11..fed179dad1 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -1136,6 +1136,9 @@ NXGEHDRS= \
nxge_virtual.h \
nxge_espc.h
+PLATHDRS= \
+ pci_prd.h
+
include Makefile.syshdrs
dcam/%.check: dcam/%.h
@@ -1203,7 +1206,8 @@ CHECKHDRS= \
$(I1394HDRS:%.h=1394/%.check) \
$(RSMHDRS:%.h=rsm/%.check) \
$(TSOLHDRS:%.h=tsol/%.check) \
- $(NXGEHDRS:%.h=nxge/%.check)
+ $(NXGEHDRS:%.h=nxge/%.check) \
+ $(PLATHDRS:%.h=plat/%.check)
.KEEP_STATE:
@@ -1243,6 +1247,7 @@ CHECKHDRS= \
$(ROOTTAVORHDRS) \
$(ROOTHERMONHDRS) \
$(ROOTMLNXHDRS) \
+ $(ROOTPLATHDRS) \
$(ROOTSCSIHDRS) \
$(ROOTSCSIADHDRS) \
$(ROOTSCSICONFHDRS) \
@@ -1311,6 +1316,7 @@ install_h: \
$(ROOTTAVORHDRS) \
$(ROOTHERMONHDRS) \
$(ROOTMLNXHDRS) \
+ $(ROOTPLATHDRS) \
$(ROOTSCSIHDRS) \
$(ROOTSCSIADHDRS) \
$(ROOTSCSIISCSIHDRS) \
diff --git a/usr/src/uts/common/sys/plat/pci_prd.h b/usr/src/uts/common/sys/plat/pci_prd.h
new file mode 100644
index 0000000000..aa0a7932b8
--- /dev/null
+++ b/usr/src/uts/common/sys/plat/pci_prd.h
@@ -0,0 +1,130 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _SYS_PLAT_PCI_PRD_H
+#define _SYS_PLAT_PCI_PRD_H
+
+/*
+ * PCI Platform Resource Discovery (PRD)
+ *
+ * This file forms the platform-specific interfaces that a given platform must
+ * implement to support the discovery of PCI resources. In particular:
+ *
+ * o Any root complexes that do not show up through the use of normal scanning
+ * o Available resources per root-port including:
+ * + I/O ports
+ * + Prefetchable Memory
+ * + Normal Memory
+ * + PCI buses
+ * o The naming of slots (the platform uses the PCIe default)
+ *
+ * These interfaces are all expected to be implemented by a platform's 'pci_prd'
+ * module. This is left as a module and not a part of say, unix, so that it can
+ * in turn depend on other modules that a platform might require, such as ACPI.
+ *
+ * In general, unless otherwise indicated, these interfaces will always be
+ * called from kernel context, typically during boot. The interfaces will only
+ * be called from a single thread at this time and any locking is managed at a
+ * layer outside of the pci_prd interfaces. If the subsystem is using some other
+ * interfaces that may be used by multiple consumers and needs locking (e.g.
+ * ACPI), then that still must be considered in the design and implementation.
+ */
+
+#include <sys/types.h>
+#include <sys/memlist.h>
+#include <sys/sunddi.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Resource types that can be asked after.
+ */
+typedef enum pci_prd_rsrc {
+ PCI_PRD_R_IO, /* I/O ports */
+ PCI_PRD_R_MMIO, /* normal (non-prefetchable) memory */
+ PCI_PRD_R_PREFETCH, /* prefetchable memory */
+ PCI_PRD_R_BUS /* PCI bus numbers */
+} pci_prd_rsrc_t;
+
+/*
+ * Callbacks that the PRD consumer hands to the platform module through
+ * pci_prd_init(), allowing the platform code to call back into the consumer.
+ */
+typedef struct pci_prd_upcalls {
+ /*
+ * Return a dev_info_t, if one exists, for this PCI bus.
+ */
+ dev_info_t *(*pru_bus2dip_f)(uint32_t);
+} pci_prd_upcalls_t;
+
+/*
+ * Initialization and teardown functions that will be used by the PCI
+ * enumeration code when it attaches and detaches. If all work is done before
+ * these come up, there is nothing to do; however, after a call to the _init()
+ * function, it is expected that the platform module will be ready to respond to
+ * all function calls.
+ *
+ * Note that the _fini function may never be called on a typical system, as
+ * any PCI(e) devices with attached drivers will result in the PRD consumer
+ * remaining loaded.
+ */
+extern int pci_prd_init(pci_prd_upcalls_t *);
+extern void pci_prd_fini(void);
+
+/*
+ * Return the maximum PCI bus on this platform that should be searched. This
+ * number is the last bus number that should be scanned. e.g. a value of 0x10
+ * indicates that we will search buses [0, 0x10]. In general, it is expected
+ * that platforms will just return 0xff (PCI_MAX_BUS_NUM - 1) unless for some
+ * reason they have other knowledge here.
+ */
+extern uint32_t pci_prd_max_bus(void);
+
+/*
+ * Look up a set of resources that should be assigned to the PCI bus. In
+ * general, it is expected that these are only the buses that are assigned to
+ * root complexes.
+ */
+extern struct memlist *pci_prd_find_resource(uint32_t, pci_prd_rsrc_t);
+
+/*
+ * Originally when only using BIOS-derived (pre-ACPI) sources on i86pc, the
+ * ability to utilize data about multiple buses was considered suspect. As such,
+ * this exists as a way to indicate that resources on each root complex are
+ * actually valid.
+ */
+extern boolean_t pci_prd_multi_root_ok(void);
+
+/*
+ * This is used to allow the PCI enumeration code to ask the platform about any
+ * PCI root complexes that it might know about which might not be discovered
+ * through the normal scanning process. One callback will be emitted for each
+ * PCI bus via a call to the callback function. The return value of the callback
+ * function determines whether we should continue iterating (B_TRUE) or
+ * terminate (B_FALSE).
+ */
+typedef boolean_t (*pci_prd_root_complex_f)(uint32_t, void *);
+extern void pci_prd_root_complex_iter(pci_prd_root_complex_f, void *);
+
+/*
+ * Give the platform a chance to go through and use knowledge that it has
+ * (such as the traditional BIOS PCI IRQ routing table) to name the PCI(e)
+ * slot.
+ */
+extern void pci_prd_slot_name(uint32_t, dev_info_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_PLAT_PCI_PRD_H */
diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files
index 3f387f508c..e29d11b64b 100644
--- a/usr/src/uts/i86pc/Makefile.files
+++ b/usr/src/uts/i86pc/Makefile.files
@@ -25,7 +25,7 @@
# Copyright (c) 2010, Intel Corporation.
# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
# Copyright 2020 Joyent, Inc.
-# Copyright 2021 Oxide Computer Company
+# Copyright 2022 Oxide Computer Company
# Copyright 2021 Jason King
#
# This Makefile defines file modules in the directory uts/i86pc
@@ -287,6 +287,8 @@ VIONA_OBJS += viona_main.o \
PPT_OBJS += ppt.o
+PCI_PRD_OBJS += pci_prd_i86pc.o pci_memlist.o
+
#
# Build up defines and paths.
#
diff --git a/usr/src/uts/i86pc/Makefile.i86pc b/usr/src/uts/i86pc/Makefile.i86pc
index 4a11adcaa2..cd1f6e2db9 100644
--- a/usr/src/uts/i86pc/Makefile.i86pc
+++ b/usr/src/uts/i86pc/Makefile.i86pc
@@ -294,7 +294,7 @@ SYS_KMODS +=
#
# 'Misc' Modules (/kernel/misc):
#
-MISC_KMODS += gfx_private pcie
+MISC_KMODS += gfx_private pcie pci_prd
MISC_KMODS += acpidev
MISC_KMODS += drmach_acpi
diff --git a/usr/src/uts/intel/io/pci/mps_table.h b/usr/src/uts/i86pc/io/pci/mps_table.h
index 8f8c1dc24e..df693eb091 100644
--- a/usr/src/uts/intel/io/pci/mps_table.h
+++ b/usr/src/uts/i86pc/io/pci/mps_table.h
@@ -29,8 +29,6 @@
#ifndef _MPS_TABLE_H
#define _MPS_TABLE_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -42,7 +40,7 @@ struct mps_fps_hdr { /* MP Floating Pointer Structure */
uchar_t fps_len; /* in paragraph (16-bytes units) */
uchar_t fps_spec_rev; /* MP Spec. version no. */
uchar_t fps_cksum; /* checksum of complete structure */
- uchar_t fps_featinfo1; /* mp feature info byte 1 */
+ uchar_t fps_featinfo1; /* mp feature info byte 1 */
uchar_t fps_featinfo2; /* mp feature info byte 2 */
uchar_t fps_featinfo3; /* mp feature info byte 3 */
uchar_t fps_featinfo4; /* mp feature info byte 4 */
@@ -51,7 +49,7 @@ struct mps_fps_hdr { /* MP Floating Pointer Structure */
struct mps_ct_hdr { /* MP Configuration Table Header */
uint32_t ct_sig; /* "PCMP" */
- uint16_t ct_len; /* base configuration in bytes */
+ uint16_t ct_len; /* base configuration in bytes */
uchar_t ct_spec_rev; /* MP Spec. version no. */
uchar_t ct_cksum; /* base configuration table checksum */
char ct_oem_id[8]; /* string identifies the manufacturer */
@@ -60,7 +58,7 @@ struct mps_ct_hdr { /* MP Configuration Table Header */
uint16_t ct_oem_tbl_len; /* size of base OEM table in bytes */
uint16_t ct_entry_cnt; /* no. of entries in the base table */
uint32_t ct_local_apic; /* paddr of local APIC */
- uint16_t ct_ext_tbl_len; /* extended table in bytes */
+ uint16_t ct_ext_tbl_len; /* extended table in bytes */
uchar_t ct_ext_cksum; /* checksum for the extended table */
};
diff --git a/usr/src/uts/intel/io/pci/pci_resource.c b/usr/src/uts/i86pc/io/pci/pci_prd_i86pc.c
index 57dbe12427..5ba872655c 100644
--- a/usr/src/uts/intel/io/pci/pci_resource.c
+++ b/usr/src/uts/i86pc/io/pci/pci_prd_i86pc.c
@@ -19,54 +19,63 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- *
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2016 Joyent, Inc.
+ * Copyright 2019 Western Digital Corporation
* Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2022 Oxide Computer Company
+ */
+
+/*
+ * This file contains the x86 PCI platform resource discovery backend. This uses
+ * data from a combination of sources, preferring ACPI, if present, and if not,
+ * falling back to either the PCI hot-plug resource table or the mps tables.
*
- * pci_resource.c -- routines to retrieve available bus resources from
- * the MP Spec. Table and Hotplug Resource Table
+ * Today, to get information from ACPI we need to start from a dev_info_t. This
+ * is partly why the PRD interface has a callback for getting information about
+ * a dev_info_t. It also means we cannot initialize the tables with information
+ * until all devices have been initially scanned.
*/
#include <sys/types.h>
#include <sys/memlist.h>
+#include <sys/pci.h>
#include <sys/pci_impl.h>
+#include <sys/pci_cfgspace_impl.h>
+#include <sys/sunndi.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
+#include <sys/plat/pci_prd.h>
#include "mps_table.h"
#include "pcihrt.h"
-extern int pci_boot_debug;
extern int pci_bios_maxbus;
-#define dprintf if (pci_boot_debug) printf
+
+int pci_prd_debug = 0;
+#define dprintf if (pci_prd_debug) printf
+#define dcmn_err if (pci_prd_debug != 0) cmn_err
static int tbl_init = 0;
static uchar_t *mps_extp = NULL;
static uchar_t *mps_ext_endp = NULL;
static struct php_entry *hrt_hpep;
-static int hrt_entry_cnt = 0;
+static uint_t hrt_entry_cnt = 0;
static int acpi_cb_cnt = 0;
+static pci_prd_upcalls_t *prd_upcalls;
static void mps_probe(void);
static void acpi_pci_probe(void);
-static int mps_find_bus_res(int, int, struct memlist **);
+static int mps_find_bus_res(uint32_t, pci_prd_rsrc_t, struct memlist **);
static void hrt_probe(void);
-static int hrt_find_bus_res(int, int, struct memlist **);
-static int acpi_find_bus_res(int, int, struct memlist **);
+static int hrt_find_bus_res(uint32_t, pci_prd_rsrc_t, struct memlist **);
+static int acpi_find_bus_res(uint32_t, pci_prd_rsrc_t, struct memlist **);
static uchar_t *find_sig(uchar_t *cp, int len, char *sig);
static int checksum(unsigned char *cp, int len);
static ACPI_STATUS acpi_wr_cb(ACPI_RESOURCE *rp, void *context);
-void bus_res_fini(void);
static void acpi_trim_bus_ranges(void);
-struct memlist *acpi_io_res[256];
-struct memlist *acpi_mem_res[256];
-struct memlist *acpi_pmem_res[256];
-struct memlist *acpi_bus_res[256];
-
/*
* -1 = attempt ACPI resource discovery
* 0 = don't attempt ACPI resource discovery
@@ -74,53 +83,35 @@ struct memlist *acpi_bus_res[256];
*/
volatile int acpi_resource_discovery = -1;
-struct memlist *
-find_bus_res(int bus, int type)
-{
- struct memlist *res = NULL;
- boolean_t bios = B_TRUE;
-
- /* if efi-systab property exist, there is no BIOS */
- if (ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), DDI_PROP_DONTPASS,
- "efi-systab")) {
- bios = B_FALSE;
- }
-
- if (tbl_init == 0) {
- tbl_init = 1;
- acpi_pci_probe();
- if (bios) {
- hrt_probe();
- mps_probe();
- }
- }
-
- if (acpi_find_bus_res(bus, type, &res) > 0)
- return (res);
-
- if (bios && hrt_find_bus_res(bus, type, &res) > 0)
- return (res);
+struct memlist *acpi_io_res[PCI_MAX_BUS_NUM];
+struct memlist *acpi_mem_res[PCI_MAX_BUS_NUM];
+struct memlist *acpi_pmem_res[PCI_MAX_BUS_NUM];
+struct memlist *acpi_bus_res[PCI_MAX_BUS_NUM];
- if (bios)
- (void) mps_find_bus_res(bus, type, &res);
- return (res);
-}
+/*
+ * This indicates whether or not we have a traditional x86 BIOS present or not.
+ */
+static boolean_t pci_prd_have_bios = B_TRUE;
+/*
+ * This value is set up as part of PCI configuration space initialization.
+ */
+extern int pci_bios_maxbus;
static void
acpi_pci_probe(void)
{
ACPI_HANDLE ah;
- dev_info_t *dip;
int bus;
if (acpi_resource_discovery == 0)
return;
for (bus = 0; bus <= pci_bios_maxbus; bus++) {
- /* if no dip or no ACPI handle, no resources to discover */
- dip = pci_bus_res[bus].dip;
- if ((dip == NULL) ||
+ dev_info_t *dip;
+
+ dip = prd_upcalls->pru_bus2dip_f(bus);
+ if (dip == NULL ||
(ACPI_FAILURE(acpica_get_handle(dip, &ah))))
continue;
@@ -142,7 +133,7 @@ acpi_pci_probe(void)
* be trimmed to "0..7", in the example).
*/
static void
-acpi_trim_bus_ranges()
+acpi_trim_bus_ranges(void)
{
struct memlist *ranges, *current;
int bus;
@@ -154,7 +145,7 @@ acpi_trim_bus_ranges()
* - there exists at most 1 bus range entry for each bus number
* - there are no (broken) ranges that start at the same bus number
*/
- for (bus = 0; bus < 256; bus++) {
+ for (bus = 0; bus < PCI_MAX_BUS_NUM; bus++) {
struct memlist *prev, *orig, *new;
/* skip buses with no range entry */
if ((orig = acpi_bus_res[bus]) == NULL)
@@ -211,20 +202,21 @@ acpi_trim_bus_ranges()
}
static int
-acpi_find_bus_res(int bus, int type, struct memlist **res)
+acpi_find_bus_res(uint32_t bus, pci_prd_rsrc_t type, struct memlist **res)
{
+ ASSERT3U(bus, <, PCI_MAX_BUS_NUM);
switch (type) {
- case IO_TYPE:
+ case PCI_PRD_R_IO:
*res = acpi_io_res[bus];
break;
- case MEM_TYPE:
+ case PCI_PRD_R_MMIO:
*res = acpi_mem_res[bus];
break;
- case PREFETCH_TYPE:
+ case PCI_PRD_R_PREFETCH:
*res = acpi_pmem_res[bus];
break;
- case BUSRANGE_TYPE:
+ case PCI_PRD_R_BUS:
*res = acpi_bus_res[bus];
break;
default:
@@ -236,19 +228,6 @@ acpi_find_bus_res(int bus, int type, struct memlist **res)
return (memlist_count(*res));
}
-void
-bus_res_fini(void)
-{
- int bus;
-
- for (bus = 0; bus <= pci_bios_maxbus; bus++) {
- memlist_free_all(&acpi_io_res[bus]);
- memlist_free_all(&acpi_mem_res[bus]);
- memlist_free_all(&acpi_pmem_res[bus]);
- memlist_free_all(&acpi_bus_res[bus]);
- }
-}
-
static struct memlist **
rlistpp(UINT8 t, UINT8 caching, int bus)
{
@@ -298,7 +277,7 @@ acpi_dbg(uint_t bus, uint64_t addr, uint64_t len, uint8_t caching, uint8_t type,
}
-ACPI_STATUS
+static ACPI_STATUS
acpi_wr_cb(ACPI_RESOURCE *rp, void *context)
{
int bus = (intptr_t)context;
@@ -332,7 +311,7 @@ acpi_wr_cb(ACPI_RESOURCE *rp, void *context)
acpi_cb_cnt++;
memlist_insert(&acpi_io_res[bus], rp->Data.Io.Minimum,
rp->Data.Io.AddressLength);
- if (pci_boot_debug != 0) {
+ if (pci_prd_debug != 0) {
acpi_dbg(bus, rp->Data.Io.Minimum,
rp->Data.Io.AddressLength, 0, ACPI_IO_RANGE, "IO");
}
@@ -374,7 +353,7 @@ acpi_wr_cb(ACPI_RESOURCE *rp, void *context)
rp->Data.Address.Info.Mem.Caching, bus),
rp->Data.Address16.Address.Minimum,
rp->Data.Address16.Address.AddressLength);
- if (pci_boot_debug != 0) {
+ if (pci_prd_debug != 0) {
acpi_dbg(bus,
rp->Data.Address16.Address.Minimum,
rp->Data.Address16.Address.AddressLength,
@@ -391,7 +370,7 @@ acpi_wr_cb(ACPI_RESOURCE *rp, void *context)
rp->Data.Address.Info.Mem.Caching, bus),
rp->Data.Address32.Address.Minimum,
rp->Data.Address32.Address.AddressLength);
- if (pci_boot_debug != 0) {
+ if (pci_prd_debug != 0) {
acpi_dbg(bus,
rp->Data.Address32.Address.Minimum,
rp->Data.Address32.Address.AddressLength,
@@ -409,7 +388,7 @@ acpi_wr_cb(ACPI_RESOURCE *rp, void *context)
rp->Data.Address.Info.Mem.Caching, bus),
rp->Data.Address64.Address.Minimum,
rp->Data.Address64.Address.AddressLength);
- if (pci_boot_debug != 0) {
+ if (pci_prd_debug != 0) {
acpi_dbg(bus,
rp->Data.Address64.Address.Minimum,
rp->Data.Address64.Address.AddressLength,
@@ -426,7 +405,7 @@ acpi_wr_cb(ACPI_RESOURCE *rp, void *context)
rp->Data.Address.Info.Mem.Caching, bus),
rp->Data.ExtAddress64.Address.Minimum,
rp->Data.ExtAddress64.Address.AddressLength);
- if (pci_boot_debug != 0) {
+ if (pci_prd_debug != 0) {
acpi_dbg(bus,
rp->Data.ExtAddress64.Address.Minimum,
rp->Data.ExtAddress64.Address.AddressLength,
@@ -450,7 +429,7 @@ acpi_wr_cb(ACPI_RESOURCE *rp, void *context)
}
static void
-mps_probe()
+mps_probe(void)
{
uchar_t *extp;
struct mps_fps_hdr *fpp = NULL;
@@ -521,22 +500,43 @@ mps_probe()
static int
-mps_find_bus_res(int bus, int type, struct memlist **res)
+mps_find_bus_res(uint32_t bus, pci_prd_rsrc_t rsrc, struct memlist **res)
{
struct sasm *sasmp;
uchar_t *extp;
- int res_cnt;
+ int res_cnt, type;
+
+ ASSERT3U(bus, <, PCI_MAX_BUS_NUM);
if (mps_extp == NULL)
return (0);
+
+ switch (rsrc) {
+ case PCI_PRD_R_IO:
+ type = IO_TYPE;
+ break;
+ case PCI_PRD_R_MMIO:
+ type = MEM_TYPE;
+ break;
+ case PCI_PRD_R_PREFETCH:
+ type = PREFETCH_TYPE;
+ break;
+ case PCI_PRD_R_BUS:
+ type = BUSRANGE_TYPE;
+ break;
+ default:
+ *res = NULL;
+ return (0);
+ }
+
extp = mps_extp;
res_cnt = 0;
while (extp < mps_ext_endp) {
switch (*extp) {
case SYS_AS_MAPPING:
sasmp = (struct sasm *)extp;
- if (((int)sasmp->sasm_as_type) == type &&
- ((int)sasmp->sasm_bus_id) == bus) {
+ if (sasmp->sasm_as_type == type &&
+ sasmp->sasm_bus_id == bus) {
uint64_t base, len;
base = (uint64_t)sasmp->sasm_as_base |
@@ -558,11 +558,7 @@ mps_find_bus_res(int bus, int type, struct memlist **res)
cmn_err(CE_WARN, "Unknown descriptor type %d"
" in BIOS Multiprocessor Spec table.",
*extp);
- while (*res) {
- struct memlist *tmp = *res;
- *res = tmp->ml_next;
- memlist_free(tmp);
- }
+ memlist_free_all(res);
return (0);
}
}
@@ -570,7 +566,7 @@ mps_find_bus_res(int bus, int type, struct memlist **res)
}
static void
-hrt_probe()
+hrt_probe(void)
{
struct hrt_hdr *hrtp;
@@ -585,44 +581,46 @@ hrt_probe()
dprintf("PCI Hot-Plug Resource Table version no. <> 1\n");
return;
}
- hrt_entry_cnt = (int)hrtp->hrt_entry_cnt;
+ hrt_entry_cnt = (uint_t)hrtp->hrt_entry_cnt;
dprintf("No. of PCI hot-plug slot entries = 0x%x\n", hrt_entry_cnt);
hrt_hpep = (struct php_entry *)(hrtp + 1);
}
static int
-hrt_find_bus_res(int bus, int type, struct memlist **res)
+hrt_find_bus_res(uint32_t bus, pci_prd_rsrc_t type, struct memlist **res)
{
- int res_cnt, i;
+ int res_cnt;
struct php_entry *hpep;
+ ASSERT3U(bus, <, PCI_MAX_BUS_NUM);
+
if (hrt_hpep == NULL || hrt_entry_cnt == 0)
return (0);
hpep = hrt_hpep;
res_cnt = 0;
- for (i = 0; i < hrt_entry_cnt; i++, hpep++) {
+ for (uint_t i = 0; i < hrt_entry_cnt; i++, hpep++) {
if (hpep->php_pri_bus != bus)
continue;
- if (type == IO_TYPE) {
+ if (type == PCI_PRD_R_IO) {
if (hpep->php_io_start == 0 || hpep->php_io_size == 0)
continue;
memlist_insert(res, (uint64_t)hpep->php_io_start,
(uint64_t)hpep->php_io_size);
res_cnt++;
- } else if (type == MEM_TYPE) {
+ } else if (type == PCI_PRD_R_MMIO) {
if (hpep->php_mem_start == 0 || hpep->php_mem_size == 0)
continue;
memlist_insert(res,
- (uint64_t)(((int)hpep->php_mem_start) << 16),
- (uint64_t)(((int)hpep->php_mem_size) << 16));
+ ((uint64_t)hpep->php_mem_start) << 16,
+ ((uint64_t)hpep->php_mem_size) << 16);
res_cnt++;
- } else if (type == PREFETCH_TYPE) {
+ } else if (type == PCI_PRD_R_PREFETCH) {
if (hpep->php_pfmem_start == 0 ||
hpep->php_pfmem_size == 0)
continue;
memlist_insert(res,
- (uint64_t)(((int)hpep->php_pfmem_start) << 16),
- (uint64_t)(((int)hpep->php_pfmem_size) << 16));
+ ((uint64_t)hpep->php_pfmem_start) << 16,
+ ((uint64_t)hpep->php_pfmem_size) << 16);
res_cnt++;
}
}
@@ -656,67 +654,226 @@ checksum(unsigned char *cp, int len)
return ((int)(cksum & 0xFF));
}
-#ifdef UNUSED_BUS_HIERARY_INFO
+/*
+ * Report the highest PCI bus number recorded in pci_bios_maxbus, converted to
+ * the unsigned type used by the resource discovery interface.
+ */
+uint32_t
+pci_prd_max_bus(void)
+{
+ const uint32_t max_bus = (uint32_t)pci_bios_maxbus;
+
+ return (max_bus);
+}
+
+/*
+ * Return the list of resources of type `rsrc` that firmware describes for
+ * `bus`, or NULL if the bus is beyond pci_bios_maxbus or nothing was found.
+ *
+ * The firmware tables are probed once, on the first call. ACPI is consulted
+ * first; the hot-plug resource table and then the MPS table are only used
+ * when pci_prd_have_bios is set.
+ */
+struct memlist *
+pci_prd_find_resource(uint32_t bus, pci_prd_rsrc_t rsrc)
+{
+ struct memlist *found = NULL;
+
+ if (bus > pci_bios_maxbus)
+ return (NULL);
+
+ /* One-time probe of the tables that back the lookups below. */
+ if (tbl_init == 0) {
+ tbl_init = 1;
+ acpi_pci_probe();
+ if (pci_prd_have_bios) {
+ hrt_probe();
+ mps_probe();
+ }
+ }
+
+ /* An ACPI hit, or the absence of a BIOS, ends the search here. */
+ if (acpi_find_bus_res(bus, rsrc, &found) > 0 || !pci_prd_have_bios)
+ return (found);
+
+ /* Try the hot-plug resource table, then fall back to the MPS table. */
+ if (hrt_find_bus_res(bus, rsrc, &found) <= 0)
+ (void) mps_find_bus_res(bus, rsrc, &found);
+
+ return (found);
+}
+
+/*
+ * Context handed to pci_process_acpi_device() through AcpiGetDevices():
+ * ppac_func is the caller's root complex callback and ppac_arg is the opaque
+ * argument that is passed back to it unchanged alongside each bus number.
+ */
+typedef struct {
+ pci_prd_root_complex_f ppac_func;
+ void *ppac_arg;
+} pci_prd_acpi_cb_t;
+
+/*
+ * AcpiGetDevices() walk callback. For every ACPI device whose _HID names a PCI
+ * or PCI Express root bridge and whose bus number can be determined, invoke
+ * the callback stored in the pci_prd_acpi_cb_t passed as `ctx`.
+ *
+ * Returns AE_OK to keep walking, AE_CTRL_DEPTH to skip the children of this
+ * device, or AE_CTRL_TERMINATE when the callback returned zero.
+ */
+static ACPI_STATUS
+pci_process_acpi_device(ACPI_HANDLE hdl, UINT32 level, void *ctx, void **rv)
+{
+ pci_prd_acpi_cb_t *walk = ctx;
+ ACPI_DEVICE_INFO *info;
+ int is_root = 0;
+ int bbn;
+
+ /*
+ * Use AcpiGetObjectInfo() to obtain the device _HID. A device whose
+ * information cannot be fetched is ignored and the walk continues.
+ */
+ if (ACPI_FAILURE(AcpiGetObjectInfo(hdl, &info)))
+ return (AE_OK);
+
+ /*
+ * Only a valid _HID matching one of the two root bridge identifiers
+ * is of interest. The info buffer is released before deciding.
+ */
+ if ((info->Valid & ACPI_VALID_HID) != 0) {
+ is_root = strncmp(info->HardwareId.String, PCI_ROOT_HID_STRING,
+ sizeof (PCI_ROOT_HID_STRING)) == 0 ||
+ strncmp(info->HardwareId.String,
+ PCI_EXPRESS_ROOT_HID_STRING,
+ sizeof (PCI_EXPRESS_ROOT_HID_STRING)) == 0;
+ }
+ AcpiOsFree(info);
+
+ if (!is_root)
+ return (AE_OK);
+
+ /*
+ * acpica_get_busno() will check the presence of _BBN and
+ * fail if not present. It will then use the _CRS method to
+ * retrieve the actual bus number assigned, it will fall back
+ * to _BBN should the _CRS method fail.
+ */
+ if (ACPI_FAILURE(acpica_get_busno(hdl, &bbn))) {
+ /* PCI and no _BBN, continue walk */
+ return (AE_OK);
+ }
+
+ /*
+ * Ignore invalid _BBN return values here (rather
+ * than panic) and emit a warning; something else
+ * may suffer failure as a result of the broken BIOS.
+ */
+ if (bbn < 0) {
+ dcmn_err(CE_NOTE,
+ "pci_process_acpi_device: invalid _BBN 0x%x",
+ bbn);
+ return (AE_CTRL_DEPTH);
+ }
+
+ return (walk->ppac_func((uint32_t)bbn, walk->ppac_arg) ?
+ AE_CTRL_DEPTH : AE_CTRL_TERMINATE);
+}
+
+/*
+ * Call `func` with `arg` for each root complex bus that firmware describes.
+ */
+void
+pci_prd_root_complex_iter(pci_prd_root_complex_f func, void *arg)
+{
+ pci_prd_acpi_cb_t walk = {
+ .ppac_func = func,
+ .ppac_arg = arg
+ };
+ void *unused;
+
+ /*
+ * ACPI devices are scanned first. The old BIOS IRQ routing table is
+ * then checked for additional buses. Note, slot naming from the IRQ
+ * table comes later.
+ */
+ (void) AcpiGetDevices(NULL, pci_process_acpi_device, &walk, &unused);
+ pci_bios_bus_iter(func, arg);
+}
+
/*
- * At this point, the bus hierarchy entries do not appear to
- * provide anything we can't find out from PCI config space.
- * The only interesting bit is the ISA bus number, which we
- * don't care.
+ * If there is actually a PCI IRQ routing table present, then we want to use
+ * this to go back and update the slot name. In particular, if we have no PCI
+ * IRQ routing table, then we use the existing slot names that were already set
+ * up for us in pciex_slot_names_prop() from the capability register. Otherwise,
+ * we actually delete all slot-names properties from buses and instead use
+ * something from the IRQ routing table if it exists.
+ *
+ * Note, the property is always deleted regardless of whether or not it exists
+ * in the IRQ routing table. Finally, we have traditionally kept "pcie0" names
+ * as special as apparently that can't be represented in the IRQ routing table.
+ *
+ * `bus` is the bus number to look up in the IRQ routing table. `dip` may be
+ * NULL, in which case no property is touched and a note is logged if the
+ * routing table nevertheless names slots for that bus.
+ *
+ * NOTE(review): the property calls operate on pci_bus_res[bus].dip rather than
+ * on the `dip` argument; presumably callers pass that same node -- confirm.
*/
-int
-mps_find_parent_bus(int bus)
+void
+pci_prd_slot_name(uint32_t bus, dev_info_t *dip)
{
- struct sasm *sasmp;
- uchar_t *extp;
+ char slotprop[256];
+ int len;
+ char *slotcap_name;
- if (mps_extp == NULL)
- return (-1);
+ if (pci_irq_nroutes == 0)
+ return;
- extp = mps_extp;
- while (extp < mps_ext_endp) {
- bhdp = (struct bhd *)extp;
- switch (*extp) {
- case SYS_AS_MAPPING:
- extp += SYS_AS_MAPPING_SIZE;
- break;
- case BUS_HIERARCHY_DESC:
- if (bhdp->bhd_bus_id == bus)
- return (bhdp->bhd_parent);
- extp += BUS_HIERARCHY_DESC_SIZE;
- break;
- case COMP_BUS_AS_MODIFIER:
- extp += COMP_BUS_AS_MODIFIER_SIZE;
- break;
- default:
- cmn_err(CE_WARN, "Unknown descriptor type %d"
- " in BIOS Multiprocessor Spec table.",
- *extp);
- return (-1);
+ if (dip != NULL) {
+ boolean_t keep = B_FALSE;
+
+ /*
+ * A successful lookup hands back an allocated copy of the
+ * string, which must be released with ddi_prop_free() once it
+ * has been compared.
+ */
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, pci_bus_res[bus].dip,
+ DDI_PROP_DONTPASS, "slot-names", &slotcap_name) ==
+ DDI_SUCCESS) {
+ keep = (strcmp(slotcap_name, "pcie0") == 0) ?
+ B_TRUE : B_FALSE;
+ ddi_prop_free(slotcap_name);
+ }
+
+ /* Anything other than an existing "pcie0" name is dropped. */
+ if (!keep) {
+ (void) ndi_prop_remove(DDI_DEV_T_NONE,
+ pci_bus_res[bus].dip, "slot-names");
+ }
+ }
+
+
+ len = pci_slot_names_prop(bus, slotprop, sizeof (slotprop));
+ if (len > 0) {
+ if (dip != NULL) {
+ ASSERT((len % sizeof (int)) == 0);
+ (void) ndi_prop_update_int_array(DDI_DEV_T_NONE,
+ pci_bus_res[bus].dip, "slot-names",
+ (int *)slotprop, len / sizeof (int));
+ } else {
+ cmn_err(CE_NOTE, "!BIOS BUG: Invalid bus number in PCI "
+ "IRQ routing table; Not adding slot-names "
+ "property for incorrect bus %d", bus);
}
}
- return (-1);
}
-int
-hrt_find_bus_range(int bus)
+/*
+ * Returns B_TRUE when acpi_resource_discovery is greater than zero, and
+ * B_FALSE otherwise.
+ */
+boolean_t
+pci_prd_multi_root_ok(void)
{
- int i, max_bus, sub_bus;
- struct php_entry *hpep;
+ return (acpi_resource_discovery > 0);
+}
- if (hrt_hpep == NULL || hrt_entry_cnt == 0) {
- return (-1);
+/*
+ * Initialize the resource discovery module: record the caller's upcall table
+ * in prd_upcalls and decide whether the legacy BIOS tables may be consulted.
+ * Always returns 0.
+ */
+int
+pci_prd_init(pci_prd_upcalls_t *upcalls)
+{
+ /*
+ * When the root node carries an "efi-systab" property, clear
+ * pci_prd_have_bios so that the HRT and MPS tables are never probed or
+ * searched.
+ */
+ if (ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), DDI_PROP_DONTPASS,
+ "efi-systab")) {
+ pci_prd_have_bios = B_FALSE;
}
- hpep = hrt_hpep;
- max_bus = -1;
- for (i = 0; i < hrt_entry_cnt; i++, hpep++) {
- if (hpep->php_pri_bus != bus)
- continue;
- sub_bus = (int)hpep->php_subord_bus;
- if (sub_bus > max_bus)
- max_bus = sub_bus;
+
+ prd_upcalls = upcalls;
+
+ return (0);
+}
+
+/*
+ * Tear down the module's cached ACPI resource lists: for every bus from 0
+ * through pci_bios_maxbus (inclusive), free the I/O, memory, prefetchable
+ * memory and bus-number memlists.
+ */
+void
+pci_prd_fini(void)
+{
+ int bus;
+
+ for (bus = 0; bus <= pci_bios_maxbus; bus++) {
+ memlist_free_all(&acpi_io_res[bus]);
+ memlist_free_all(&acpi_mem_res[bus]);
+ memlist_free_all(&acpi_pmem_res[bus]);
+ memlist_free_all(&acpi_bus_res[bus]);
}
- return (max_bus);
}
-#endif /* UNUSED_BUS_HIERARY_INFO */
+/*
+ * Loadable module linkage: this is a "misc" module with no entry points of its
+ * own beyond the standard _init/_info/_fini trio below.
+ */
+static struct modlmisc pci_prd_modlmisc_i86pc = {
+ .misc_modops = &mod_miscops,
+ .misc_linkinfo = "i86pc PCI Resource Discovery"
+};
+
+static struct modlinkage pci_prd_modlinkage_i86pc = {
+ .ml_rev = MODREV_1,
+ .ml_linkage = { &pci_prd_modlmisc_i86pc, NULL }
+};
+
+/* Install the module; returns the result of mod_install(). */
+int
+_init(void)
+{
+ return (mod_install(&pci_prd_modlinkage_i86pc));
+}
+
+/* Report module information; returns the result of mod_info(). */
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&pci_prd_modlinkage_i86pc, modinfop));
+}
+
+/* Remove the module; returns the result of mod_remove(). */
+int
+_fini(void)
+{
+ return (mod_remove(&pci_prd_modlinkage_i86pc));
+}
diff --git a/usr/src/uts/intel/io/pci/pcihrt.h b/usr/src/uts/i86pc/io/pci/pcihrt.h
index 7192eca2c5..7857e0314f 100644
--- a/usr/src/uts/intel/io/pci/pcihrt.h
+++ b/usr/src/uts/i86pc/io/pci/pcihrt.h
@@ -31,16 +31,14 @@
#ifndef _PCIHRT_H
#define _PCIHRT_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
struct hrt_hdr { /* PCI Hot-Plug Configuration Resource Table header */
- uint32_t hrt_sig; /* $HRT */
+ uint32_t hrt_sig; /* $HRT */
uint16_t hrt_avail_imap; /* Bitmap of unused IRQs */
- uint16_t hrt_used_imap; /* Bitmap of IRQs used by PCI */
+ uint16_t hrt_used_imap; /* Bitmap of IRQs used by PCI */
uchar_t hrt_entry_cnt; /* no. of PCI hot-plug slot entries */
uchar_t hrt_ver; /* version no. = 1 */
uchar_t hrt_resv0; /* reserved */
@@ -58,7 +56,7 @@ struct php_entry { /* PCI hot-plug slot entry */
uchar_t php_subord_bus; /* Max Subordinate bus of this slot */
uint16_t php_io_start; /* allocated I/O space starting addr */
uint16_t php_io_size; /* allocated I/O space size in bytes */
- uint16_t php_mem_start; /* allocated Memory space start addr */
+ uint16_t php_mem_start; /* allocated Memory space start addr */
uint16_t php_mem_size; /* allocated Memory space size in 64k */
uint16_t php_pfmem_start; /* allocated Prefetchable Memory start */
uint16_t php_pfmem_size; /* allocated Prefetchable size in 64k */
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
index cf00426300..844e8b9708 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
@@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$");
#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
-#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
index 7584213d39..e94f7a876b 100644
--- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
@@ -39,7 +39,7 @@
*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2019 Joyent, Inc.
- * Copyright 2021 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
* Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
*/
@@ -161,6 +161,8 @@ int vm_get_run_state(struct vm *vm, int vcpuid, uint32_t *state,
uint8_t *sipi_vec);
int vm_set_run_state(struct vm *vm, int vcpuid, uint32_t state,
uint8_t sipi_vec);
+int vm_get_fpu(struct vm *vm, int vcpuid, void *buf, size_t len);
+int vm_set_fpu(struct vm *vm, int vcpuid, void *buf, size_t len);
int vm_run(struct vm *vm, int vcpuid, const struct vm_entry *);
int vm_suspend(struct vm *vm, enum vm_suspend_how how);
int vm_inject_nmi(struct vm *vm, int vcpu);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 16acc1ea2c..78a810880d 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -60,8 +60,8 @@ __FBSDID("$FreeBSD$");
#include <sys/sched.h>
#include <sys/systm.h>
#include <sys/sunddi.h>
+#include <sys/hma.h>
-#include <machine/pcb.h>
#include <machine/md_var.h>
#include <x86/psl.h>
#include <x86/apicreg.h>
@@ -132,7 +132,7 @@ struct vcpu {
int exc_errcode_valid;
uint32_t exc_errcode;
uint8_t sipi_vector; /* (i) SIPI vector */
- struct savefpu *guestfpu; /* (a,i) guest fpu state */
+ hma_fpu_t *guestfpu; /* (a,i) guest fpu state */
uint64_t guest_xcr0; /* (i) guest %xcr0 register */
void *stats; /* (a,i) statistics */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
@@ -318,7 +318,8 @@ vcpu_cleanup(struct vm *vm, int i, bool destroy)
VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
if (destroy) {
vmm_stat_free(vcpu->stats);
- fpu_save_area_free(vcpu->guestfpu);
+ hma_fpu_free(vcpu->guestfpu);
+ vcpu->guestfpu = NULL;
vie_free(vcpu->vie_ctx);
vcpu->vie_ctx = NULL;
vmc_destroy(vcpu->vmclient);
@@ -342,7 +343,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
vcpu->state = VCPU_IDLE;
vcpu->hostcpu = NOCPU;
vcpu->lastloccpu = NOCPU;
- vcpu->guestfpu = fpu_save_area_alloc();
+ vcpu->guestfpu = hma_fpu_alloc(KM_SLEEP);
vcpu->stats = vmm_stat_alloc();
vcpu->vie_ctx = vie_alloc();
@@ -369,7 +370,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
vcpu->extint_pending = 0;
vcpu->exception_pending = 0;
vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
- fpu_save_area_reset(vcpu->guestfpu);
+ hma_fpu_init(vcpu->guestfpu);
vmm_stat_init(vcpu->stats);
vcpu->tsc_offset = 0;
}
@@ -1168,6 +1169,50 @@ vm_set_seg_desc(struct vm *vm, int vcpu, int reg, const struct seg_desc *desc)
return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}
+/*
+ * Map an hma_fpu_xsave_result_t onto an errno value: 0 for success, ENOSPC
+ * for an undersized buffer and EINVAL for every other rejection. A value
+ * outside the enumeration is a programming error and panics.
+ */
+static int
+translate_hma_xsave_result(hma_fpu_xsave_result_t res)
+{
+ int err = 0;
+
+ switch (res) {
+ case HFXR_OK:
+ break;
+ case HFXR_NO_SPACE:
+ err = ENOSPC;
+ break;
+ case HFXR_BAD_ALIGN:
+ case HFXR_UNSUP_FMT:
+ case HFXR_UNSUP_FEAT:
+ case HFXR_INVALID_DATA:
+ err = EINVAL;
+ break;
+ default:
+ panic("unexpected xsave result");
+ }
+
+ return (err);
+}
+
+/*
+ * Copy the saved guest FPU state of vCPU `vcpuid` into `buf` (of `len` bytes)
+ * in XSAVE format. Returns EINVAL for an out-of-range vCPU id, otherwise the
+ * errno translation of the hma_fpu_get_xsave_state() result.
+ */
+int
+vm_get_fpu(struct vm *vm, int vcpuid, void *buf, size_t len)
+{
+ if (!(vcpuid >= 0 && vcpuid < vm->maxcpus))
+ return (EINVAL);
+
+ const hma_fpu_t *guest_fpu = vm->vcpu[vcpuid].guestfpu;
+
+ return (translate_hma_xsave_result(
+ hma_fpu_get_xsave_state(guest_fpu, buf, len)));
+}
+
+/*
+ * Load the guest FPU state of vCPU `vcpuid` from the XSAVE-formatted data in
+ * `buf` (of `len` bytes). Returns EINVAL for an out-of-range vCPU id,
+ * otherwise the errno translation of the hma_fpu_set_xsave_state() result.
+ */
+int
+vm_set_fpu(struct vm *vm, int vcpuid, void *buf, size_t len)
+{
+ if (!(vcpuid >= 0 && vcpuid < vm->maxcpus))
+ return (EINVAL);
+
+ hma_fpu_t *guest_fpu = vm->vcpu[vcpuid].guestfpu;
+
+ return (translate_hma_xsave_result(
+ hma_fpu_set_xsave_state(guest_fpu, buf, len)));
+}
+
int
vm_get_run_state(struct vm *vm, int vcpuid, uint32_t *state, uint8_t *sipi_vec)
{
@@ -1220,13 +1265,9 @@ vm_track_dirty_pages(struct vm *vm, uint64_t gpa, size_t len, uint8_t *bitmap)
static void
restore_guest_fpustate(struct vcpu *vcpu)
{
-
- /* flush host state to the pcb */
- fpuexit(curthread);
-
- /* restore guest FPU state */
+ /* Save host FPU and restore guest FPU */
fpu_stop_emulating();
- fpurestore(vcpu->guestfpu);
+ hma_fpu_start_guest(vcpu->guestfpu);
/* restore guest XCR0 if XSAVE is enabled in the host */
if (rcr4() & CR4_XSAVE)
@@ -1252,9 +1293,9 @@ save_guest_fpustate(struct vcpu *vcpu)
load_xcr(0, vmm_get_host_xcr0());
}
- /* save guest FPU state */
+ /* save guest FPU and restore host FPU */
fpu_stop_emulating();
- fpusave(vcpu->guestfpu);
+ hma_fpu_stop_guest(vcpu->guestfpu);
/*
* When the host state has been restored, we should not re-enable
* CR0.TS on illumos for eager FPU.
@@ -2912,7 +2953,7 @@ vcpu_arch_reset(struct vm *vm, int vcpuid, bool init_only)
*/
if (!init_only) {
vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
- fpu_save_area_reset(vcpu->guestfpu);
+ hma_fpu_init(vcpu->guestfpu);
/* XXX: clear MSRs and other pieces */
}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index a83989e9eb..4ef2e5f583 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -414,6 +414,8 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
case VM_RESET_CPU:
case VM_GET_RUN_STATE:
case VM_SET_RUN_STATE:
+ case VM_GET_FPU:
+ case VM_SET_FPU:
/*
* Copy in the ID of the vCPU chosen for this operation.
* Since a nefarious caller could update their struct between
@@ -469,6 +471,7 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
case VM_GET_GPA_PMAP:
case VM_IOAPIC_PINCOUNT:
case VM_SUSPEND:
+ case VM_DESC_FPU_AREA:
default:
break;
}
@@ -755,6 +758,53 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
}
break;
}
+ case VM_DESC_FPU_AREA: {
+ struct vm_fpu_desc desc;
+ void *buf = NULL;
+
+ if (ddi_copyin(datap, &desc, sizeof (desc), md)) {
+ error = EFAULT;
+ break;
+ }
+ if (desc.vfd_num_entries > 64) {
+ error = EINVAL;
+ break;
+ }
+ const size_t buf_sz = sizeof (struct vm_fpu_desc_entry) *
+ desc.vfd_num_entries;
+ if (buf_sz != 0) {
+ buf = kmem_zalloc(buf_sz, KM_SLEEP);
+ }
+
+ /*
+ * For now, we are depending on vm_fpu_desc_entry and
+ * hma_xsave_state_desc_t having the same format.
+ */
+ CTASSERT(sizeof (struct vm_fpu_desc_entry) ==
+ sizeof (hma_xsave_state_desc_t));
+
+ size_t req_size;
+ const uint_t max_entries = hma_fpu_describe_xsave_state(
+ (hma_xsave_state_desc_t *)buf,
+ desc.vfd_num_entries,
+ &req_size);
+
+ desc.vfd_req_size = req_size;
+ desc.vfd_num_entries = max_entries;
+ if (buf_sz != 0) {
+ if (ddi_copyout(buf, desc.vfd_entry_data, buf_sz, md)) {
+ error = EFAULT;
+ }
+ kmem_free(buf, buf_sz);
+ }
+
+ if (error == 0) {
+ if (ddi_copyout(&desc, datap, sizeof (desc), md)) {
+ error = EFAULT;
+ }
+ }
+ break;
+ }
case VM_ISA_ASSERT_IRQ: {
struct vm_isa_irq isa_irq;
@@ -1040,6 +1090,51 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
vrs.sipi_vector);
break;
}
+ case VM_GET_FPU: {
+ struct vm_fpu_state req;
+ const size_t max_len = (PAGESIZE * 2);
+ void *kbuf;
+
+ if (ddi_copyin(datap, &req, sizeof (req), md)) {
+ error = EFAULT;
+ break;
+ }
+ if (req.len > max_len || req.len == 0) {
+ error = EINVAL;
+ break;
+ }
+ kbuf = kmem_zalloc(req.len, KM_SLEEP);
+ error = vm_get_fpu(sc->vmm_vm, vcpu, kbuf, req.len);
+ if (error == 0) {
+ if (ddi_copyout(kbuf, req.buf, req.len, md)) {
+ error = EFAULT;
+ }
+ }
+ kmem_free(kbuf, req.len);
+ break;
+ }
+ case VM_SET_FPU: {
+ struct vm_fpu_state req;
+ const size_t max_len = (PAGESIZE * 2);
+ void *kbuf;
+
+ if (ddi_copyin(datap, &req, sizeof (req), md)) {
+ error = EFAULT;
+ break;
+ }
+ if (req.len > max_len || req.len == 0) {
+ error = EINVAL;
+ break;
+ }
+ kbuf = kmem_alloc(req.len, KM_SLEEP);
+ if (ddi_copyin(req.buf, kbuf, req.len, md)) {
+ error = EFAULT;
+ } else {
+ error = vm_set_fpu(sc->vmm_vm, vcpu, kbuf, req.len);
+ }
+ kmem_free(kbuf, req.len);
+ break;
+ }
case VM_SET_KERNEMU_DEV:
case VM_GET_KERNEMU_DEV: {
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
index f78db731d6..cdcebc71d4 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
@@ -58,7 +58,6 @@
#include <sys/x86_archext.h>
#include <machine/cpufunc.h>
-#include <machine/fpu.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
@@ -434,67 +433,6 @@ vmm_cpuid_init(void)
cpu_exthigh = regs[0];
}
-/*
- * FreeBSD uses the struct savefpu for managing the FPU state. That is mimicked
- * by our hypervisor multiplexor framework structure.
- */
-struct savefpu *
-fpu_save_area_alloc(void)
-{
- return ((struct savefpu *)hma_fpu_alloc(KM_SLEEP));
-}
-
-void
-fpu_save_area_free(struct savefpu *fsa)
-{
- hma_fpu_t *fpu = (hma_fpu_t *)fsa;
- hma_fpu_free(fpu);
-}
-
-void
-fpu_save_area_reset(struct savefpu *fsa)
-{
- hma_fpu_t *fpu = (hma_fpu_t *)fsa;
- hma_fpu_init(fpu);
-}
-
-/*
- * This glue function is supposed to save the host's FPU state. This is always
- * paired in the general bhyve code with a call to fpusave. Therefore, we treat
- * this as a nop and do all the work in fpusave(), which will have the context
- * argument that we want anyways.
- */
-void
-fpuexit(kthread_t *td)
-{
-}
-
-/*
- * This glue function is supposed to restore the guest's FPU state from the save
- * area back to the host. In FreeBSD, it is assumed that the host state has
- * already been saved by a call to fpuexit(); however, we do both here.
- */
-void
-fpurestore(void *arg)
-{
- hma_fpu_t *fpu = arg;
-
- hma_fpu_start_guest(fpu);
-}
-
-/*
- * This glue function is supposed to save the guest's FPU state. The host's FPU
- * state is not expected to be restored necessarily due to the use of FPU
- * emulation through CR0.TS. However, we can and do restore it here.
- */
-void
-fpusave(void *arg)
-{
- hma_fpu_t *fpu = arg;
-
- hma_fpu_stop_guest(fpu);
-}
-
void
vmm_sol_glue_init(void)
{
diff --git a/usr/src/uts/i86pc/os/hma_fpu.c b/usr/src/uts/i86pc/os/hma_fpu.c
index 14cfa8baed..138af7a32a 100644
--- a/usr/src/uts/i86pc/os/hma_fpu.c
+++ b/usr/src/uts/i86pc/os/hma_fpu.c
@@ -11,6 +11,7 @@
/*
* Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2022 Oxide Computer Company
*/
/*
@@ -28,6 +29,12 @@
#include <sys/hma.h>
#include <sys/x86_archext.h>
#include <sys/archsystm.h>
+#include <sys/controlregs.h>
+#include <sys/sysmacros.h>
+#include <sys/stdbool.h>
+#include <sys/ontrap.h>
+#include <sys/cpuvar.h>
+#include <sys/disp.h>
struct hma_fpu {
fpu_ctx_t hf_guest_fpu;
@@ -57,7 +64,7 @@ hma_fpu_init(hma_fpu_t *fpu)
xs = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs;
bzero(xs, cpuid_get_xsave_size());
bcopy(&avx_initial, xs, sizeof (*xs));
- xs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
+ xs->xs_header.xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
fpu->hf_guest_fpu.fpu_xsave_mask = XFEATURE_FP_ALL;
break;
default:
@@ -140,6 +147,36 @@ hma_fpu_start_guest(hma_fpu_t *fpu)
fpu->hf_guest_fpu.fpu_flags &= ~FPU_VALID;
}
+/*
+ * Since fp_save() assumes a thread-centric view of the FPU usage -- it will
+ * assert if attempting to save elsewhere than the thread PCB, and will elide
+ * action if the FPU is not enabled -- we cannot use it for the manual saving of
+ * FPU contents. To work around that, we call the save mechanism directly.
+ *
+ * The current FPU contents are written into the save area of `fpu` using
+ * whichever mechanism fp_save_mech selects, after which the context is marked
+ * FPU_VALID.
+ */
+static void
+do_fp_save(fpu_ctx_t *fpu)
+{
+ /*
+ * For our manual saving, we expect that the thread PCB never be the
+ * landing zone for the data.
+ */
+ ASSERT(curthread->t_lwp == NULL ||
+ fpu != &curthread->t_lwp->lwp_pcb.pcb_fpu);
+
+ switch (fp_save_mech) {
+ case FP_FXSAVE:
+ fpxsave(fpu->fpu_regs.kfpu_u.kfpu_fx);
+ break;
+ case FP_XSAVE:
+ /* Only the components named in the context's mask are saved. */
+ xsavep(fpu->fpu_regs.kfpu_u.kfpu_xs, fpu->fpu_xsave_mask);
+ break;
+ default:
+ panic("Invalid fp_save_mech");
+ }
+ fpu->fpu_flags |= FPU_VALID;
+}
+
+
void
hma_fpu_stop_guest(hma_fpu_t *fpu)
{
@@ -148,29 +185,232 @@ hma_fpu_stop_guest(hma_fpu_t *fpu)
ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_EN, !=, 0);
ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_VALID, ==, 0);
+ do_fp_save(&fpu->hf_guest_fpu);
+
+ fp_restore(&curthread->t_lwp->lwp_pcb.pcb_fpu);
+
+ fpu->hf_inguest = B_FALSE;
+ fpu->hf_curthread = NULL;
+}
+
+/*
+ * Will output up to `ndesc` records into `descp`. The required size for an
+ * XSAVE area containing all of the data fields supported by the host will be
+ * placed in `req_sizep` (if non-NULL). Returns the number of feature bits
+ * supported by the host.
+ *
+ * `descp` is only written for the first `ndesc` features found, so it may be
+ * NULL when `ndesc` is 0. The return value counts every supported feature,
+ * including those for which no record could be emitted.
+ */
+uint_t
+hma_fpu_describe_xsave_state(hma_xsave_state_desc_t *descp, uint_t ndesc,
+ size_t *req_sizep)
+{
+ uint64_t features;
+
+ switch (fp_save_mech) {
+ case FP_FXSAVE:
+ /*
+ * Even without xsave support, the FPU will have legacy x87
+ * float and SSE state contained within.
+ */
+ features = XFEATURE_LEGACY_FP | XFEATURE_SSE;
+ break;
+ case FP_XSAVE:
+ features = get_xcr(XFEATURE_ENABLED_MASK);
+ break;
+ default:
+ panic("Invalid fp_save_mech");
+ }
+
+ uint_t count, pos;
+ uint_t max_size = MIN_XSAVE_SIZE;
+ for (count = 0, pos = 0; pos <= 63; pos++) {
+ /*
+ * The shift must be performed in a 64-bit type: shifting a
+ * plain int by 31 or more positions is undefined behaviour.
+ */
+ const uint64_t bit = (1ULL << pos);
+ uint32_t size, off;
+
+ if ((features & bit) == 0) {
+ continue;
+ }
+
+ if (bit == XFEATURE_LEGACY_FP || bit == XFEATURE_SSE) {
+ size = sizeof (struct fxsave_state);
+ off = 0;
+ } else {
+ /*
+ * Size and position of data types within the XSAVE area
+ * is described in leaf 0xD in the subfunction
+ * corresponding to the bit position (for pos > 1).
+ */
+ struct cpuid_regs regs = {
+ .cp_eax = 0xD,
+ .cp_ecx = pos,
+ };
+
+ ASSERT3U(pos, >, 1);
+
+ (void) __cpuid_insn(&regs);
+ size = regs.cp_eax;
+ off = regs.cp_ebx;
+ }
+ max_size = MAX(max_size, off + size);
+
+ if (count < ndesc) {
+ hma_xsave_state_desc_t *desc = &descp[count];
+
+ desc->hxsd_bit = bit;
+ desc->hxsd_size = size;
+ desc->hxsd_off = off;
+ }
+ count++;
+ }
+ if (req_sizep != NULL) {
+ *req_sizep = max_size;
+ }
+ return (count);
+}
+
+/*
+ * Copy the saved guest FPU contents of `fpu` into `buf` in XSAVE layout.
+ * Returns HFXR_NO_SPACE if `len` is too small for the data, HFXR_OK otherwise.
+ * Must not be called while the FPU is in use by the guest.
+ */
+hma_fpu_xsave_result_t
+hma_fpu_get_xsave_state(const hma_fpu_t *fpu, void *buf, size_t len)
+{
+ ASSERT(!fpu->hf_inguest);
+
+ const void *saved = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic;
+ size_t need;
+
+ switch (fp_save_mech) {
+ case FP_FXSAVE: {
+ /*
+ * The FXSAVE area is followed by a header built here, which
+ * marks the legacy x87 and SSE components as present.
+ */
+ struct xsave_header synth = {
+ .xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE,
+ };
+
+ if (len < MIN_XSAVE_SIZE) {
+ return (HFXR_NO_SPACE);
+ }
+ bcopy(saved, buf, sizeof (struct fxsave_state));
+ bcopy(&synth, buf + sizeof (struct fxsave_state),
+ sizeof (synth));
+ break;
+ }
+ case FP_XSAVE:
+ /* Only the size of the host-supported area is wanted here. */
+ (void) hma_fpu_describe_xsave_state(NULL, 0, &need);
+ if (len < need) {
+ return (HFXR_NO_SPACE);
+ }
+ bcopy(saved, buf, need);
+ break;
+ default:
+ panic("Invalid fp_save_mech");
+ }
+
+ return (HFXR_OK);
+}
+
+/*
+ * Replace the saved guest FPU contents of `fpu` with the XSAVE-formatted data
+ * in `buf`. The data is rejected if the buffer is smaller than MIN_XSAVE_SIZE
+ * (HFXR_NO_SPACE), is not 64-byte aligned (HFXR_BAD_ALIGN), uses the compacted
+ * format (HFXR_UNSUP_FMT), names features the host does not allow
+ * (HFXR_UNSUP_FEAT), or traps when loaded into the FPU (HFXR_INVALID_DATA).
+ * Must not be called while the FPU is in use by the guest.
+ */
+hma_fpu_xsave_result_t
+hma_fpu_set_xsave_state(hma_fpu_t *fpu, void *buf, size_t len)
+{
+ ASSERT(!fpu->hf_inguest);
+
+ /*
+ * NOTE(review): `len` is only compared against MIN_XSAVE_SIZE, yet the
+ * restore below may read every component selected by xsh_xstate_bv.
+ * Presumably callers supply a buffer covering the full save area --
+ * confirm.
+ */
+ if (len < MIN_XSAVE_SIZE) {
+ return (HFXR_NO_SPACE);
+ }
+ /* 64-byte alignment is demanded of the FPU-related operations */
+ if (((uintptr_t)buf & 63) != 0) {
+ return (HFXR_BAD_ALIGN);
+ }
+
+ struct xsave_header *hdr = buf + sizeof (struct fxsave_state);
+ if (hdr->xsh_xcomp_bv != 0) {
+ /* XSAVEC formatting not supported at this time */
+ return (HFXR_UNSUP_FMT);
+ }
+
+ uint64_t allowed_bits;
+ size_t save_area_size;
+ switch (fp_save_mech) {
+ case FP_FXSAVE:
+ allowed_bits = XFEATURE_LEGACY_FP | XFEATURE_SSE;
+ save_area_size = sizeof (struct fxsave_state);
+ break;
+ case FP_XSAVE:
+ allowed_bits = get_xcr(XFEATURE_ENABLED_MASK);
+ save_area_size = cpuid_get_xsave_size();
+ break;
+ default:
+ panic("Invalid fp_save_mech");
+ }
+ if ((hdr->xsh_xstate_bv & ~(allowed_bits)) != 0) {
+ return (HFXR_UNSUP_FEAT);
+ }
+
/*
- * Note, we can't use fp_save because it assumes that we're saving to
- * the thread's PCB and not somewhere else. Because this is a different
- * FPU context, we instead have to do this ourselves.
+ * We validate the incoming state with the FPU itself prior to saving it
+ * into the guest FPU context area. In order to preserve any state
+ * currently housed in the FPU, we save it to a temporarily allocated
+ * FPU context. It is important to note that we are not following the
+ * normal rules around state management detailed in uts/intel/os/fpu.c.
+ * This saving is unconditional, uncaring about the state in the FPU or
+ * the value of CR0_TS, simplifying our process before returning to the
+ * caller (without needing to check for an lwp, etc). To prevent
+ * interrupting threads from encountering this unusual FPU state, we
+ * keep interrupts disabled for the duration.
*/
+ fpu_ctx_t temp_ctx = {
+ .fpu_xsave_mask = XFEATURE_FP_ALL,
+ };
+ temp_ctx.fpu_regs.kfpu_u.kfpu_generic =
+ kmem_cache_alloc(fpsave_cachep, KM_SLEEP);
+ bzero(temp_ctx.fpu_regs.kfpu_u.kfpu_generic, save_area_size);
+
+ ulong_t iflag;
+ iflag = intr_clear();
+ /* Remember whether CR0.TS was set so it can be re-applied on exit. */
+ bool disable_when_done = (getcr0() & CR0_TS) != 0;
+ do_fp_save(&temp_ctx);
+
+ /*
+ * If the provided data is invalid, it will cause a #GP when we attempt
+ * to load it into the FPU, so protect against that with on_trap().
+ * Should the data load successfully, we can then be confident that its
+ * later use via hma_fpu_start_guest() will be safe.
+ */
+ on_trap_data_t otd;
+ volatile hma_fpu_xsave_result_t res = HFXR_OK;
+ if (on_trap(&otd, OT_DATA_EC) != 0) {
+ res = HFXR_INVALID_DATA;
+ goto done;
+ }
+
switch (fp_save_mech) {
case FP_FXSAVE:
- fpxsave(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx);
+ if (hdr->xsh_xstate_bv == 0) {
+ /*
+ * An empty xstate_bv means we can simply load the
+ * legacy FP/SSE area with their initial state.
+ */
+ bcopy(&sse_initial,
+ fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx,
+ sizeof (sse_initial));
+ } else {
+ /* Load the caller's data, then save it as guest state. */
+ fpxrestore(buf);
+ fpxsave(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx);
+ }
break;
case FP_XSAVE:
+ /* Load the caller's data, then save it as guest state. */
+ xrestore(buf, XFEATURE_FP_ALL);
xsavep(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs,
fpu->hf_guest_fpu.fpu_xsave_mask);
break;
default:
panic("Invalid fp_save_mech");
- /*NOTREACHED*/
}
- fpu->hf_guest_fpu.fpu_flags |= FPU_VALID;
- fp_restore(&curthread->t_lwp->lwp_pcb.pcb_fpu);
+done:
+ /* Undo everything in reverse: trap handler, FPU contents, TS, interrupts. */
+ no_trap();
+ fp_restore(&temp_ctx);
+ if (disable_when_done) {
+ fpdisable();
+ }
+ intr_restore(iflag);
+ kmem_cache_free(fpsave_cachep, temp_ctx.fpu_regs.kfpu_u.kfpu_generic);
- fpu->hf_inguest = B_FALSE;
- fpu->hf_curthread = NULL;
+ return (res);
}
void
@@ -214,11 +454,11 @@ hma_fpu_set_fxsave_state(hma_fpu_t *fpu, const struct fxsave_state *fx)
gxs = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs;
bzero(gxs, cpuid_get_xsave_size());
bcopy(fx, &gxs->xs_fxsave, sizeof (*fx));
- gxs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
+ gxs->xs_header.xsh_xstate_bv =
+ XFEATURE_LEGACY_FP | XFEATURE_SSE;
break;
default:
panic("Invalid fp_save_mech");
- /* NOTREACHED */
}
return (0);
diff --git a/usr/src/uts/i86pc/os/pci_bios.c b/usr/src/uts/i86pc/os/pci_bios.c
index a24064b8cb..84653435ad 100644
--- a/usr/src/uts/i86pc/os/pci_bios.c
+++ b/usr/src/uts/i86pc/os/pci_bios.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2022 Oxide Computer Company
*/
#include <sys/types.h>
@@ -218,3 +219,23 @@ pci_slot_names_prop(int bus, char *buf, int len)
*(buf + plen) = 0;
return (plen);
}
+
+/*
+ * Walk the PCI IRQ routing table and hand the bus number of every entry with
+ * a non-zero slot value to `cbfunc`, together with `arg`. This finds buses in
+ * addition to those reported by the ACPI _BBN method; historically they were
+ * found by asking whether pci_slot_names_prop() returned valid data. The walk
+ * stops as soon as the callback returns zero. The same bus may be reported
+ * more than once; we rely on the core PCI code to do dedup for us.
+ */
+void
+pci_bios_bus_iter(pci_prd_root_complex_f cbfunc, void *arg)
+{
+ int idx;
+
+ for (idx = 0; idx < pci_irq_nroutes; idx++) {
+ if (pci_irq_routes[idx].pir_slot == 0)
+ continue;
+
+ if (!cbfunc(pci_irq_routes[idx].pir_bus, arg))
+ break;
+ }
+}
+}
diff --git a/usr/src/uts/i86pc/pci_prd/Makefile b/usr/src/uts/i86pc/pci_prd/Makefile
new file mode 100644
index 0000000000..ca262cc4a4
--- /dev/null
+++ b/usr/src/uts/i86pc/pci_prd/Makefile
@@ -0,0 +1,66 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2022 Oxide Computer Company
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = pci_prd
+OBJECTS = $(PCI_PRD_OBJS:%=$(OBJS_DIR)/%)
+ROOTMODULE = $(ROOT_PSM_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/Makefile.i86pc
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+LDFLAGS += -Nmisc/acpica
+
+#
+# Overrides
+#
+
+ALL_BUILDS = $(ALL_BUILDSONLY64)
+DEF_BUILDS = $(DEF_BUILDSONLY64)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/Makefile.targ
diff --git a/usr/src/uts/i86pc/sys/hma.h b/usr/src/uts/i86pc/sys/hma.h
index 16ab708896..e15cd60d5e 100644
--- a/usr/src/uts/i86pc/sys/hma.h
+++ b/usr/src/uts/i86pc/sys/hma.h
@@ -11,6 +11,7 @@
/*
* Copyright 2019 Joyent, Inc.
+ * Copyright 2022 Oxide Computer Company
*/
#ifndef _SYS_HMA_H
@@ -117,6 +118,43 @@ extern void hma_fpu_start_guest(hma_fpu_t *);
*/
extern void hma_fpu_stop_guest(hma_fpu_t *);
+typedef enum {
+ HFXR_OK = 0,
+ HFXR_NO_SPACE, /* buffer is not large enough */
+ HFXR_BAD_ALIGN, /* buffer is not properly (64-byte) aligned */
+ HFXR_UNSUP_FMT, /* data using unsupported (compressed) format */
+ HFXR_UNSUP_FEAT, /* data has unsupported features set */
+ HFXR_INVALID_DATA, /* CPU determined xsave data is invalid */
+} hma_fpu_xsave_result_t;
+
+/*
+ * Get and set the contents of the FPU save area, formatted as XSAVE-style
+ * information. If XSAVE is not supported by the host, the input and output
+ * values will be translated to and from the FXSAVE format. Attempts to set
+ * XSAVE values not supported by the host will result in an error.
+ *
+ * These functions cannot be called while the FPU is in use by the guest. It is
+ * up to callers to guarantee this invariant.
+ */
+extern hma_fpu_xsave_result_t hma_fpu_get_xsave_state(const hma_fpu_t *, void *,
+ size_t);
+extern hma_fpu_xsave_result_t hma_fpu_set_xsave_state(hma_fpu_t *, void *,
+ size_t);
+
+/*
+ * One record per XSAVE feature as emitted by hma_fpu_describe_xsave_state():
+ * hxsd_bit is the feature's bit mask, while hxsd_size and hxsd_off give the
+ * size and offset of that feature's data within the XSAVE area.
+ */
+typedef struct hma_xsave_state_desc {
+ uint64_t hxsd_bit;
+ uint32_t hxsd_size;
+ uint32_t hxsd_off;
+} hma_xsave_state_desc_t;
+
+/*
+ * Get a description of the data fields supported by the host via the XSAVE APIs
+ * for getting/setting guest FPU data. See the function definition for more
+ * detailed parameter usage.
+ */
+extern uint_t hma_fpu_describe_xsave_state(hma_xsave_state_desc_t *, uint_t,
+ size_t *);
+
/*
* Get and set the contents of the FPU save area. This sets the fxsave style
* information. In all cases when this is in use, if an XSAVE state is actually
diff --git a/usr/src/uts/i86pc/sys/pci_cfgspace_impl.h b/usr/src/uts/i86pc/sys/pci_cfgspace_impl.h
index 6cec63bcfd..05e0710bbe 100644
--- a/usr/src/uts/i86pc/sys/pci_cfgspace_impl.h
+++ b/usr/src/uts/i86pc/sys/pci_cfgspace_impl.h
@@ -21,16 +21,18 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2021 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
*/
#ifndef _SYS_PCI_CFGSPACE_IMPL_H
#define _SYS_PCI_CFGSPACE_IMPL_H
/*
- * Routines to support particular PCI chipsets
+ * Routines to support particular PCI chipsets and the PCI BIOS.
*/
+#include <sys/plat/pci_prd.h>
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -183,6 +185,11 @@ typedef struct pci_irq_route_hdr {
} pci_irq_route_hdr_t;
#pragma pack()
+extern int pci_irq_nroutes;
+
+extern int pci_slot_names_prop(int, char *, int);
+extern void pci_bios_bus_iter(pci_prd_root_complex_f, void *);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h
index 3282fa86bf..027a7da214 100644
--- a/usr/src/uts/i86pc/sys/vmm_dev.h
+++ b/usr/src/uts/i86pc/sys/vmm_dev.h
@@ -302,6 +302,25 @@ struct vm_run_state {
uint8_t _pad[3];
};
+/* Transfer data for VM_GET_FPU and VM_SET_FPU */
+struct vm_fpu_state {
+ int vcpuid;
+ void *buf;
+ size_t len;
+};
+
+struct vm_fpu_desc_entry {
+ uint64_t vfde_feature;
+ uint32_t vfde_size;
+ uint32_t vfde_off;
+};
+
+struct vm_fpu_desc {
+ struct vm_fpu_desc_entry *vfd_entry_data;
+ size_t vfd_req_size;
+ uint32_t vfd_num_entries;
+};
+
struct vmm_resv_query {
size_t vrq_free_sz;
size_t vrq_alloc_sz;
@@ -370,6 +389,8 @@ struct vmm_dirty_tracker {
#define VM_RESET_CPU (VMM_CPU_IOC_BASE | 0x16)
#define VM_GET_RUN_STATE (VMM_CPU_IOC_BASE | 0x17)
#define VM_SET_RUN_STATE (VMM_CPU_IOC_BASE | 0x18)
+#define VM_GET_FPU (VMM_CPU_IOC_BASE | 0x19)
+#define VM_SET_FPU (VMM_CPU_IOC_BASE | 0x1a)
/* Operations requiring write-locking the VM */
#define VM_REINIT (VMM_LOCK_IOC_BASE | 0x01)
@@ -428,6 +449,7 @@ struct vmm_dirty_tracker {
/* Note: forces a barrier on a flush operation before returning. */
#define VM_TRACK_DIRTY_PAGES (VMM_IOC_BASE | 0x20)
+#define VM_DESC_FPU_AREA (VMM_IOC_BASE | 0x21)
#define VM_DEVMEM_GETOFFSET (VMM_IOC_BASE | 0xff)
diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files
index 20d66a08e4..38b1177bbe 100644
--- a/usr/src/uts/intel/Makefile.files
+++ b/usr/src/uts/intel/Makefile.files
@@ -165,7 +165,7 @@ PCIEB_OBJS += pcieb_x86.o
PIT_BEEP_OBJS += pit_beep.o
POWER_OBJS += power.o
PCI_AUTOCONFIG_OBJS += pci_autoconfig.o pci_boot.o pcie_nvidia.o \
- pci_memlist.o pci_resource.o
+ pci_memlist.o
RADEON_OBJS += r300_cmdbuf.o radeon_cp.o radeon_drv.o \
radeon_state.o radeon_irq.o radeon_mem.o
SD_OBJS += sd.o sd_xbuf.o
diff --git a/usr/src/uts/intel/io/pci/pci_autoconfig.c b/usr/src/uts/intel/io/pci/pci_autoconfig.c
index 713493eedb..af7ba637d8 100644
--- a/usr/src/uts/intel/io/pci/pci_autoconfig.c
+++ b/usr/src/uts/intel/io/pci/pci_autoconfig.c
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2022 Oxide Computer Company
*/
/*
@@ -40,8 +41,10 @@
#include <sys/reboot.h>
#include <sys/pci_cfgspace_impl.h>
#include <sys/mutex.h>
+#include <sys/plat/pci_prd.h>
extern int pci_boot_debug;
+extern int pci_boot_maxbus;
/*
* Interface routines
@@ -49,7 +52,7 @@ extern int pci_boot_debug;
void pci_enumerate(int);
void pci_setup_tree(void);
void pci_reprogram(void);
-void bus_res_fini(void);
+dev_info_t *pci_boot_bus_to_dip(uint32_t);
static struct modlmisc modlmisc = {
&mod_miscops, "PCI BIOS interface"
@@ -59,13 +62,23 @@ static struct modlinkage modlinkage = {
MODREV_1, (void *)&modlmisc, NULL
};
+static pci_prd_upcalls_t pci_upcalls = {
+ .pru_bus2dip_f = pci_boot_bus_to_dip
+};
+
int
_init(void)
{
int err;
- if ((err = mod_install(&modlinkage)) != 0)
+ if ((err = pci_prd_init(&pci_upcalls)) != 0) {
+ return (err);
+ }
+
+ if ((err = mod_install(&modlinkage)) != 0) {
+ pci_prd_fini();
return (err);
+ }
impl_bus_add_probe(pci_enumerate);
return (0);
@@ -80,7 +93,7 @@ _fini(void)
return (err);
impl_bus_delete_probe(pci_enumerate);
- bus_res_fini();
+ pci_prd_fini();
return (0);
}
@@ -102,6 +115,14 @@ pci_enumerate(int reprogram)
extern void add_pci_fixes(void);
extern void undo_pci_fixes(void);
+ /*
+ * On our first pass through here, determine the maximum bus number
+ * that we should use.
+ */
+ if (reprogram == 0) {
+ pci_boot_maxbus = pci_prd_max_bus();
+ }
+
add_pci_fixes();
if (reprogram) {
diff --git a/usr/src/uts/intel/io/pci/pci_boot.c b/usr/src/uts/intel/io/pci/pci_boot.c
index 203d4292ee..6d72fe6507 100644
--- a/usr/src/uts/intel/io/pci/pci_boot.c
+++ b/usr/src/uts/intel/io/pci/pci_boot.c
@@ -23,6 +23,7 @@
* Copyright 2019 Joyent, Inc.
* Copyright 2019 Western Digital Corporation
* Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2022 Oxide Computer Company
*/
/*
@@ -101,18 +102,21 @@
* add_pci_fixes()
* As for first pass.
* pci_reprogram()
- * pci_scan_bbn()
- * The ACPI namespace is scanned for top-level
- * instances of _BBN in order to enumerate the
- * root-bridges in the system. If a root bridge is
- * found that has not been previously discovered
- * (existence inferred through its children) then
- * it is added to the system.
+ * pci_prd_root_complex_iter()
+ * The platform is asked to tell us of all root
+ * complexes that it knows about (e.g. using the
+ * _BBN method via ACPI). This will include buses
+ * that we've already discovered and those that we
+ * potentially haven't. Anything that has not been
+ * previously discovered (or inferred to exist) is
+ * then added to the system.
* <foreach ROOT bus>
* populate_bus_res()
* Find resources associated with this root bus
- * from either ACPI or BIOS tables. See
- * find_bus_res() in pci_resource.c
+ * based on what the platform provides through the
+ * pci platform interfaces defined in
+ * sys/plat/pci_prd.h. On i86pc this is driven by
+ * ACPI and BIOS tables.
* <foreach bus>
* fix_ppb_res()
* Reprogram pci(e) bridges which have not already
@@ -158,7 +162,6 @@
#include <sys/pcie_impl.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
-#include <io/pci/mps_table.h>
#include <sys/pci_cfgacc.h>
#include <sys/pci_cfgspace.h>
#include <sys/pci_cfgspace_impl.h>
@@ -172,6 +175,7 @@
#include <sys/iommulib.h>
#include <sys/devcache.h>
#include <sys/pci_cfgacc_x86.h>
+#include <sys/plat/pci_prd.h>
#define pci_getb (*pci_getb_func)
#define pci_getw (*pci_getw_func)
@@ -242,16 +246,13 @@ struct pci_devfunc {
extern int apic_nvidia_io_max;
extern int pseudo_isa;
-extern int pci_bios_maxbus;
static uchar_t max_dev_pci = 32; /* PCI standard */
+int pci_boot_maxbus;
int pci_boot_debug = 0;
int pci_debug_bus_start = -1;
int pci_debug_bus_end = -1;
-extern struct memlist *find_bus_res(int, int);
static struct pci_fixundo *undolist = NULL;
static int num_root_bus = 0; /* count of root buses */
-extern volatile int acpi_resource_discovery;
-extern uint64_t mcfg_mem_base;
extern void pci_cfgacc_add_workaround(uint16_t, uchar_t, uchar_t);
extern dev_info_t *pcie_get_rc_dip(dev_info_t *);
@@ -269,12 +270,11 @@ static void add_ppb_props(dev_info_t *, uchar_t, uchar_t, uchar_t, int,
ushort_t);
static void add_model_prop(dev_info_t *, uint_t);
static void add_bus_range_prop(int);
-static void add_bus_slot_names_prop(int);
static void add_ranges_prop(int, int);
static void add_bus_available_prop(int);
static int get_pci_cap(uchar_t bus, uchar_t dev, uchar_t func, uint8_t cap_id);
static void fix_ppb_res(uchar_t, boolean_t);
-static void alloc_res_array();
+static void alloc_res_array(void);
static void create_ioapic_node(int bus, int dev, int fn, ushort_t vendorid,
ushort_t deviceid);
static void pciex_slot_names_prop(dev_info_t *, ushort_t);
@@ -283,7 +283,6 @@ static void memlist_remove_list(struct memlist **list,
struct memlist *remove_list);
static void ck804_fix_aer_ptr(dev_info_t *, pcie_req_id_t);
-static void pci_scan_bbn(void);
static int pci_unitaddr_cache_valid(void);
static int pci_bus_unitaddr(int);
static void pci_unitaddr_cache_create(void);
@@ -292,8 +291,6 @@ static int pci_cache_unpack_nvlist(nvf_handle_t, nvlist_t *, char *);
static int pci_cache_pack_nvlist(nvf_handle_t, nvlist_t **);
static void pci_cache_free_list(nvf_handle_t);
-extern int pci_slot_names_prop(int, char *, int);
-
/* set non-zero to force PCI peer-bus renumbering */
int pci_bus_always_renumber = 0;
@@ -326,6 +323,13 @@ typedef struct {
nvf_handle_t puafd_handle;
int pua_cache_valid = 0;
+dev_info_t *
+pci_boot_bus_to_dip(uint32_t busno)
+{
+ ASSERT3U(busno, <=, pci_boot_maxbus);
+ return (pci_bus_res[busno].dip);
+}
+
static void
dump_memlists_impl(const char *tag, int bus)
{
@@ -356,80 +360,21 @@ dump_memlists_impl(const char *tag, int bus)
}
}
-/*ARGSUSED*/
-static ACPI_STATUS
-pci_process_acpi_device(ACPI_HANDLE hdl, UINT32 level, void *ctx, void **rv)
+static boolean_t
+pci_rc_scan_cb(uint32_t busno, void *arg)
{
- ACPI_DEVICE_INFO *adi;
- int busnum;
-
- /*
- * Use AcpiGetObjectInfo() to find the device _HID
- * If not a PCI root-bus, ignore this device and continue
- * the walk
- */
- if (ACPI_FAILURE(AcpiGetObjectInfo(hdl, &adi)))
- return (AE_OK);
-
- if (!(adi->Valid & ACPI_VALID_HID)) {
- AcpiOsFree(adi);
- return (AE_OK);
- }
-
- if (strncmp(adi->HardwareId.String, PCI_ROOT_HID_STRING,
- sizeof (PCI_ROOT_HID_STRING)) &&
- strncmp(adi->HardwareId.String, PCI_EXPRESS_ROOT_HID_STRING,
- sizeof (PCI_EXPRESS_ROOT_HID_STRING))) {
- AcpiOsFree(adi);
- return (AE_OK);
+ if (busno > pci_boot_maxbus) {
+ dcmn_err(CE_NOTE, "platform root complex scan returned bus "
+ "with invalid bus id: 0x%x", busno);
+ return (B_TRUE);
}
- AcpiOsFree(adi);
-
- /*
- * acpica_get_busno() will check the presence of _BBN and
- * fail if not present. It will then use the _CRS method to
- * retrieve the actual bus number assigned, it will fall back
- * to _BBN should the _CRS method fail.
- */
- if (ACPI_SUCCESS(acpica_get_busno(hdl, &busnum))) {
- /*
- * Ignore invalid _BBN return values here (rather
- * than panic) and emit a warning; something else
- * may suffer failure as a result of the broken BIOS.
- */
- if ((busnum < 0) || (busnum > pci_bios_maxbus)) {
- dcmn_err(CE_NOTE,
- "pci_process_acpi_device: invalid _BBN 0x%x",
- busnum);
- return (AE_CTRL_DEPTH);
- }
-
- /* PCI with valid _BBN */
- if (pci_bus_res[busnum].par_bus == (uchar_t)-1 &&
- pci_bus_res[busnum].dip == NULL)
- create_root_bus_dip((uchar_t)busnum);
- return (AE_CTRL_DEPTH);
+ if (pci_bus_res[busno].par_bus == (uchar_t)-1 &&
+ pci_bus_res[busno].dip == NULL) {
+ create_root_bus_dip((uchar_t)busno);
}
- /* PCI and no _BBN, continue walk */
- return (AE_OK);
-}
-
-/*
- * Scan the ACPI namespace for all top-level instances of _BBN
- * in order to discover childless root-bridges (which enumeration
- * may not find; root-bridges are inferred by the existence of
- * children). This scan should find all root-bridges that have
- * been enumerated, and any childless root-bridges not enumerated.
- * Root-bridge for bus 0 may not have a _BBN object.
- */
-static void
-pci_scan_bbn()
-{
- void *rv;
-
- (void) AcpiGetDevices(NULL, pci_process_acpi_device, NULL, &rv);
+ return (B_TRUE);
}
static void
@@ -596,7 +541,7 @@ pci_unitaddr_cache_create(void)
index = 0;
listp = nvf_list(puafd_handle);
- for (i = 0; i <= pci_bios_maxbus; i++) {
+ for (i = 0; i <= pci_boot_maxbus; i++) {
/* skip non-root (peer) PCI busses */
if ((pci_bus_res[i].par_bus != (uchar_t)-1) ||
(pci_bus_res[i].dip == NULL))
@@ -622,7 +567,7 @@ pci_setup_tree(void)
uint_t i, root_bus_addr = 0;
alloc_res_array();
- for (i = 0; i <= pci_bios_maxbus; i++) {
+ for (i = 0; i <= pci_boot_maxbus; i++) {
pci_bus_res[i].par_bus = (uchar_t)-1;
pci_bus_res[i].root_addr = (uchar_t)-1;
pci_bus_res[i].sub_bus = i;
@@ -635,7 +580,7 @@ pci_setup_tree(void)
/*
* Now enumerate peer busses
*
- * We loop till pci_bios_maxbus. On most systems, there is
+ * We loop till pci_boot_maxbus. On most systems, there is
* one more bus at the high end, which implements the ISA
* compatibility bus. We don't care about that.
*
@@ -646,144 +591,11 @@ pci_setup_tree(void)
* However, we stop enumerating phantom peers with no
* device below.
*/
- for (i = 1; i <= pci_bios_maxbus; i++) {
+ for (i = 1; i <= pci_boot_maxbus; i++) {
if (pci_bus_res[i].dip == NULL) {
pci_bus_res[i].root_addr = root_bus_addr++;
}
enumerate_bus_devs(i, CONFIG_INFO);
-
- /* add slot-names property for named pci hot-plug slots */
- add_bus_slot_names_prop(i);
- }
-}
-
-/*
- * >0 = present, 0 = not present, <0 = error
- */
-static int
-pci_bbn_present(int bus)
-{
- ACPI_HANDLE hdl;
- int rv;
-
- /* no dip means no _BBN */
- if (pci_bus_res[bus].dip == NULL)
- return (0);
-
- rv = -1; /* default return value in case of error below */
- if (ACPI_SUCCESS(acpica_get_handle(pci_bus_res[bus].dip, &hdl))) {
- switch (AcpiEvaluateObject(hdl, "_BBN", NULL, NULL)) {
- case AE_OK:
- rv = 1;
- break;
- case AE_NOT_FOUND:
- rv = 0;
- break;
- default:
- break;
- }
- }
-
- return (rv);
-}
-
-/*
- * Return non-zero if any PCI bus in the system has an associated
- * _BBN object, 0 otherwise.
- */
-static int
-pci_roots_have_bbn(void)
-{
- int i;
-
- /*
- * Scan the PCI busses and look for at least 1 _BBN
- */
- for (i = 0; i <= pci_bios_maxbus; i++) {
- /* skip non-root (peer) PCI busses */
- if (pci_bus_res[i].par_bus != (uchar_t)-1)
- continue;
-
- if (pci_bbn_present(i) > 0)
- return (1);
- }
- return (0);
-
-}
-
-/*
- * return non-zero if the machine is one on which we renumber
- * the internal pci unit-addresses
- */
-static int
-pci_bus_renumber()
-{
- ACPI_TABLE_HEADER *fadt;
-
- if (pci_bus_always_renumber)
- return (1);
-
- /* get the FADT */
- if (AcpiGetTable(ACPI_SIG_FADT, 1, (ACPI_TABLE_HEADER **)&fadt) !=
- AE_OK)
- return (0);
-
- /* compare OEM Table ID to "SUNm31" */
- if (strncmp("SUNm31", fadt->OemId, 6))
- return (0);
- else
- return (1);
-}
-
-/*
- * Initial enumeration of the physical PCI bus hierarchy can
- * leave 'gaps' in the order of peer PCI bus unit-addresses.
- * Systems with more than one peer PCI bus *must* have an ACPI
- * _BBN object associated with each peer bus; use the presence
- * of this object to remove gaps in the numbering of the peer
- * PCI bus unit-addresses - only peer busses with an associated
- * _BBN are counted.
- */
-static void
-pci_renumber_root_busses(void)
-{
- int pci_regs[] = {0, 0, 0};
- int i, root_addr = 0;
-
- /*
- * Currently, we only enable the re-numbering on specific
- * Sun machines; this is a work-around for the more complicated
- * issue of upgrade changing physical device paths
- */
- if (!pci_bus_renumber())
- return;
-
- /*
- * If we find no _BBN objects at all, we either don't need
- * to do anything or can't do anything anyway
- */
- if (!pci_roots_have_bbn())
- return;
-
- for (i = 0; i <= pci_bios_maxbus; i++) {
- /* skip non-root (peer) PCI busses */
- if (pci_bus_res[i].par_bus != (uchar_t)-1)
- continue;
-
- if (pci_bbn_present(i) < 1) {
- pci_bus_res[i].root_addr = (uchar_t)-1;
- continue;
- }
-
- ASSERT(pci_bus_res[i].dip != NULL);
- if (pci_bus_res[i].root_addr != root_addr) {
- /* update reg property for node */
- pci_bus_res[i].root_addr = root_addr;
- pci_regs[0] = pci_bus_res[i].root_addr;
- (void) ndi_prop_update_int_array(DDI_DEV_T_NONE,
- pci_bus_res[i].dip, "reg", (int *)pci_regs, 3);
- }
- root_addr++;
}
}
@@ -810,12 +622,12 @@ remove_subtractive_res()
int i, j;
struct memlist *list;
- for (i = 0; i <= pci_bios_maxbus; i++) {
+ for (i = 0; i <= pci_boot_maxbus; i++) {
if (pci_bus_res[i].subtractive) {
/* remove used io ports */
list = pci_bus_res[i].io_used;
while (list) {
- for (j = 0; j <= pci_bios_maxbus; j++)
+ for (j = 0; j <= pci_boot_maxbus; j++)
(void) memlist_remove(
&pci_bus_res[j].io_avail,
list->ml_address, list->ml_size);
@@ -824,7 +636,7 @@ remove_subtractive_res()
/* remove used mem resource */
list = pci_bus_res[i].mem_used;
while (list) {
- for (j = 0; j <= pci_bios_maxbus; j++) {
+ for (j = 0; j <= pci_boot_maxbus; j++) {
(void) memlist_remove(
&pci_bus_res[j].mem_avail,
list->ml_address, list->ml_size);
@@ -837,7 +649,7 @@ remove_subtractive_res()
/* remove used prefetchable mem resource */
list = pci_bus_res[i].pmem_used;
while (list) {
- for (j = 0; j <= pci_bios_maxbus; j++) {
+ for (j = 0; j <= pci_boot_maxbus; j++) {
(void) memlist_remove(
&pci_bus_res[j].pmem_avail,
list->ml_address, list->ml_size);
@@ -905,7 +717,7 @@ get_parbus_res(uchar_t parbus, uchar_t bus, uint64_t size, uint64_t align,
* accounted for in this case.
*/
if ((pci_bus_res[parbus].par_bus == (uchar_t)-1) &&
- (num_root_bus > 1) && (acpi_resource_discovery <= 0)) {
+ (num_root_bus > 1) && !pci_prd_multi_root_ok()) {
return (0);
}
@@ -1593,10 +1405,16 @@ pci_reprogram(void)
int bus;
/*
- * Scan ACPI namespace for _BBN objects, make sure that
- * childless root-bridges appear in devinfo tree
+ * Ask platform code for all of the root complexes it knows about in
+ * case we have missed anything in the scan. This is to ensure that we
+ * have them show up in the devinfo tree. This scan should find any
+ * existing entries as well. After this, go through each bus and
+ * ask the platform if it wants to change the name of the slot.
*/
- pci_scan_bbn();
+ pci_prd_root_complex_iter(pci_rc_scan_cb, NULL);
+ for (bus = 0; bus <= pci_boot_maxbus; bus++) {
+ pci_prd_slot_name(bus, pci_bus_res[bus].dip);
+ }
pci_unitaddr_cache_init();
/*
@@ -1607,7 +1425,7 @@ pci_reprogram(void)
int new_addr;
int index = 0;
- for (bus = 0; bus <= pci_bios_maxbus; bus++) {
+ for (bus = 0; bus <= pci_boot_maxbus; bus++) {
/* skip non-root (peer) PCI busses */
if ((pci_bus_res[bus].par_bus != (uchar_t)-1) ||
(pci_bus_res[bus].dip == NULL))
@@ -1626,14 +1444,13 @@ pci_reprogram(void)
}
} else {
/* perform legacy processing */
- pci_renumber_root_busses();
pci_unitaddr_cache_create();
}
/*
* Do root-bus resource discovery
*/
- for (bus = 0; bus <= pci_bios_maxbus; bus++) {
+ for (bus = 0; bus <= pci_boot_maxbus; bus++) {
/* skip non-root (peer) PCI busses */
if (pci_bus_res[bus].par_bus != (uchar_t)-1)
continue;
@@ -1683,7 +1500,7 @@ pci_reprogram(void)
memlist_free_all(&isa_res.mem_used);
/* add bus-range property for root/peer bus nodes */
- for (i = 0; i <= pci_bios_maxbus; i++) {
+ for (i = 0; i <= pci_boot_maxbus; i++) {
/* create bus-range property on root/peer buses */
if (pci_bus_res[i].par_bus == (uchar_t)-1)
add_bus_range_prop(i);
@@ -1705,10 +1522,10 @@ pci_reprogram(void)
/* reprogram the non-subtractive PPB */
if (pci_reconfig)
- for (i = 0; i <= pci_bios_maxbus; i++)
+ for (i = 0; i <= pci_boot_maxbus; i++)
fix_ppb_res(i, B_FALSE);
- for (i = 0; i <= pci_bios_maxbus; i++) {
+ for (i = 0; i <= pci_boot_maxbus; i++) {
/* configure devices not configured by BIOS */
if (pci_reconfig) {
/*
@@ -1722,7 +1539,7 @@ pci_reprogram(void)
}
/* All dev programmed, so we can create available prop */
- for (i = 0; i <= pci_bios_maxbus; i++)
+ for (i = 0; i <= pci_boot_maxbus; i++)
add_bus_available_prop(i);
}
@@ -1734,10 +1551,11 @@ populate_bus_res(uchar_t bus)
{
/* scan BIOS structures */
- pci_bus_res[bus].pmem_avail = find_bus_res(bus, PREFETCH_TYPE);
- pci_bus_res[bus].mem_avail = find_bus_res(bus, MEM_TYPE);
- pci_bus_res[bus].io_avail = find_bus_res(bus, IO_TYPE);
- pci_bus_res[bus].bus_avail = find_bus_res(bus, BUSRANGE_TYPE);
+ pci_bus_res[bus].pmem_avail = pci_prd_find_resource(bus,
+ PCI_PRD_R_PREFETCH);
+ pci_bus_res[bus].mem_avail = pci_prd_find_resource(bus, PCI_PRD_R_MMIO);
+ pci_bus_res[bus].io_avail = pci_prd_find_resource(bus, PCI_PRD_R_IO);
+ pci_bus_res[bus].bus_avail = pci_prd_find_resource(bus, PCI_PRD_R_BUS);
/*
* attempt to initialize sub_bus from the largest range-end
@@ -2054,7 +1872,7 @@ add_pci_fixes(void)
{
int i;
- for (i = 0; i <= pci_bios_maxbus; i++) {
+ for (i = 0; i <= pci_boot_maxbus; i++) {
/*
* For each bus, apply needed fixes to the appropriate devices.
* This must be done before the main enumeration loop because
@@ -3197,8 +3015,8 @@ add_ppb_props(dev_info_t *dip, uchar_t bus, uchar_t dev, uchar_t func,
* Some BIOSes lie about max pci busses, we allow for
* such mistakes here
*/
- if (subbus > pci_bios_maxbus) {
- pci_bios_maxbus = subbus;
+ if (subbus > pci_boot_maxbus) {
+ pci_boot_maxbus = subbus;
alloc_res_array();
}
@@ -3436,66 +3254,6 @@ add_bus_range_prop(int bus)
}
/*
- * Add slot-names property for any named pci hot-plug slots
- */
-static void
-add_bus_slot_names_prop(int bus)
-{
- char slotprop[256];
- int len;
- extern int pci_irq_nroutes;
- char *slotcap_name;
-
- /*
- * If no irq routing table, then go with the slot-names as set up
- * in pciex_slot_names_prop() from slot capability register (if any).
- */
- if (pci_irq_nroutes == 0)
- return;
-
- /*
- * Otherise delete the slot-names we already have and use the irq
- * routing table values as returned by pci_slot_names_prop() instead,
- * but keep any property of value "pcie0" as that can't be represented
- * in the irq routing table.
- */
- if (pci_bus_res[bus].dip != NULL) {
- if (ddi_prop_lookup_string(DDI_DEV_T_ANY, pci_bus_res[bus].dip,
- DDI_PROP_DONTPASS, "slot-names", &slotcap_name) !=
- DDI_SUCCESS || strcmp(slotcap_name, "pcie0") != 0)
- (void) ndi_prop_remove(DDI_DEV_T_NONE,
- pci_bus_res[bus].dip, "slot-names");
- }
-
- len = pci_slot_names_prop(bus, slotprop, sizeof (slotprop));
- if (len > 0) {
- /*
- * Only create a peer bus node if this bus may be a peer bus.
- * It may be a peer bus if the dip is NULL and if par_bus is
- * -1 (par_bus is -1 if this bus was not found to be
- * subordinate to any PCI-PCI bridge).
- * If it's not a peer bus, then the ACPI BBN-handling code
- * will remove it later.
- */
- if (pci_bus_res[bus].par_bus == (uchar_t)-1 &&
- pci_bus_res[bus].dip == NULL) {
-
- create_root_bus_dip(bus);
- }
- if (pci_bus_res[bus].dip != NULL) {
- ASSERT((len % sizeof (int)) == 0);
- (void) ndi_prop_update_int_array(DDI_DEV_T_NONE,
- pci_bus_res[bus].dip, "slot-names",
- (int *)slotprop, len / sizeof (int));
- } else {
- cmn_err(CE_NOTE, "!BIOS BUG: Invalid bus number in PCI "
- "IRQ routing table; Not adding slot-names "
- "property for incorrect bus %d", bus);
- }
- }
-}
-
-/*
* Handle both PCI root and PCI-PCI bridge range properties;
* non-zero 'ppb' argument select PCI-PCI bridges versus root.
*/
@@ -3652,11 +3410,11 @@ add_bus_available_prop(int bus)
static void
alloc_res_array(void)
{
- static int array_size = 0;
- int old_size;
+ static uint_t array_size = 0;
+ uint_t old_size;
void *old_res;
- if (array_size > pci_bios_maxbus + 1)
+ if (array_size > pci_boot_maxbus + 1)
return; /* array is big enough */
old_size = array_size;
@@ -3665,7 +3423,7 @@ alloc_res_array(void)
if (array_size == 0)
array_size = 16; /* start with a reasonable number */
- while (array_size <= pci_bios_maxbus + 1)
+ while (array_size <= pci_boot_maxbus + 1)
array_size <<= 1;
pci_bus_res = (struct pci_bus_resource *)kmem_zalloc(
array_size * sizeof (struct pci_bus_resource), KM_SLEEP);
diff --git a/usr/src/uts/intel/io/pci/pci_memlist.c b/usr/src/uts/intel/io/pci/pci_memlist.c
index 5786591420..4da76951e9 100644
--- a/usr/src/uts/intel/io/pci/pci_memlist.c
+++ b/usr/src/uts/intel/io/pci/pci_memlist.c
@@ -40,8 +40,8 @@
#include <sys/pci_impl.h>
#include <sys/debug.h>
-extern int pci_boot_debug;
-#define dprintf if (pci_boot_debug) printf
+int pci_memlist_debug;
+#define dprintf if (pci_memlist_debug) printf
void
memlist_dump(struct memlist *listp)
diff --git a/usr/src/uts/intel/os/archdep.c b/usr/src/uts/intel/os/archdep.c
index bd5c72a3d8..49473fa2fa 100644
--- a/usr/src/uts/intel/os/archdep.c
+++ b/usr/src/uts/intel/os/archdep.c
@@ -269,7 +269,7 @@ setfpregs(klwp_t *lwp, fpregset_t *fp)
&fpu->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave);
fpu->fpu_regs.kfpu_xstatus =
fp->fp_reg_set.fpchip_state.xstatus;
- fpu->fpu_regs.kfpu_u.kfpu_xs->xs_xstate_bv |=
+ fpu->fpu_regs.kfpu_u.kfpu_xs->xs_header.xsh_xstate_bv |=
(XFEATURE_LEGACY_FP | XFEATURE_SSE);
break;
default:
diff --git a/usr/src/uts/intel/os/fpu.c b/usr/src/uts/intel/os/fpu.c
index 0a9b828288..9644282429 100644
--- a/usr/src/uts/intel/os/fpu.c
+++ b/usr/src/uts/intel/os/fpu.c
@@ -22,7 +22,7 @@
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2021 Joyent, Inc.
* Copyright 2021 RackTop Systems, Inc.
- * Copyright 2021 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -528,23 +528,18 @@ const struct xsave_state avx_initial = {
* The definition below needs to be identical with sse_initial
* defined above.
*/
- {
- FPU_CW_INIT, /* fx_fcw */
- 0, /* fx_fsw */
- 0, /* fx_fctw */
- 0, /* fx_fop */
- 0, /* fx_rip */
- 0, /* fx_rdp */
- SSE_MXCSR_INIT /* fx_mxcsr */
- /* rest of structure is zero */
+ .xs_fxsave = {
+ .fx_fcw = FPU_CW_INIT,
+ .fx_mxcsr = SSE_MXCSR_INIT,
+ },
+ .xs_header = {
+ /*
+ * bit0 = 1 for XSTATE_BV to indicate that legacy fields are
+ * valid, and CPU should initialize XMM/YMM.
+ */
+ .xsh_xstate_bv = 1,
+ .xsh_xcomp_bv = 0,
},
- /*
- * bit0 = 1 for XSTATE_BV to indicate that legacy fields are valid,
- * and CPU should initialize XMM/YMM.
- */
- 1,
- 0 /* xs_xcomp_bv */
- /* rest of structure is zero */
};
/*
@@ -656,8 +651,8 @@ fp_new_lwp(void *parent, void *child)
bcopy(&avx_initial, cxs, sizeof (*cxs));
cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
cfx->fx_fcw = fx->fx_fcw;
- cxs->xs_xstate_bv |= (get_xcr(XFEATURE_ENABLED_MASK) &
- XFEATURE_FP_INITIAL);
+ cxs->xs_header.xsh_xstate_bv |=
+ (get_xcr(XFEATURE_ENABLED_MASK) & XFEATURE_FP_INITIAL);
break;
default:
panic("Invalid fp_save_mech");
@@ -973,7 +968,8 @@ fpexterrflt(struct regs *rp)
* Always set LEGACY_FP as it may have been cleared by XSAVE
* instruction
*/
- fp->fpu_regs.kfpu_u.kfpu_xs->xs_xstate_bv |= XFEATURE_LEGACY_FP;
+ fp->fpu_regs.kfpu_u.kfpu_xs->xs_header.xsh_xstate_bv |=
+ XFEATURE_LEGACY_FP;
break;
default:
panic("Invalid fp_save_mech");
@@ -1154,7 +1150,8 @@ fpsetcw(uint16_t fcw, uint32_t mxcsr)
* Always set LEGACY_FP as it may have been cleared by XSAVE
* instruction
*/
- fp->fpu_regs.kfpu_u.kfpu_xs->xs_xstate_bv |= XFEATURE_LEGACY_FP;
+ fp->fpu_regs.kfpu_u.kfpu_xs->xs_header.xsh_xstate_bv |=
+ XFEATURE_LEGACY_FP;
break;
default:
panic("Invalid fp_save_mech");
@@ -1177,7 +1174,7 @@ kernel_fpu_fpstate_init(kfpu_state_t *kfpu)
xs = kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_xs;
bzero(xs, cpuid_get_xsave_size());
bcopy(&avx_initial, xs, sizeof (*xs));
- xs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
+ xs->xs_header.xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
kfpu->kfpu_ctx.fpu_xsave_mask = XFEATURE_FP_ALL;
break;
default:
diff --git a/usr/src/uts/intel/pci_autoconfig/Makefile b/usr/src/uts/intel/pci_autoconfig/Makefile
index 74498aea94..f3c034cb03 100644
--- a/usr/src/uts/intel/pci_autoconfig/Makefile
+++ b/usr/src/uts/intel/pci_autoconfig/Makefile
@@ -57,9 +57,9 @@ ALL_TARGET = $(BINARY)
INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
-# Depends on acpica ACPI CA interpreter and PCI-E framework
+# Depends on the platform's resource discovery and PCI-E framework
#
-LDFLAGS += -Nmisc/acpica -Nmisc/pcie
+LDFLAGS += -Nmisc/pcie -Nmisc/pci_prd
#
# Default build targets.
diff --git a/usr/src/uts/intel/sys/fp.h b/usr/src/uts/intel/sys/fp.h
index dfbcf7dc1c..7423444c60 100644
--- a/usr/src/uts/intel/sys/fp.h
+++ b/usr/src/uts/intel/sys/fp.h
@@ -21,6 +21,7 @@
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2022 Oxide Computer Company
*
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
@@ -230,11 +231,23 @@ struct fxsave_state {
} __aligned(16); /* 512 bytes */
/*
+ * This structure represents the header portion of the data layout used by the
+ * 'xsave' instruction variants. It is documented in section 13.4.2 of the
+ * Intel 64 and IA-32 Architectures Software Developer’s Manual, Volume 1
+ * (IASDv1). Although "header" is somewhat of a misnomer, considering the data
+ * begins at offset 512 of the xsave area, its contents dictate which portions
+ * of the area are present and how they may be formatted.
+ */
+struct xsave_header {
+ uint64_t xsh_xstate_bv;
+ uint64_t xsh_xcomp_bv;
+ uint64_t xsh_reserved[6];
+};
+
+/*
* This structure is written to memory by one of the 'xsave' instruction
* variants. The first 512 bytes are compatible with the format of the 'fxsave'
- * area. The header portion of the xsave layout is documented in section
- * 13.4.2 of the Intel 64 and IA-32 Architectures Software Developer’s Manual,
- * Volume 1 (IASDv1). The extended portion is documented in section 13.4.3.
+ * area. The extended portion is documented in section 13.4.3.
*
* Our size is at least AVX_XSAVE_SIZE (832 bytes), which is asserted
* statically. Enabling additional xsave-related CPU features requires an
@@ -245,9 +258,10 @@ struct fxsave_state {
* determined dynamically by querying the CPU. See the xsave_info structure in
* cpuid.c.
*
- * xsave component usage is tracked using bits in the xs_xstate_bv field. The
- * components are documented in section 13.1 of IASDv1. For easy reference,
- * this is a summary of the currently defined component bit definitions:
+ * xsave component usage is tracked using bits in the xstate_bv field of the
+ * header. The components are documented in section 13.1 of IASDv1. For easy
+ * reference, this is a summary of the currently defined component bit
+ * definitions:
* x87 0x0001
* SSE 0x0002
* AVX 0x0004
@@ -259,21 +273,28 @@ struct fxsave_state {
* PT 0x0100
* PKRU 0x0200
* When xsaveopt_ctxt is being used to save into the xsave_state area, the
- * xs_xstate_bv field is updated by the xsaveopt instruction to indicate which
+ * xstate_bv field is updated by the xsaveopt instruction to indicate which
* elements of the xsave area are active.
*
- * xs_xcomp_bv should always be 0, since we do not currently use the compressed
- * form of xsave (xsavec).
+ * The xcomp_bv field should always be 0, since we do not currently use the
+ * compressed form of xsave (xsavec).
*/
struct xsave_state {
struct fxsave_state xs_fxsave; /* 0-511 legacy region */
- uint64_t xs_xstate_bv; /* 512-519 start xsave header */
- uint64_t xs_xcomp_bv; /* 520-527 */
- uint64_t xs_reserved[6]; /* 528-575 end xsave header */
+ struct xsave_header xs_header; /* 512-575 XSAVE header */
upad128_t xs_ymm[16]; /* 576 AVX component */
} __aligned(64);
/*
+ * While AVX_XSAVE_SIZE is the smallest the kernel will allocate for FPU
+ * state-saving, other consumers may constrain themselves to the minimum
+ * possible xsave state structure, which features only the legacy area and the
+ * bare xsave header.
+ */
+#define MIN_XSAVE_SIZE (sizeof (struct fxsave_state) + \
+ sizeof (struct xsave_header))
+
+/*
* Kernel's FPU save area
*/
typedef struct {