diff options
author | Dan McDonald <danmcd@joyent.com> | 2022-02-18 14:26:58 -0500 |
---|---|---|
committer | Dan McDonald <danmcd@joyent.com> | 2022-02-18 14:26:58 -0500 |
commit | 6c5a782db588047b641eaac2971cdd95add02bdf (patch) | |
tree | 483a44da347ac8ec1f256591389ad5cb09556dee | |
parent | c14dafef3b7615cc4991d02e1a9cdf506fdbc735 (diff) | |
parent | 59b827862fcc03b4da50df402eeb6288a75ac015 (diff) | |
download | illumos-joyent-6c5a782db588047b641eaac2971cdd95add02bdf.tar.gz |
[illumos-gate merge]
commit c2cd3a449cfa117e3a164f66931fa6c26c762945
14022 zpool online -e breaks access to pool
commit 957246c9e6c47389c40079995d73eebcc659fb29
14456 bhyve needs fpu import/export
commit 4dde95dacc64b35aa9882fcbd0a847355d130734
14501 pcieadm could decode rcld
commit b302a2007db5ab3847583f9a046d41c11789c092
14512 iwn: suspicious concatenation of string literals
commit 6f0e4dc91b854250fff5c24de2d27aed3375ac69
14469 nvme could raise dynamic lun expansion sysevents
commit cd0d4b4073e62fa22997078b1595f399434a1047
14450 Want PCI platform resource discovery module
commit 55855f50d61b53851853bf1fdcdb04d4b63a1734
14488 lex: clean up warnings
Conflicts:
manifest
50 files changed, 1920 insertions, 716 deletions
@@ -1967,6 +1967,7 @@ f platform/i86pc/kernel/misc/amd64/acpidev 0755 root sys f platform/i86pc/kernel/misc/amd64/cpr 0755 root sys f platform/i86pc/kernel/misc/amd64/drmach_acpi 0755 root sys f platform/i86pc/kernel/misc/amd64/gfx_private 0755 root sys +f platform/i86pc/kernel/misc/amd64/pci_prd 0755 root sys d platform/i86pc/kernel/pcbe 0755 root sys d platform/i86pc/kernel/pcbe/amd64 0755 root sys f platform/i86pc/kernel/pcbe/amd64/pcbe.AuthenticAMD 0755 root sys diff --git a/usr/src/cmd/bhyvectl/Makefile b/usr/src/cmd/bhyvectl/Makefile index 01d331c823..486f39da31 100644 --- a/usr/src/cmd/bhyvectl/Makefile +++ b/usr/src/cmd/bhyvectl/Makefile @@ -35,6 +35,9 @@ CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \ -I$(SRC)/uts/i86pc LDLIBS += -lvmmapi +# Force c99 for everything +CSTD= $(CSTD_GNU99) + CERRWARN += -_gcc=-Wno-uninitialized # main() is too hairy for smatch diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c index 4fc6ddc251..cbe779a4ea 100644 --- a/usr/src/cmd/bhyvectl/bhyvectl.c +++ b/usr/src/cmd/bhyvectl/bhyvectl.c @@ -39,7 +39,7 @@ * * Copyright 2015 Pluribus Networks Inc. * Copyright 2019 Joyent, Inc. 
- * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #include <sys/cdefs.h> @@ -51,6 +51,9 @@ __FBSDID("$FreeBSD$"); #include <sys/errno.h> #include <sys/mman.h> #include <sys/cpuset.h> +#ifndef __FreeBSD__ +#include <sys/fp.h> +#endif /* __FreeBSD__ */ #include <stdio.h> #include <stdlib.h> @@ -312,6 +315,7 @@ static int get_cpu_topology; #ifndef __FreeBSD__ static int pmtmr_port; static int wrlock_cycle; +static int get_fpu; #endif /* @@ -1534,6 +1538,7 @@ setup_options(bool cpu_intel) #ifndef __FreeBSD__ { "pmtmr-port", REQ_ARG, 0, PMTMR_PORT }, { "wrlock-cycle", NO_ARG, &wrlock_cycle, 1 }, + { "get-fpu", NO_ARG, &get_fpu, 1 }, #endif }; @@ -1752,6 +1757,93 @@ show_memseg(struct vmctx *ctx) } } +#ifndef __FreeBSD__ +static int +show_fpu(struct vmctx *ctx, int vcpu) +{ + int res, fd; + + struct vm_fpu_desc_entry entries[64]; + struct vm_fpu_desc desc = { + .vfd_entry_data = entries, + .vfd_num_entries = 64, + }; + fd = vm_get_device_fd(ctx); + res = ioctl(fd, VM_DESC_FPU_AREA, &desc); + if (res != 0) { + return (errno); + } + for (uint_t i = 0; i < desc.vfd_num_entries; i++) { + const struct vm_fpu_desc_entry *entry = &entries[i]; + + /* confirm that AVX fields are where we expect */ + if (entry->vfde_feature == XFEATURE_AVX) { + if (entry->vfde_size != 0x100 || + entry->vfde_off != 0x240) { + (void) fprintf(stderr, + "show_fpu: unexpected AVX size/placement " + "- size:%x off:%x\n", + entry->vfde_size, entry->vfde_off); + return (EINVAL); + } + } + } + void *buf = malloc(desc.vfd_req_size); + if (buf == NULL) { + return (ENOMEM); + } + struct vm_fpu_state req = { + .vcpuid = vcpu, + .buf = buf, + .len = desc.vfd_req_size, + }; + res = ioctl(fd, VM_GET_FPU, &req); + if (res != 0) { + res = errno; + free(buf); + return (res); + } + + const struct xsave_state *state = buf; + const struct fxsave_state *fx = &state->xs_fxsave; + (void) printf("fpu_fcw[%d]\t\t0x%04x\n", vcpu, fx->fx_fcw); + (void) printf("fpu_fsw[%d]\t\t0x%04x\n", 
vcpu, fx->fx_fsw); + (void) printf("fpu_ftw[%d]\t\t0x%04x\n", vcpu, fx->fx_fctw); + (void) printf("fpu_fop[%d]\t\t0x%04x\n", vcpu, fx->fx_fop); + (void) printf("fpu_rip[%d]\t\t0x%016lx\n", vcpu, fx->fx_rip); + (void) printf("fpu_rdp[%d]\t\t0x%016lx\n", vcpu, fx->fx_rdp); + (void) printf("fpu_mxcsr[%d]\t\t0x%08x\n", vcpu, fx->fx_mxcsr); + (void) printf("fpu_mxcsr_mask[%d]\t0x%08x\n", vcpu, + fx->fx_mxcsr_mask); + /* ST/MMX regs */ + for (uint_t i = 0; i < 8; i++) { + (void) printf("fpu_st%u[%d]\t\t0x%08x%08x%08x%08x\n", vcpu, i, + fx->fx_st[i].__fpr_pad[0], fx->fx_st[i].__fpr_pad[1], + fx->fx_st[i].__fpr_pad[2], fx->fx_st[i].__fpr_pad[3]); + } + /* SSE regs */ + for (uint_t i = 0; i < 16; i++) { + (void) printf("fpu_xmm%u[%d]\t\t0x%08x%08x%08x%08x\n", + i, vcpu, + fx->fx_xmm[i]._l[0], fx->fx_xmm[i]._l[1], + fx->fx_xmm[i]._l[2], fx->fx_xmm[i]._l[3]); + } + + if (state->xs_header.xsh_xstate_bv & XFEATURE_AVX) { + /* AVX regs */ + for (uint_t i = 0; i < 16; i++) { + (void) printf("fpu_ymm%u[%d]\t\t0x%08x%08x%08x%08x\n", + i, vcpu, + state->xs_ymm[i]._l[0], state->xs_ymm[i]._l[1], + state->xs_ymm[i]._l[2], state->xs_ymm[i]._l[3]); + } + } + + free(buf); + return (0); +} +#endif /*__FreeBSD__ */ + int main(int argc, char *argv[]) { @@ -2150,6 +2242,12 @@ main(int argc, char *argv[]) if (!error) error = get_all_segments(ctx, vcpu); +#ifndef __FreeBSD__ + if (!error && (get_fpu || get_all)) { + error = show_fpu(ctx, vcpu); + } +#endif /* __FreeBSD__ */ + if (!error) { if (cpu_intel) error = get_misc_vmcs(ctx, vcpu); diff --git a/usr/src/cmd/pcieadm/pcieadm_cfgspace.c b/usr/src/cmd/pcieadm/pcieadm_cfgspace.c index 420613da75..73841d4c23 100644 --- a/usr/src/cmd/pcieadm/pcieadm_cfgspace.c +++ b/usr/src/cmd/pcieadm/pcieadm_cfgspace.c @@ -4267,6 +4267,91 @@ pcieadm_cap_info_ht(pcieadm_cfgspace_walk_t *walkp, } } +/* + * Root Complex Link Declaration + */ +static pcieadm_regdef_t pcieadm_regdef_rcld_desc[] = { + { 0, 3, "type", "Element Type", PRDV_STRVAL, + .prd_val = { 
.prdv_strval = { "Configuration Space Element", + "System Egress Port or internal sink", + "Internal Root Complex Link" } } }, + { 8, 15, "num", "Number of Entries", PRDV_HEX }, + { 16, 23, "id", "Component ID", PRDV_HEX }, + { 24, 31, "port", "Port Number", PRDV_HEX }, + { -1, -1, NULL } +}; + +static pcieadm_regdef_t pcieadm_regdef_rcld_link[] = { + { 0, 0, "valid", "Link Valid", PRDV_STRVAL, + .prd_val = { .prdv_strval = { "no", "yes" } } }, + { 1, 1, "type", "Link Type", PRDV_STRVAL, + .prd_val = { .prdv_strval = { "RCRB", "Configuration Space" } } }, + { 2, 2, "rcrb", "Assosciate RCRB", PRDV_STRVAL, + .prd_val = { .prdv_strval = { "no", "yes" } } }, + { 16, 23, "tid", "Target Component ID", PRDV_HEX }, + { 24, 31, "tport", "Target Port Number", PRDV_HEX }, + { -1, -1, NULL } +}; + +/* + * Print a variable number of Root Complex Links. + */ +static void +pcieadm_cfgspace_print_rcld(pcieadm_cfgspace_walk_t *walkp, + pcieadm_cfgspace_print_t *print, void *arg) +{ + uint_t nlinks = walkp->pcw_data->pcb_u8[walkp->pcw_capoff + 5]; + + for (uint_t i = 0; i < nlinks; i++) { + char mshort[32], mhuman[128]; + pcieadm_cfgspace_print_t p; + uint16_t off = print->pcp_off + i * 0x10; + uint8_t type = walkp->pcw_data->pcb_u8[walkp->pcw_capoff + off]; + + (void) snprintf(mshort, sizeof (mshort), "link%udesc", i); + (void) snprintf(mhuman, sizeof (mhuman), "Link %u Description"); + + p.pcp_off = off; + p.pcp_len = 4; + p.pcp_short = mshort; + p.pcp_human = mhuman; + p.pcp_print = pcieadm_cfgspace_print_regdef; + p.pcp_arg = pcieadm_regdef_rcld_link; + + p.pcp_print(walkp, &p, p.pcp_arg); + + /* + * The way that we print the link depends on the actual type of + * link which is in bit 2 of the link description. 
+ */ + p.pcp_off += 8; + + if ((type & (1 << 1)) == 0) { + (void) snprintf(mshort, sizeof (mshort), + "link%uaddr", i); + (void) snprintf(mhuman, sizeof (mhuman), + "Link %u Address"); + p.pcp_len = 8; + p.pcp_print = pcieadm_cfgspace_print_hex; + p.pcp_arg = NULL; + + p.pcp_print(walkp, &p, p.pcp_arg); + } else { + warnx("encountered unsupported RCLD Link Address"); + } + } +} + +static pcieadm_cfgspace_print_t pcieadm_cap_rcld[] = { + { 0x0, 4, "caphdr", "Capability Header", + pcieadm_cfgspace_print_regdef, pcieadm_regdef_pcie_caphdr }, + { 0x4, 4, "desc", "Self Description", + pcieadm_cfgspace_print_regdef, pcieadm_regdef_rcld_desc }, + { 0x10, 0x10, "link", "Link Entry", pcieadm_cfgspace_print_rcld }, + { -1, -1, NULL } +}; + + pcieadm_pci_cap_t pcieadm_pci_caps[] = { { PCI_CAP_ID_PM, "pcipm", "PCI Power Management", pcieadm_cap_info_pcipm, { { 2, 8, pcieadm_cap_pcipm_v3 }, @@ -4319,7 +4404,8 @@ pcieadm_pci_cap_t pcieadm_pcie_caps[] = { { PCIE_EXT_CAP_ID_PWR_BUDGET, "powbudg", "Power Budgeting", pcieadm_cap_info_vers, { { 1, 0x10, pcieadm_cap_powbudg } } }, { PCIE_EXT_CAP_ID_RC_LINK_DECL, "rcld", - "Root Complex Link Declaration" }, + "Root Complex Link Declaration", pcieadm_cap_info_vers, + { { 1, 0x1c, pcieadm_cap_rcld } } }, { PCIE_EXT_CAP_ID_RC_INT_LINKCTRL, "rcilc", "Root Complex Internal Link Control" }, { PCIE_EXT_CAP_ID_RC_EVNT_CEA, "rcecea", diff --git a/usr/src/cmd/sgs/lex/Makefile.com b/usr/src/cmd/sgs/lex/Makefile.com index c8100b1e3f..1b40639314 100644 --- a/usr/src/cmd/sgs/lex/Makefile.com +++ b/usr/src/cmd/sgs/lex/Makefile.com @@ -55,9 +55,8 @@ SRCDIR = ../common CSTD= $(CSTD_GNU99) +# unused labels in yaccpar CERRWARN += -_gcc=-Wno-unused-label -CERRWARN += $(CNOWARN_UNINIT) -CERRWARN += -_gcc=-Wno-parentheses # Override default source file derivation rule (in Makefile.lib) # from objects diff --git a/usr/src/cmd/sgs/lex/common/sub1.c b/usr/src/cmd/sgs/lex/common/sub1.c index f1d3fa601b..e63a55e34b 100644 --- a/usr/src/cmd/sgs/lex/common/sub1.c 
+++ b/usr/src/cmd/sgs/lex/common/sub1.c @@ -131,15 +131,16 @@ warning(char *s, ...) { va_list ap; - if (!eof) - if (!yyline) + if (!eof) { + if (!yyline) { (void) fprintf(errorf, "Command line: "); - else { + } else { (void) fprintf(errorf, !no_input ? "" : "\"%s\":", sargv[optind]); (void) fprintf(errorf, "line %d: ", yyline); } + } (void) fprintf(errorf, "Warning: "); va_start(ap, s); (void) vfprintf(errorf, s, ap); @@ -171,8 +172,8 @@ index(int a, CHR *s) int alpha(int c) { - return ('a' <= c && c <= 'z' || - 'A' <= c && c <= 'Z'); + return (('a' <= c && c <= 'z') || + ('A' <= c && c <= 'Z')); } int @@ -209,7 +210,7 @@ scopy(CHR *s, CHR *t) { CHR *i; i = t; - while (*i++ = *s++) + while ((*i++ = *s++) != 0) ; } @@ -494,7 +495,7 @@ cpycom(CHR *p) (void) putc(*t++, fout); } (void) putc('\n', fout); - while (c = gch()) { + while ((c = gch()) != 0) { while (c == '*') { (void) putc((char)c, fout); if ((c = gch()) == '/') { @@ -570,7 +571,7 @@ cpyact(void) goto swt; (void) putwc(c, fout); savline = yyline; - while (c = gch()) { + while ((c = gch()) != 0) { while (c == '*') { (void) putwc(c, fout); if ((c = gch()) == '/') { @@ -591,7 +592,7 @@ cpyact(void) case '"': /* character string */ mth = c; (void) putwc(c, fout); - while (c = gch()) { + while ((c = gch()) != 0) { if (c == '\\') { (void) putwc(c, fout); c = gch(); diff --git a/usr/src/cmd/sgs/lex/common/sub2.c b/usr/src/cmd/sgs/lex/common/sub2.c index 84ff4e4699..399503d7d9 100644 --- a/usr/src/cmd/sgs/lex/common/sub2.c +++ b/usr/src/cmd/sgs/lex/common/sub2.c @@ -462,9 +462,9 @@ nextstate(int s, int c) for (i = 0; i < num; i++) { curpos = *pos++; j = name[curpos]; - if ((!ISOPERATOR(j)) && j == c || - j == RSTR && c == right[curpos] || - j == RCCL && member(c, (CHR *) left[curpos])) { + if ((!ISOPERATOR(j) && j == c) || + (j == RSTR && c == right[curpos]) || + (j == RCCL && member(c, (CHR *) left[curpos]))) { f = foll[curpos]; number = *f; newpos = f+1; diff --git a/usr/src/cmd/sgs/lex/common/sub3.c 
b/usr/src/cmd/sgs/lex/common/sub3.c index 107881958d..4b9cea94ab 100644 --- a/usr/src/cmd/sgs/lex/common/sub3.c +++ b/usr/src/cmd/sgs/lex/common/sub3.c @@ -134,12 +134,13 @@ remch(wchar_t c) * Make sure no EUC chars are used in reg. exp. */ if (!handleeuc) { - if (!isascii(c)) + if (!isascii(c)) { if (iswprint(c)) warning( "Non-ASCII character '%wc' in pattern; use -w or -e lex option.", c); else warning( "Non-ASCII character of value %#x in pattern; use -w or -e lex option.", c); + } /* In any case, we don't need to construct ncgidtbl[]. */ return; } @@ -301,7 +302,7 @@ repbycgid(void) symbol[j] = FALSE; s = (CHR *) left[i]; - while (cc = *s++) { + while ((cc = *s++) != 0) { if (cc == RANGE) { int low, high, i; /* @@ -388,7 +389,7 @@ repbycgid(void) static void setsymbol(int i) { - if (i > sizeof (symbol)) + if (i > (int)sizeof (symbol)) error("setsymbol: (SYSERR) %d out of range", i); symbol[i] = TRUE; } diff --git a/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c b/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c index 4697128c90..82c296a669 100644 --- a/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c +++ b/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c @@ -22,6 +22,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2016 Nexenta Systems, Inc. All rights reserved. + * Copyright 2022 OmniOS Community Edition (OmniOSce) Association. */ /* @@ -32,15 +33,15 @@ * * When a device is added to the system: * - * 1. Search for any vdevs whose devid matches that of the newly added + * 1. Search for any vdevs whose devid matches that of the newly added * device. * - * 2. If no vdevs are found, then search for any vdevs whose devfs path + * 2. If no vdevs are found, then search for any vdevs whose devfs path * matches that of the new device. * * 3. If no vdevs match by either method, then ignore the event. * - * 4. 
Attempt to online the device with a flag to indicate that it should + * 4. Attempt to online the device with a flag to indicate that it should * be unspared when resilvering completes. If this succeeds, then the * same device was inserted and we should continue normally. * @@ -319,11 +320,11 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data) * string. However, we allow substring matches in the following * cases: * - * <path>: This is a devpath, and the target is one - * of its children. + * <path>: This is a devpath, and the target is one + * of its children. * - * <path/> This is a devid for a whole disk, and - * the target is one of its children. + * <path/> This is a devid for a whole disk, and + * the target is one of its children. */ if (path[len] != '\0' && path[len] != ':' && path[len - 1] != '/') @@ -555,7 +556,7 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data) vdev_state_t newstate; nvlist_t *tgt; - syseventd_print(9, "zfsdle_vdev_online: searching for %s in pool %s\n", + syseventd_print(9, "%s: searching for %s in pool %s\n", __func__, devname, zpool_get_name(zhp)); if ((tgt = zpool_find_vdev_by_physpath(zhp, devname, @@ -568,6 +569,11 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data) verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk) == 0); + syseventd_print(9, "%s: " + "found %s in pool %s (wholedisk: %s)\n", __func__, + path, zpool_get_name(zhp), + wholedisk != 0 ? 
"true" : "false"); + (void) strlcpy(fullpath, path, sizeof (fullpath)); if (wholedisk) { fullpath[strlen(fullpath) - 2] = '\0'; @@ -581,12 +587,13 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data) } if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) { - syseventd_print(9, "zfsdle_vdev_online: setting device" - " device %s to ONLINE state in pool %s.\n", - fullpath, zpool_get_name(zhp)); - if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) + syseventd_print(9, "%s: " + "setting device %s to ONLINE state in pool %s.\n", + __func__, fullpath, zpool_get_name(zhp)); + if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) { (void) zpool_vdev_online(zhp, fullpath, 0, &newstate); + } } zpool_close(zhp); return (1); diff --git a/usr/src/compat/bhyve/amd64/machine/fpu.h b/usr/src/compat/bhyve/amd64/machine/fpu.h deleted file mode 100644 index 6bc651d996..0000000000 --- a/usr/src/compat/bhyve/amd64/machine/fpu.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright 2014 Pluribus Networks Inc. - * Copyright (c) 2018, Joyent, Inc. 
- */ - -#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_FPU_H_ -#define _COMPAT_FREEBSD_AMD64_MACHINE_FPU_H_ - -void fpuexit(kthread_t *td); -void fpurestore(void *); -void fpusave(void *); - -struct savefpu *fpu_save_area_alloc(void); -void fpu_save_area_free(struct savefpu *fsa); -void fpu_save_area_reset(struct savefpu *fsa); - -#endif /* _COMPAT_FREEBSD_AMD64_MACHINE_FPU_H_ */ diff --git a/usr/src/compat/bhyve/amd64/machine/pcb.h b/usr/src/compat/bhyve/amd64/machine/pcb.h deleted file mode 100644 index 75b5de640c..0000000000 --- a/usr/src/compat/bhyve/amd64/machine/pcb.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright 2014 Pluribus Networks Inc. - */ - -#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_PCB_H_ -#define _COMPAT_FREEBSD_AMD64_MACHINE_PCB_H_ - -#include <machine/fpu.h> - -#endif /* _COMPAT_FREEBSD_AMD64_MACHINE_PCB_H_ */ diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c index f539ef1f76..beae63a0ca 100644 --- a/usr/src/lib/libzfs/common/libzfs_pool.c +++ b/usr/src/lib/libzfs/common/libzfs_pool.c @@ -27,6 +27,7 @@ * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> * Copyright (c) 2017 Datto Inc. * Copyright (c) 2017, Intel Corporation. + * Copyright 2022 OmniOS Community Edition (OmniOSce) Association. 
*/ #include <ctype.h> @@ -39,6 +40,7 @@ #include <strings.h> #include <unistd.h> #include <libgen.h> +#include <sys/dkio.h> #include <sys/efi_partition.h> #include <sys/vtoc.h> #include <sys/zfs_ioctl.h> @@ -2801,6 +2803,7 @@ static int zpool_relabel_disk(libzfs_handle_t *hdl, const char *name, const char *msg) { char path[MAXPATHLEN]; + enum dkio_state st; int fd, error; int (*_efi_use_whole_disk)(int); @@ -2822,12 +2825,25 @@ zpool_relabel_disk(libzfs_handle_t *hdl, const char *name, const char *msg) * ignore that error and continue on. */ error = _efi_use_whole_disk(fd); - (void) close(fd); if (error && error != VT_ENOSPC) { + (void) close(fd); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " "relabel '%s': unable to read disk capacity"), name); return (zfs_error(hdl, EZFS_NOCAP, msg)); } + + /* + * Writing a new EFI partition table to the disk will have marked + * the geometry as needing re-validation. Before returning, force + * it to be checked by querying the device state, otherwise the + * subsequent vdev_reopen() will very likely fail to read the device + * size, faulting the pool. 
+ */ + st = DKIO_NONE; + (void) ioctl(fd, DKIOCSTATE, (caddr_t)&st); + + (void) close(fd); + return (0); } diff --git a/usr/src/pkg/manifests/system-bhyve-tests.p5m b/usr/src/pkg/manifests/system-bhyve-tests.p5m index 5b4a7351c4..823ed69a60 100644 --- a/usr/src/pkg/manifests/system-bhyve-tests.p5m +++ b/usr/src/pkg/manifests/system-bhyve-tests.p5m @@ -38,6 +38,7 @@ file path=opt/bhyve-tests/tests/mevent/read_requeue mode=0555 file path=opt/bhyve-tests/tests/mevent/vnode_file mode=0555 file path=opt/bhyve-tests/tests/mevent/vnode_zvol mode=0555 dir path=opt/bhyve-tests/tests/vmm +file path=opt/bhyve-tests/tests/vmm/fpu_getset mode=0555 file path=opt/bhyve-tests/tests/vmm/mem_partial mode=0555 file path=opt/bhyve-tests/tests/vmm/mem_seg_map mode=0555 license lic_CDDL license=lic_CDDL diff --git a/usr/src/pkg/manifests/system-kernel-platform.p5m b/usr/src/pkg/manifests/system-kernel-platform.p5m index a7318e04e4..094eb360db 100644 --- a/usr/src/pkg/manifests/system-kernel-platform.p5m +++ b/usr/src/pkg/manifests/system-kernel-platform.p5m @@ -559,6 +559,8 @@ $(i386_ONLY)file path=platform/i86pc/kernel/misc/$(ARCH64)/acpidev group=sys \ mode=0755 $(i386_ONLY)file path=platform/i86pc/kernel/misc/$(ARCH64)/gfx_private \ group=sys mode=0755 +$(i386_ONLY)file path=platform/i86pc/kernel/misc/$(ARCH64)/pci_prd group=sys \ + mode=0755 $(i386_ONLY)dir path=platform/i86pc/ucode group=sys $(i386_ONLY)dir path=platform/i86xpv group=sys $(i386_ONLY)dir path=platform/i86xpv/kernel group=sys diff --git a/usr/src/test/bhyve-tests/runfiles/default.run b/usr/src/test/bhyve-tests/runfiles/default.run index babfa0f7e9..3055f3e2d8 100644 --- a/usr/src/test/bhyve-tests/runfiles/default.run +++ b/usr/src/test/bhyve-tests/runfiles/default.run @@ -20,7 +20,7 @@ post = outputdir = /var/tmp/test_results [/opt/bhyve-tests/tests/vmm] -tests = ['mem_partial', 'mem_seg_map'] +tests = ['mem_partial', 'mem_seg_map', 'fpu_getset'] # Tests of userspace mevent system, built from cmd/bhyve 
[/opt/bhyve-tests/tests/mevent] diff --git a/usr/src/test/bhyve-tests/tests/vmm/Makefile b/usr/src/test/bhyve-tests/tests/vmm/Makefile index c91ed9a7e4..30d06a0f6b 100644 --- a/usr/src/test/bhyve-tests/tests/vmm/Makefile +++ b/usr/src/test/bhyve-tests/tests/vmm/Makefile @@ -16,7 +16,8 @@ include $(SRC)/cmd/Makefile.cmd.64 include $(SRC)/test/Makefile.com PROG = mem_partial \ - mem_seg_map + mem_seg_map \ + fpu_getset COMMON_OBJS = common.o CLEAN_OBJS = $(PROG:%=%.o) diff --git a/usr/src/test/bhyve-tests/tests/vmm/common.c b/usr/src/test/bhyve-tests/tests/vmm/common.c index b7f0a30ed0..622a14c61f 100644 --- a/usr/src/test/bhyve-tests/tests/vmm/common.c +++ b/usr/src/test/bhyve-tests/tests/vmm/common.c @@ -23,12 +23,13 @@ #include <vmmapi.h> struct vmctx * -create_test_vm(void) +create_test_vm(const char *test_suite_name) { char name[VM_MAX_NAMELEN]; int res; - (void) snprintf(name, sizeof (name), "bhyve-test-memmap-%d", getpid()); + (void) snprintf(name, sizeof (name), "bhyve-test-%s-%d", + test_suite_name, getpid()); res = vm_create(name, 0); if (res != 0) { diff --git a/usr/src/test/bhyve-tests/tests/vmm/common.h b/usr/src/test/bhyve-tests/tests/vmm/common.h index 7b64574cf2..f210408b71 100644 --- a/usr/src/test/bhyve-tests/tests/vmm/common.h +++ b/usr/src/test/bhyve-tests/tests/vmm/common.h @@ -16,7 +16,7 @@ #ifndef _COMMON_H_ #define _COMMON_H_ -struct vmctx *create_test_vm(void); +struct vmctx *create_test_vm(const char *); int alloc_memseg(struct vmctx *, int, size_t, const char *); #define PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC) diff --git a/usr/src/test/bhyve-tests/tests/vmm/fpu_getset.c b/usr/src/test/bhyve-tests/tests/vmm/fpu_getset.c new file mode 100644 index 0000000000..814e15dec3 --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/vmm/fpu_getset.c @@ -0,0 +1,333 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. 
+ * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2022 Oxide Computer Company + */ + + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <stropts.h> +#include <strings.h> +#include <signal.h> +#include <setjmp.h> +#include <libgen.h> +#include <sys/debug.h> +#include <sys/fp.h> + +#include <sys/vmm.h> +#include <sys/vmm_dev.h> +#include <sys/x86_archext.h> +#include <vmmapi.h> + +#include "common.h" + +/* Minimal xsave state area (sans any AVX storage) */ +struct xsave_min { + struct fxsave_state legacy; + struct xsave_header header; +}; + +CTASSERT(sizeof (struct xsave_min) == MIN_XSAVE_SIZE); + +struct avx_state { + /* 16 x 128-bit: high portions of the ymm registers */ + uint64_t ymm[32]; +}; + +static bool +get_fpu(int fd, struct vm_fpu_state *req) +{ + int res = ioctl(fd, VM_GET_FPU, req); + if (res != 0) { + perror("could not read FPU for vCPU"); + return (false); + } + return (true); +} + +static bool +set_fpu(int fd, struct vm_fpu_state *req) +{ + int res = ioctl(fd, VM_SET_FPU, req); + if (res != 0) { + perror("could not write FPU for vCPU"); + return (false); + } + return (true); +} + +static bool +check_sse(int fd, const struct vm_fpu_desc *desc, void *fpu_area, + size_t fpu_size) +{ + /* Make sure the x87/MMX/SSE state is described as present */ + bool found_fp = false, found_sse = false; + for (uint_t i = 0; i < desc->vfd_num_entries; i++) { + const struct vm_fpu_desc_entry *ent = &desc->vfd_entry_data[i]; + + switch (ent->vfde_feature) { + case XFEATURE_LEGACY_FP: + found_fp = true; + if (ent->vfde_off != 0 || + ent->vfde_size != sizeof (struct fxsave_state)) { + (void) fprintf(stderr, + "unexpected entity for %x: " + "size=%x off=%x\n", ent->vfde_feature, + ent->vfde_size, 
ent->vfde_off); + return (false); + } + break; + case XFEATURE_SSE: + found_sse = true; + if (ent->vfde_off != 0 || + ent->vfde_size != sizeof (struct fxsave_state)) { + (void) fprintf(stderr, + "unexpected entity for %x: " + "size=%x off=%x\n", ent->vfde_feature, + ent->vfde_size, ent->vfde_off); + return (false); + } + break; + } + } + + if (!found_fp || !found_sse) { + (void) fprintf(stderr, "did not find x87 and SSE area " + "descriptors as expected in initial FPU\n"); + return (false); + } + + struct vm_fpu_state req = { + .vcpuid = 0, + .buf = fpu_area, + .len = fpu_size, + }; + + if (!get_fpu(fd, &req)) { + return (false); + } + + struct xsave_min *xs = fpu_area; + /* + * Executing this test on a freshly-created instance, we expect the FPU + * to only have the legacy and SSE features present in its active state. + */ + if (xs->header.xsh_xstate_bv != (XFEATURE_LEGACY_FP | XFEATURE_SSE)) { + (void) fprintf(stderr, "bad xstate_bv %lx, expected %lx", + xs->header.xsh_xstate_bv, + (XFEATURE_LEGACY_FP | XFEATURE_SSE)); + return (false); + } + + /* load some SSE values to check for a get/set cycle */ + uint64_t *xmm = (void *)&xs->legacy.fx_xmm[0]; + xmm[0] = UINT64_MAX; + xmm[2] = 1; + + if (!set_fpu(fd, &req)) { + return (false); + } + + /* check that those values made it in/out of the guest FPU */ + bzero(fpu_area, fpu_size); + if (!get_fpu(fd, &req)) { + return (false); + } + if (xmm[0] != UINT64_MAX || xmm[2] != 1) { + (void) fprintf(stderr, "SSE test registers not saved\n"); + return (false); + } + + /* Make sure that a bogus MXCSR value is rejected */ + xs->legacy.fx_mxcsr = UINT32_MAX; + int res = ioctl(fd, VM_SET_FPU, &req); + if (res == 0) { + (void) fprintf(stderr, + "write of invalid MXCSR erroneously allowed\n"); + return (false); + } + + return (true); +} + +static bool +check_avx(int fd, const struct vm_fpu_desc *desc, void *fpu_area, + size_t fpu_size) +{ + bool found_avx = false; + size_t avx_size, avx_off; + for (uint_t i = 0; i < 
desc->vfd_num_entries; i++) { + const struct vm_fpu_desc_entry *ent = &desc->vfd_entry_data[i]; + + if (ent->vfde_feature == XFEATURE_AVX) { + found_avx = true; + avx_size = ent->vfde_size; + avx_off = ent->vfde_off; + break; + } + } + + if (!found_avx) { + (void) printf("AVX capability not found on host CPU, " + "skipping related tests\n"); + return (true); + } + + if (avx_size != sizeof (struct avx_state)) { + (void) fprintf(stderr, "unexpected AVX state size: %x, " + "expected %x\n", avx_size, sizeof (struct avx_state)); + return (false); + } + if ((avx_off + avx_size) > fpu_size) { + (void) fprintf(stderr, "AVX data falls outside fpu size: " + "%x > %x\n", avx_off + avx_size, fpu_size); + return (false); + } + + struct xsave_min *xs = fpu_area; + struct avx_state *avx = fpu_area + avx_off; + + /* do a simple data round-trip */ + struct vm_fpu_state req = { + .vcpuid = 0, + .buf = fpu_area, + .len = fpu_size, + }; + if (!get_fpu(fd, &req)) { + return (false); + } + + /* With AVX unused so far, we expect it to be absent from the BV */ + if (xs->header.xsh_xstate_bv != (XFEATURE_LEGACY_FP | XFEATURE_SSE)) { + (void) fprintf(stderr, "bad xstate_bv %lx, expected %lx\n", + xs->header.xsh_xstate_bv, + (XFEATURE_LEGACY_FP | XFEATURE_SSE)); + return (false); + } + + avx->ymm[0] = UINT64_MAX; + avx->ymm[2] = 2; + + /* first write without asserting AVX in BV */ + if (!set_fpu(fd, &req)) { + return (false); + } + + /* And check that the AVX state stays empty */ + bzero(fpu_area, fpu_size); + if (!get_fpu(fd, &req)) { + return (false); + } + if (xs->header.xsh_xstate_bv != (XFEATURE_LEGACY_FP | XFEATURE_SSE)) { + (void) fprintf(stderr, "xstate_bv changed unexpectedly %lx\n", + xs->header.xsh_xstate_bv); + return (false); + } + if (avx->ymm[0] != 0 || avx->ymm[2] != 0) { + (void) fprintf(stderr, "YMM state changed unexpectedly " + "%lx %lx\n", avx->ymm[0], avx->ymm[2]); + return (false); + } + + /* Now write YMM and set the appropriate AVX BV state */ + avx->ymm[0] = 
UINT64_MAX; + avx->ymm[2] = 2; + xs->header.xsh_xstate_bv |= XFEATURE_AVX; + if (!set_fpu(fd, &req)) { + return (false); + } + + /* ... and now check that it stuck */ + bzero(fpu_area, fpu_size); + if (!get_fpu(fd, &req)) { + return (false); + } + if ((xs->header.xsh_xstate_bv & XFEATURE_AVX) == 0) { + (void) fprintf(stderr, "AVX missing from xstate_bv %lx\n", + xs->header.xsh_xstate_bv); + return (false); + } + if (avx->ymm[0] != UINT64_MAX || avx->ymm[2] != 2) { + (void) fprintf(stderr, "YMM state not preserved " + "%lx != %lx | %lx != %lx\n", + avx->ymm[0], UINT64_MAX, avx->ymm[2], 2); + return (false); + } + + + return (true); +} + +int +main(int argc, char *argv[]) +{ + struct vmctx *ctx; + int res, fd; + const char *suite_name = basename(argv[0]); + + ctx = create_test_vm(suite_name); + if (ctx == NULL) { + perror("could not open test VM"); + return (EXIT_FAILURE); + } + fd = vm_get_device_fd(ctx); + + struct vm_fpu_desc_entry entries[64]; + struct vm_fpu_desc desc = { + .vfd_entry_data = entries, + .vfd_num_entries = 64, + }; + + res = ioctl(fd, VM_DESC_FPU_AREA, &desc); + if (res != 0) { + perror("could not query fpu area description"); + goto bail; + } + + /* Make sure the XSAVE area described for this machine is reasonable */ + if (desc.vfd_num_entries == 0) { + (void) fprintf(stderr, "no FPU description entries found\n"); + goto bail; + } + if (desc.vfd_req_size < MIN_XSAVE_SIZE) { + (void) fprintf(stderr, "required XSAVE size %lu < " + "expected %lu\n", desc.vfd_req_size, MIN_XSAVE_SIZE); + goto bail; + } + + const size_t fpu_size = desc.vfd_req_size; + void *fpu_area = malloc(fpu_size); + if (fpu_area == NULL) { + perror("could not allocate fpu area"); + goto bail; + } + bzero(fpu_area, fpu_size); + + if (!check_sse(fd, &desc, fpu_area, fpu_size)) { + goto bail; + } + if (!check_avx(fd, &desc, fpu_area, fpu_size)) { + goto bail; + } + + /* mission accomplished */ + vm_destroy(ctx); + (void) printf("%s\tPASS\n", suite_name); + return (EXIT_SUCCESS); + 
+bail: + vm_destroy(ctx); + (void) printf("%s\tFAIL\n", suite_name); + return (EXIT_FAILURE); +} diff --git a/usr/src/test/bhyve-tests/tests/vmm/mem_partial.c b/usr/src/test/bhyve-tests/tests/vmm/mem_partial.c index b410c673ab..964fdf95c5 100644 --- a/usr/src/test/bhyve-tests/tests/vmm/mem_partial.c +++ b/usr/src/test/bhyve-tests/tests/vmm/mem_partial.c @@ -57,8 +57,9 @@ main(int argc, char *argv[]) struct vmctx *ctx; int res, fd; void *guest_mem; + const char *suite_name = basename(argv[0]); - ctx = create_test_vm(); + ctx = create_test_vm(suite_name); if (ctx == NULL) { perror("could open test VM"); return (1); @@ -192,7 +193,7 @@ main(int argc, char *argv[]) } /* mission accomplished */ - (void) printf("%s\tPASS\n", basename(argv[0])); + (void) printf("%s\tPASS\n", suite_name); vm_destroy(ctx); return (0); diff --git a/usr/src/test/bhyve-tests/tests/vmm/mem_seg_map.c b/usr/src/test/bhyve-tests/tests/vmm/mem_seg_map.c index e80f18547e..92d90bbf28 100644 --- a/usr/src/test/bhyve-tests/tests/vmm/mem_seg_map.c +++ b/usr/src/test/bhyve-tests/tests/vmm/mem_seg_map.c @@ -40,8 +40,9 @@ main(int argc, char *argv[]) struct vmctx *ctx; int res, fd; void *seg_obj, *guest_mem; + const char *suite_name = basename(argv[0]); - ctx = create_test_vm(); + ctx = create_test_vm(suite_name); if (ctx == NULL) { perror("could open test VM"); return (1); @@ -129,7 +130,7 @@ main(int argc, char *argv[]) /* mission accomplished */ vm_destroy(ctx); - (void) printf("%s\tPASS\n", basename(argv[0])); + (void) printf("%s\tPASS\n", suite_name); return (0); bail: diff --git a/usr/src/uts/common/io/blkdev/blkdev.c b/usr/src/uts/common/io/blkdev/blkdev.c index c0bdb3dab2..611666b0a1 100644 --- a/usr/src/uts/common/io/blkdev/blkdev.c +++ b/usr/src/uts/common/io/blkdev/blkdev.c @@ -26,6 +26,7 @@ * Copyright 2017 The MathWorks, Inc. All rights reserved. * Copyright 2019 Western Digital Corporation. * Copyright 2020 Joyent, Inc. + * Copyright 2022 OmniOS Community Edition (OmniOSce) Association. 
*/ #include <sys/types.h> @@ -55,6 +56,11 @@ #include <sys/note.h> #include <sys/blkdev.h> #include <sys/scsi/impl/inquiry.h> +#include <sys/taskq.h> +#include <sys/taskq_impl.h> +#include <sys/disp.h> +#include <sys/sysevent/eventdefs.h> +#include <sys/sysevent/dev.h> /* * blkdev is a driver which provides a lot of the common functionality @@ -122,8 +128,8 @@ * * Locks * ----- - * There are 4 instance global locks d_ocmutex, d_ksmutex, d_errmutex and - * d_statemutex. As well a q_iomutex per waitq/runq pair. + * There are 5 instance global locks d_ocmutex, d_ksmutex, d_errmutex, + * d_statemutex and d_dle_mutex. As well a q_iomutex per waitq/runq pair. * * Lock Hierarchy * -------------- @@ -139,11 +145,16 @@ typedef struct bd bd_t; typedef struct bd_xfer_impl bd_xfer_impl_t; typedef struct bd_queue bd_queue_t; +typedef enum { + BD_DLE_PENDING = 1 << 0, + BD_DLE_RUNNING = 1 << 1 +} bd_dle_state_t; + struct bd { void *d_private; dev_info_t *d_dip; - kmutex_t d_ocmutex; - kmutex_t d_ksmutex; + kmutex_t d_ocmutex; /* open/close */ + kmutex_t d_ksmutex; /* kstat */ kmutex_t d_errmutex; kmutex_t d_statemutex; kcondvar_t d_statecv; @@ -183,6 +194,10 @@ struct bd { ddi_dma_attr_t d_dma; bd_ops_t d_ops; bd_handle_t d_handle; + + kmutex_t d_dle_mutex; + taskq_ent_t d_dle_ent; + bd_dle_state_t d_dle_state; }; struct bd_handle { @@ -328,20 +343,34 @@ static struct modlinkage modlinkage = { static void *bd_state; static krwlock_t bd_lock; +static taskq_t *bd_taskq; int _init(void) { - int rv; + char taskq_name[TASKQ_NAMELEN]; + const char *name; + int rv; rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2); - if (rv != DDI_SUCCESS) { + if (rv != DDI_SUCCESS) return (rv); + + name = mod_modname(&modlinkage); + (void) snprintf(taskq_name, sizeof (taskq_name), "%s_taskq", name); + bd_taskq = taskq_create(taskq_name, 1, minclsyspri, 0, 0, 0); + if (bd_taskq == NULL) { + cmn_err(CE_WARN, "%s: unable to create %s", name, taskq_name); + ddi_soft_state_fini(&bd_state); + 
return (DDI_FAILURE); } + rw_init(&bd_lock, NULL, RW_DRIVER, NULL); + rv = mod_install(&modlinkage); if (rv != DDI_SUCCESS) { rw_destroy(&bd_lock); + taskq_destroy(bd_taskq); ddi_soft_state_fini(&bd_state); } return (rv); @@ -355,6 +384,7 @@ _fini(void) rv = mod_remove(&modlinkage); if (rv == DDI_SUCCESS) { rw_destroy(&bd_lock); + taskq_destroy(bd_taskq); ddi_soft_state_fini(&bd_state); } return (rv); @@ -696,6 +726,8 @@ bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL); mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL); cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL); + mutex_init(&bd->d_dle_mutex, NULL, MUTEX_DRIVER, NULL); + bd->d_dle_state = 0; bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8, bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0); @@ -853,6 +885,7 @@ fail_drive_info: mutex_destroy(&bd->d_statemutex); mutex_destroy(&bd->d_ocmutex); mutex_destroy(&bd->d_ksmutex); + mutex_destroy(&bd->d_dle_mutex); ddi_soft_state_free(bd_state, inst); return (DDI_FAILURE); } @@ -891,6 +924,7 @@ bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) mutex_destroy(&bd->d_ocmutex); mutex_destroy(&bd->d_statemutex); cv_destroy(&bd->d_statecv); + mutex_destroy(&bd->d_dle_mutex); bd_queues_free(bd); ddi_soft_state_free(bd_state, ddi_get_instance(dip)); return (DDI_SUCCESS); @@ -1890,6 +1924,69 @@ bd_runq_exit(bd_xfer_impl_t *xi, int err) } static void +bd_dle_sysevent_task(void *arg) +{ + nvlist_t *attr = NULL; + char *path = NULL; + bd_t *bd = arg; + dev_info_t *dip = bd->d_dip; + size_t n; + + mutex_enter(&bd->d_dle_mutex); + bd->d_dle_state &= ~BD_DLE_PENDING; + bd->d_dle_state |= BD_DLE_RUNNING; + mutex_exit(&bd->d_dle_mutex); + + dev_err(dip, CE_NOTE, "!dynamic LUN expansion"); + + if (nvlist_alloc(&attr, NV_UNIQUE_NAME_TYPE, KM_SLEEP) != 0) { + mutex_enter(&bd->d_dle_mutex); + bd->d_dle_state &= ~(BD_DLE_RUNNING|BD_DLE_PENDING); + mutex_exit(&bd->d_dle_mutex); + return; + } + + path = 
kmem_zalloc(MAXPATHLEN, KM_SLEEP); + + n = snprintf(path, MAXPATHLEN, "/devices"); + (void) ddi_pathname(dip, path + n); + n = strlen(path); + n += snprintf(path + n, MAXPATHLEN - n, ":x"); + + for (;;) { + /* + * On receipt of this event, the ZFS sysevent module will scan + * active zpools for child vdevs matching this physical path. + * In order to catch both whole disk pools and those with an + * EFI boot partition, generate separate sysevents for minor + * node 'a' and 'b'. (By comparison, io/scsi/targets/sd.c sends + * events for just 'a') + */ + for (char c = 'a'; c < 'c'; c++) { + path[n - 1] = c; + + if (nvlist_add_string(attr, DEV_PHYS_PATH, path) != 0) + break; + + (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, + EC_DEV_STATUS, ESC_DEV_DLE, attr, NULL, DDI_SLEEP); + } + + mutex_enter(&bd->d_dle_mutex); + if ((bd->d_dle_state & BD_DLE_PENDING) == 0) { + bd->d_dle_state &= ~BD_DLE_RUNNING; + mutex_exit(&bd->d_dle_mutex); + break; + } + bd->d_dle_state &= ~BD_DLE_PENDING; + mutex_exit(&bd->d_dle_mutex); + } + + nvlist_free(attr); + kmem_free(path, MAXPATHLEN); +} + +static void bd_update_state(bd_t *bd) { enum dkio_state state = DKIO_INSERTED; @@ -1908,8 +2005,7 @@ bd_update_state(bd_t *bd) if ((media.m_blksize < 512) || (!ISP2(media.m_blksize)) || (P2PHASE(bd->d_maxxfer, media.m_blksize))) { - cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)", - ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip), + dev_err(bd->d_dip, CE_WARN, "Invalid media block size (%d)", media.m_blksize); /* * We can't use the media, treat it as not present. @@ -1954,6 +2050,21 @@ done: if (docmlb) { if (state == DKIO_INSERTED) { (void) cmlb_validate(bd->d_cmlbh, 0, 0); + + mutex_enter(&bd->d_dle_mutex); + /* + * If there is already an event pending, there's + * nothing to do; we coalesce multiple events. 
+ */ + if ((bd->d_dle_state & BD_DLE_PENDING) == 0) { + if ((bd->d_dle_state & BD_DLE_RUNNING) == 0) { + taskq_dispatch_ent(bd_taskq, + bd_dle_sysevent_task, bd, 0, + &bd->d_dle_ent); + } + bd->d_dle_state |= BD_DLE_PENDING; + } + mutex_exit(&bd->d_dle_mutex); } else { cmlb_invalidate(bd->d_cmlbh, 0); } diff --git a/usr/src/uts/common/io/iwn/if_iwnreg.h b/usr/src/uts/common/io/iwn/if_iwnreg.h index 78bfc3088f..e6d6d6d4b8 100644 --- a/usr/src/uts/common/io/iwn/if_iwnreg.h +++ b/usr/src/uts/common/io/iwn/if_iwnreg.h @@ -100,10 +100,10 @@ #define IWN_MEM_WADDR 0x410 #define IWN_MEM_WDATA 0x418 #define IWN_MEM_RDATA 0x41c -#define IWN_PRPH_WADDR 0x444 -#define IWN_PRPH_RADDR 0x448 -#define IWN_PRPH_WDATA 0x44c -#define IWN_PRPH_RDATA 0x450 +#define IWN_PRPH_WADDR 0x444 +#define IWN_PRPH_RADDR 0x448 +#define IWN_PRPH_WDATA 0x44c +#define IWN_PRPH_RDATA 0x450 #define IWN_HBUS_TARG_WRPTR 0x460 /* @@ -1694,8 +1694,8 @@ static const struct iwn_chan_band { { 11, { 36, 44, 52, 60, 100, 108, 116, 124, 132, 149, 157 } } }; -#define IWN1000_OTP_NBLOCKS 3 -#define IWN6000_OTP_NBLOCKS 4 +#define IWN1000_OTP_NBLOCKS 3 +#define IWN6000_OTP_NBLOCKS 4 #define IWN6050_OTP_NBLOCKS 7 /* HW rate indices. 
*/ @@ -1971,7 +1971,7 @@ static const char * const iwn_fw_errmsg[] = { "NMI_INTERRUPT_DATA_ACTION_PT", "NMI_TRM_HW_ER", "NMI_INTERRUPT_TRM", - "NMI_INTERRUPT_BREAKPOINT" + "NMI_INTERRUPT_BREAKPOINT", "DEBUG_0", "DEBUG_1", "DEBUG_2", diff --git a/usr/src/uts/common/io/nvme/nvme.c b/usr/src/uts/common/io/nvme/nvme.c index ad076201e0..e9c779e323 100644 --- a/usr/src/uts/common/io/nvme/nvme.c +++ b/usr/src/uts/common/io/nvme/nvme.c @@ -445,6 +445,8 @@ static int nvme_open(dev_t *, int, int, cred_t *); static int nvme_close(dev_t, int, int, cred_t *); static int nvme_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); +static void nvme_changed_ns(nvme_t *, int); + static ddi_ufm_ops_t nvme_ufm_ops = { NULL, nvme_ufm_fill_image, @@ -1955,11 +1957,7 @@ nvme_async_event_task(void *arg) if (nsid == 0) /* end of list */ break; - - dev_err(nvme->n_dip, CE_CONT, - "namespace %u (%s) has changed.\n", - nsid, nvme->n_ns[nsid - 1].ns_name); - /* TODO: handle namespace resize. */ + nvme_changed_ns(nvme, nsid); } break; @@ -2693,6 +2691,41 @@ nvme_prepare_devid(nvme_t *nvme, uint32_t nsid) nvme->n_idctl->id_vid, model, serial, nsid); } +static void +nvme_changed_ns(nvme_t *nvme, int nsid) +{ + nvme_namespace_t *ns = &nvme->n_ns[nsid - 1]; + nvme_identify_nsid_t *idns, *oidns; + + dev_err(nvme->n_dip, CE_NOTE, "!namespace %u (%s) has changed.", + nsid, ns->ns_name); + + if (ns->ns_ignore) + return; + + /* + * The namespace has changed in some way. At present, we only update + * the device capacity and trigger blkdev to check the device state. 
+ */ + + if (nvme_identify(nvme, B_FALSE, nsid, (void **)&idns) != 0) { + dev_err(nvme->n_dip, CE_WARN, + "!failed to identify namespace %d", nsid); + return; + } + + oidns = ns->ns_idns; + ns->ns_idns = idns; + kmem_free(oidns, sizeof (nvme_identify_nsid_t)); + + ns->ns_block_count = idns->id_nsize; + ns->ns_block_size = + 1 << idns->id_lbaf[idns->id_flbas.lba_format].lbaf_lbads; + ns->ns_best_block_size = ns->ns_block_size; + + bd_state_change(ns->ns_bd_hdl); +} + static int nvme_init_ns(nvme_t *nvme, int nsid) { diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index 24fdd94c11..fed179dad1 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -1136,6 +1136,9 @@ NXGEHDRS= \ nxge_virtual.h \ nxge_espc.h +PLATHDRS= \ + pci_prd.h + include Makefile.syshdrs dcam/%.check: dcam/%.h @@ -1203,7 +1206,8 @@ CHECKHDRS= \ $(I1394HDRS:%.h=1394/%.check) \ $(RSMHDRS:%.h=rsm/%.check) \ $(TSOLHDRS:%.h=tsol/%.check) \ - $(NXGEHDRS:%.h=nxge/%.check) + $(NXGEHDRS:%.h=nxge/%.check) \ + $(PLATHDRS:%.h=plat/%.check) .KEEP_STATE: @@ -1243,6 +1247,7 @@ CHECKHDRS= \ $(ROOTTAVORHDRS) \ $(ROOTHERMONHDRS) \ $(ROOTMLNXHDRS) \ + $(ROOTPLATHDRS) \ $(ROOTSCSIHDRS) \ $(ROOTSCSIADHDRS) \ $(ROOTSCSICONFHDRS) \ @@ -1311,6 +1316,7 @@ install_h: \ $(ROOTTAVORHDRS) \ $(ROOTHERMONHDRS) \ $(ROOTMLNXHDRS) \ + $(ROOTPLATHDRS) \ $(ROOTSCSIHDRS) \ $(ROOTSCSIADHDRS) \ $(ROOTSCSIISCSIHDRS) \ diff --git a/usr/src/uts/common/sys/plat/pci_prd.h b/usr/src/uts/common/sys/plat/pci_prd.h new file mode 100644 index 0000000000..aa0a7932b8 --- /dev/null +++ b/usr/src/uts/common/sys/plat/pci_prd.h @@ -0,0 +1,130 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#ifndef _SYS_PLAT_PCI_PRD_H +#define _SYS_PLAT_PCI_PRD_H + +/* + * PCI Platform Resource Discovery (PRD) + * + * This file forms the platform-specific interfaces that a given platform must + * implement to support the discovery of PCI resources. In particular: + * + * o Any root complexes that do not show up through the use of normal scanning + * o Available resources per root-port including: + * + I/O ports + * + Prefetchable Memory + * + Normal Memory + * + PCI buses + * o The naming of slots (the platform uses the PCIe default) + * + * These interfaces are all expected to be implemented by a platform's 'pci_prd' + * module. This is left as a module and not a part of say, unix, so that it can + * in turn depend on other modules that a platform might require, such as ACPI. + * + * In general, unless otherwise indicated, these interfaces will always be + * called from kernel context, typically during boot. The interfaces will only + * be called from a single thread at this time and any locking is managed at a + * layer outside of the pci_prd interfaces. If the subsystem is using some other + * interfaces that may be used by multiple consumers and needs locking (e.g. + * ACPI), then that still must be considered in the design and implementation. + */ + +#include <sys/types.h> +#include <sys/memlist.h> +#include <sys/sunddi.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Resource types that can be asked after. + */ +typedef enum pci_prd_rsrc { + PCI_PRD_R_IO, + PCI_PRD_R_MMIO, + PCI_PRD_R_PREFETCH, + PCI_PRD_R_BUS +} pci_prd_rsrc_t; + +typedef struct pci_prd_upcalls { + /* + * Return a dev_info_t, if one exists, for this PCI bus. 
+ */ + dev_info_t *(*pru_bus2dip_f)(uint32_t); +} pci_prd_upcalls_t; + +/* + * Initialization and teardown functions that will be used by the PCI + * enumeration code when it attaches and detaches. If all work is done before + * these come up, there is nothing to do; however, after a call to the _init() + * function, it is expected that the platform module will be ready to respond to + * all function calls. + * + * Note that the _fini function may never be called as on a typical system, as + * any PCI(e) devices with attached drivers will result in the PRD consumer + * remaining loaded. + */ +extern int pci_prd_init(pci_prd_upcalls_t *); +extern void pci_prd_fini(void); + +/* + * Return the maximum PCI bus on this platform that should be searched. This + * number is the last bus number that should be scanned. e.g. a value of 0x10 + * indicates that we will search buses [0, 0x10]. In general, it is expected + * that platforms will just return 0xff (PCI_MAX_BUS_NUM - 1) unless for some + * reason it has other knowledge here. + */ +extern uint32_t pci_prd_max_bus(void); + +/* + * Look up a set of resources that should be assigned to the PCI bus. In + * general, it is expected that these are only the buses that are assigned to + * root complexes. + */ +extern struct memlist *pci_prd_find_resource(uint32_t, pci_prd_rsrc_t); + +/* + * Originally when only using BIOS-derived (pre-ACPI) sources on i86pc, the + * ability to utilize data about multiple buses was considered suspect. As such, + * this exists as a way to indicate that resources on each root complex are + * actually valid. + */ +extern boolean_t pci_prd_multi_root_ok(void); + +/* + * This is used to allow the PCI enumeration code to ask the platform about any + * PCI root complexes that it might know about which might not be discovered + * through the normal scanning process. One callback will be emitted for each + * PCI bus via a call to the callback function. 
The return value of the callback + * function determines whether we should continue iterating (B_TRUE) or + * terminate (B_FALSE). + */ +typedef boolean_t (*pci_prd_root_complex_f)(uint32_t, void *); +extern void pci_prd_root_complex_iter(pci_prd_root_complex_f, void *); + +/* + * Give the chance for a platform file to go through and use knowledge that it + * has (such as the traditional BIOS PCI IRQ routing table) to name the PCI(e) + * slot. + */ +extern void pci_prd_slot_name(uint32_t, dev_info_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_PLAT_PCI_PRD_H */ diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files index 3f387f508c..e29d11b64b 100644 --- a/usr/src/uts/i86pc/Makefile.files +++ b/usr/src/uts/i86pc/Makefile.files @@ -25,7 +25,7 @@ # Copyright (c) 2010, Intel Corporation. # Copyright 2019 OmniOS Community Edition (OmniOSce) Association. # Copyright 2020 Joyent, Inc. -# Copyright 2021 Oxide Computer Company +# Copyright 2022 Oxide Computer Company # Copyright 2021 Jason King # # This Makefile defines file modules in the directory uts/i86pc @@ -287,6 +287,8 @@ VIONA_OBJS += viona_main.o \ PPT_OBJS += ppt.o +PCI_PRD_OBJS += pci_prd_i86pc.o pci_memlist.o + # # Build up defines and paths. 
# diff --git a/usr/src/uts/i86pc/Makefile.i86pc b/usr/src/uts/i86pc/Makefile.i86pc index 4a11adcaa2..cd1f6e2db9 100644 --- a/usr/src/uts/i86pc/Makefile.i86pc +++ b/usr/src/uts/i86pc/Makefile.i86pc @@ -294,7 +294,7 @@ SYS_KMODS += # # 'Misc' Modules (/kernel/misc): # -MISC_KMODS += gfx_private pcie +MISC_KMODS += gfx_private pcie pci_prd MISC_KMODS += acpidev MISC_KMODS += drmach_acpi diff --git a/usr/src/uts/intel/io/pci/mps_table.h b/usr/src/uts/i86pc/io/pci/mps_table.h index 8f8c1dc24e..df693eb091 100644 --- a/usr/src/uts/intel/io/pci/mps_table.h +++ b/usr/src/uts/i86pc/io/pci/mps_table.h @@ -29,8 +29,6 @@ #ifndef _MPS_TABLE_H #define _MPS_TABLE_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -42,7 +40,7 @@ struct mps_fps_hdr { /* MP Floating Pointer Structure */ uchar_t fps_len; /* in paragraph (16-bytes units) */ uchar_t fps_spec_rev; /* MP Spec. version no. */ uchar_t fps_cksum; /* checksum of complete structure */ - uchar_t fps_featinfo1; /* mp feature info byte 1 */ + uchar_t fps_featinfo1; /* mp feature info byte 1 */ uchar_t fps_featinfo2; /* mp feature info byte 2 */ uchar_t fps_featinfo3; /* mp feature info byte 3 */ uchar_t fps_featinfo4; /* mp feature info byte 4 */ @@ -51,7 +49,7 @@ struct mps_fps_hdr { /* MP Floating Pointer Structure */ struct mps_ct_hdr { /* MP Configuration Table Header */ uint32_t ct_sig; /* "PCMP" */ - uint16_t ct_len; /* base configuration in bytes */ + uint16_t ct_len; /* base configuration in bytes */ uchar_t ct_spec_rev; /* MP Spec. version no. */ uchar_t ct_cksum; /* base configuration table checksum */ char ct_oem_id[8]; /* string identifies the manufacturer */ @@ -60,7 +58,7 @@ struct mps_ct_hdr { /* MP Configuration Table Header */ uint16_t ct_oem_tbl_len; /* size of base OEM table in bytes */ uint16_t ct_entry_cnt; /* no. 
of entries in the base table */ uint32_t ct_local_apic; /* paddr of local APIC */ - uint16_t ct_ext_tbl_len; /* extended table in bytes */ + uint16_t ct_ext_tbl_len; /* extended table in bytes */ uchar_t ct_ext_cksum; /* checksum for the extended table */ }; diff --git a/usr/src/uts/intel/io/pci/pci_resource.c b/usr/src/uts/i86pc/io/pci/pci_prd_i86pc.c index 57dbe12427..5ba872655c 100644 --- a/usr/src/uts/intel/io/pci/pci_resource.c +++ b/usr/src/uts/i86pc/io/pci/pci_prd_i86pc.c @@ -19,54 +19,63 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2016 Joyent, Inc. + * Copyright 2019 Western Digital Corporation * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2022 Oxide Computer Company + */ + +/* + * This file contains the x86 PCI platform resource discovery backend. This uses + * data from a combination of sources, preferring ACPI, if present, and if not, + * falling back to either the PCI hot-plug resource table or the mps tables. * - * pci_resource.c -- routines to retrieve available bus resources from - * the MP Spec. Table and Hotplug Resource Table + * Today, to get information from ACPI we need to start from a dev_info_t. This + * is partly why the PRD interface has a callback for getting information about + * a dev_info_t. It also means we cannot initialize the tables with information + * until all devices have been initially scanned. 
*/ #include <sys/types.h> #include <sys/memlist.h> +#include <sys/pci.h> #include <sys/pci_impl.h> +#include <sys/pci_cfgspace_impl.h> +#include <sys/sunndi.h> #include <sys/systm.h> #include <sys/cmn_err.h> #include <sys/acpi/acpi.h> #include <sys/acpica.h> +#include <sys/plat/pci_prd.h> #include "mps_table.h" #include "pcihrt.h" -extern int pci_boot_debug; extern int pci_bios_maxbus; -#define dprintf if (pci_boot_debug) printf + +int pci_prd_debug = 0; +#define dprintf if (pci_prd_debug) printf +#define dcmn_err if (pci_prd_debug != 0) cmn_err static int tbl_init = 0; static uchar_t *mps_extp = NULL; static uchar_t *mps_ext_endp = NULL; static struct php_entry *hrt_hpep; -static int hrt_entry_cnt = 0; +static uint_t hrt_entry_cnt = 0; static int acpi_cb_cnt = 0; +static pci_prd_upcalls_t *prd_upcalls; static void mps_probe(void); static void acpi_pci_probe(void); -static int mps_find_bus_res(int, int, struct memlist **); +static int mps_find_bus_res(uint32_t, pci_prd_rsrc_t, struct memlist **); static void hrt_probe(void); -static int hrt_find_bus_res(int, int, struct memlist **); -static int acpi_find_bus_res(int, int, struct memlist **); +static int hrt_find_bus_res(uint32_t, pci_prd_rsrc_t, struct memlist **); +static int acpi_find_bus_res(uint32_t, pci_prd_rsrc_t, struct memlist **); static uchar_t *find_sig(uchar_t *cp, int len, char *sig); static int checksum(unsigned char *cp, int len); static ACPI_STATUS acpi_wr_cb(ACPI_RESOURCE *rp, void *context); -void bus_res_fini(void); static void acpi_trim_bus_ranges(void); -struct memlist *acpi_io_res[256]; -struct memlist *acpi_mem_res[256]; -struct memlist *acpi_pmem_res[256]; -struct memlist *acpi_bus_res[256]; - /* * -1 = attempt ACPI resource discovery * 0 = don't attempt ACPI resource discovery @@ -74,53 +83,35 @@ struct memlist *acpi_bus_res[256]; */ volatile int acpi_resource_discovery = -1; -struct memlist * -find_bus_res(int bus, int type) -{ - struct memlist *res = NULL; - boolean_t bios = B_TRUE; - - 
/* if efi-systab property exist, there is no BIOS */ - if (ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), DDI_PROP_DONTPASS, - "efi-systab")) { - bios = B_FALSE; - } - - if (tbl_init == 0) { - tbl_init = 1; - acpi_pci_probe(); - if (bios) { - hrt_probe(); - mps_probe(); - } - } - - if (acpi_find_bus_res(bus, type, &res) > 0) - return (res); - - if (bios && hrt_find_bus_res(bus, type, &res) > 0) - return (res); +struct memlist *acpi_io_res[PCI_MAX_BUS_NUM]; +struct memlist *acpi_mem_res[PCI_MAX_BUS_NUM]; +struct memlist *acpi_pmem_res[PCI_MAX_BUS_NUM]; +struct memlist *acpi_bus_res[PCI_MAX_BUS_NUM]; - if (bios) - (void) mps_find_bus_res(bus, type, &res); - return (res); -} +/* + * This indicates whether or not we have a traditional x86 BIOS present or not. + */ +static boolean_t pci_prd_have_bios = B_TRUE; +/* + * This value is set up as part of PCI configuration space initialization. + */ +extern int pci_bios_maxbus; static void acpi_pci_probe(void) { ACPI_HANDLE ah; - dev_info_t *dip; int bus; if (acpi_resource_discovery == 0) return; for (bus = 0; bus <= pci_bios_maxbus; bus++) { - /* if no dip or no ACPI handle, no resources to discover */ - dip = pci_bus_res[bus].dip; - if ((dip == NULL) || + dev_info_t *dip; + + dip = prd_upcalls->pru_bus2dip_f(bus); + if (dip == NULL || (ACPI_FAILURE(acpica_get_handle(dip, &ah)))) continue; @@ -142,7 +133,7 @@ acpi_pci_probe(void) * be trimmed to "0..7", in the example). 
*/ static void -acpi_trim_bus_ranges() +acpi_trim_bus_ranges(void) { struct memlist *ranges, *current; int bus; @@ -154,7 +145,7 @@ acpi_trim_bus_ranges() * - there exists at most 1 bus range entry for each bus number * - there are no (broken) ranges that start at the same bus number */ - for (bus = 0; bus < 256; bus++) { + for (bus = 0; bus < PCI_MAX_BUS_NUM; bus++) { struct memlist *prev, *orig, *new; /* skip buses with no range entry */ if ((orig = acpi_bus_res[bus]) == NULL) @@ -211,20 +202,21 @@ acpi_trim_bus_ranges() } static int -acpi_find_bus_res(int bus, int type, struct memlist **res) +acpi_find_bus_res(uint32_t bus, pci_prd_rsrc_t type, struct memlist **res) { + ASSERT3U(bus, <, PCI_MAX_BUS_NUM); switch (type) { - case IO_TYPE: + case PCI_PRD_R_IO: *res = acpi_io_res[bus]; break; - case MEM_TYPE: + case PCI_PRD_R_MMIO: *res = acpi_mem_res[bus]; break; - case PREFETCH_TYPE: + case PCI_PRD_R_PREFETCH: *res = acpi_pmem_res[bus]; break; - case BUSRANGE_TYPE: + case PCI_PRD_R_BUS: *res = acpi_bus_res[bus]; break; default: @@ -236,19 +228,6 @@ acpi_find_bus_res(int bus, int type, struct memlist **res) return (memlist_count(*res)); } -void -bus_res_fini(void) -{ - int bus; - - for (bus = 0; bus <= pci_bios_maxbus; bus++) { - memlist_free_all(&acpi_io_res[bus]); - memlist_free_all(&acpi_mem_res[bus]); - memlist_free_all(&acpi_pmem_res[bus]); - memlist_free_all(&acpi_bus_res[bus]); - } -} - static struct memlist ** rlistpp(UINT8 t, UINT8 caching, int bus) { @@ -298,7 +277,7 @@ acpi_dbg(uint_t bus, uint64_t addr, uint64_t len, uint8_t caching, uint8_t type, } -ACPI_STATUS +static ACPI_STATUS acpi_wr_cb(ACPI_RESOURCE *rp, void *context) { int bus = (intptr_t)context; @@ -332,7 +311,7 @@ acpi_wr_cb(ACPI_RESOURCE *rp, void *context) acpi_cb_cnt++; memlist_insert(&acpi_io_res[bus], rp->Data.Io.Minimum, rp->Data.Io.AddressLength); - if (pci_boot_debug != 0) { + if (pci_prd_debug != 0) { acpi_dbg(bus, rp->Data.Io.Minimum, rp->Data.Io.AddressLength, 0, ACPI_IO_RANGE, 
"IO"); } @@ -374,7 +353,7 @@ acpi_wr_cb(ACPI_RESOURCE *rp, void *context) rp->Data.Address.Info.Mem.Caching, bus), rp->Data.Address16.Address.Minimum, rp->Data.Address16.Address.AddressLength); - if (pci_boot_debug != 0) { + if (pci_prd_debug != 0) { acpi_dbg(bus, rp->Data.Address16.Address.Minimum, rp->Data.Address16.Address.AddressLength, @@ -391,7 +370,7 @@ acpi_wr_cb(ACPI_RESOURCE *rp, void *context) rp->Data.Address.Info.Mem.Caching, bus), rp->Data.Address32.Address.Minimum, rp->Data.Address32.Address.AddressLength); - if (pci_boot_debug != 0) { + if (pci_prd_debug != 0) { acpi_dbg(bus, rp->Data.Address32.Address.Minimum, rp->Data.Address32.Address.AddressLength, @@ -409,7 +388,7 @@ acpi_wr_cb(ACPI_RESOURCE *rp, void *context) rp->Data.Address.Info.Mem.Caching, bus), rp->Data.Address64.Address.Minimum, rp->Data.Address64.Address.AddressLength); - if (pci_boot_debug != 0) { + if (pci_prd_debug != 0) { acpi_dbg(bus, rp->Data.Address64.Address.Minimum, rp->Data.Address64.Address.AddressLength, @@ -426,7 +405,7 @@ acpi_wr_cb(ACPI_RESOURCE *rp, void *context) rp->Data.Address.Info.Mem.Caching, bus), rp->Data.ExtAddress64.Address.Minimum, rp->Data.ExtAddress64.Address.AddressLength); - if (pci_boot_debug != 0) { + if (pci_prd_debug != 0) { acpi_dbg(bus, rp->Data.ExtAddress64.Address.Minimum, rp->Data.ExtAddress64.Address.AddressLength, @@ -450,7 +429,7 @@ acpi_wr_cb(ACPI_RESOURCE *rp, void *context) } static void -mps_probe() +mps_probe(void) { uchar_t *extp; struct mps_fps_hdr *fpp = NULL; @@ -521,22 +500,43 @@ mps_probe() static int -mps_find_bus_res(int bus, int type, struct memlist **res) +mps_find_bus_res(uint32_t bus, pci_prd_rsrc_t rsrc, struct memlist **res) { struct sasm *sasmp; uchar_t *extp; - int res_cnt; + int res_cnt, type; + + ASSERT3U(bus, <, PCI_MAX_BUS_NUM); if (mps_extp == NULL) return (0); + + switch (rsrc) { + case PCI_PRD_R_IO: + type = IO_TYPE; + break; + case PCI_PRD_R_MMIO: + type = MEM_TYPE; + break; + case PCI_PRD_R_PREFETCH: + type = 
PREFETCH_TYPE; + break; + case PCI_PRD_R_BUS: + type = BUSRANGE_TYPE; + break; + default: + *res = NULL; + return (0); + } + extp = mps_extp; res_cnt = 0; while (extp < mps_ext_endp) { switch (*extp) { case SYS_AS_MAPPING: sasmp = (struct sasm *)extp; - if (((int)sasmp->sasm_as_type) == type && - ((int)sasmp->sasm_bus_id) == bus) { + if (sasmp->sasm_as_type == type && + sasmp->sasm_bus_id == bus) { uint64_t base, len; base = (uint64_t)sasmp->sasm_as_base | @@ -558,11 +558,7 @@ mps_find_bus_res(int bus, int type, struct memlist **res) cmn_err(CE_WARN, "Unknown descriptor type %d" " in BIOS Multiprocessor Spec table.", *extp); - while (*res) { - struct memlist *tmp = *res; - *res = tmp->ml_next; - memlist_free(tmp); - } + memlist_free_all(res); return (0); } } @@ -570,7 +566,7 @@ mps_find_bus_res(int bus, int type, struct memlist **res) } static void -hrt_probe() +hrt_probe(void) { struct hrt_hdr *hrtp; @@ -585,44 +581,46 @@ hrt_probe() dprintf("PCI Hot-Plug Resource Table version no. <> 1\n"); return; } - hrt_entry_cnt = (int)hrtp->hrt_entry_cnt; + hrt_entry_cnt = (uint_t)hrtp->hrt_entry_cnt; dprintf("No. 
of PCI hot-plug slot entries = 0x%x\n", hrt_entry_cnt); hrt_hpep = (struct php_entry *)(hrtp + 1); } static int -hrt_find_bus_res(int bus, int type, struct memlist **res) +hrt_find_bus_res(uint32_t bus, pci_prd_rsrc_t type, struct memlist **res) { - int res_cnt, i; + int res_cnt; struct php_entry *hpep; + ASSERT3U(bus, <, PCI_MAX_BUS_NUM); + if (hrt_hpep == NULL || hrt_entry_cnt == 0) return (0); hpep = hrt_hpep; res_cnt = 0; - for (i = 0; i < hrt_entry_cnt; i++, hpep++) { + for (uint_t i = 0; i < hrt_entry_cnt; i++, hpep++) { if (hpep->php_pri_bus != bus) continue; - if (type == IO_TYPE) { + if (type == PCI_PRD_R_IO) { if (hpep->php_io_start == 0 || hpep->php_io_size == 0) continue; memlist_insert(res, (uint64_t)hpep->php_io_start, (uint64_t)hpep->php_io_size); res_cnt++; - } else if (type == MEM_TYPE) { + } else if (type == PCI_PRD_R_MMIO) { if (hpep->php_mem_start == 0 || hpep->php_mem_size == 0) continue; memlist_insert(res, - (uint64_t)(((int)hpep->php_mem_start) << 16), - (uint64_t)(((int)hpep->php_mem_size) << 16)); + ((uint64_t)hpep->php_mem_start) << 16, + ((uint64_t)hpep->php_mem_size) << 16); res_cnt++; - } else if (type == PREFETCH_TYPE) { + } else if (type == PCI_PRD_R_PREFETCH) { if (hpep->php_pfmem_start == 0 || hpep->php_pfmem_size == 0) continue; memlist_insert(res, - (uint64_t)(((int)hpep->php_pfmem_start) << 16), - (uint64_t)(((int)hpep->php_pfmem_size) << 16)); + ((uint64_t)hpep->php_pfmem_start) << 16, + ((uint64_t)hpep->php_pfmem_size) << 16); res_cnt++; } } @@ -656,67 +654,226 @@ checksum(unsigned char *cp, int len) return ((int)(cksum & 0xFF)); } -#ifdef UNUSED_BUS_HIERARY_INFO +uint32_t +pci_prd_max_bus(void) +{ + return ((uint32_t)pci_bios_maxbus); +} + +struct memlist * +pci_prd_find_resource(uint32_t bus, pci_prd_rsrc_t rsrc) +{ + struct memlist *res = NULL; + + if (bus > pci_bios_maxbus) + return (NULL); + + if (tbl_init == 0) { + tbl_init = 1; + acpi_pci_probe(); + if (pci_prd_have_bios) { + hrt_probe(); + mps_probe(); + } + } + + if 
(acpi_find_bus_res(bus, rsrc, &res) > 0) + return (res); + + if (pci_prd_have_bios && hrt_find_bus_res(bus, rsrc, &res) > 0) + return (res); + + if (pci_prd_have_bios) + (void) mps_find_bus_res(bus, rsrc, &res); + return (res); +} + +typedef struct { + pci_prd_root_complex_f ppac_func; + void *ppac_arg; +} pci_prd_acpi_cb_t; + +static ACPI_STATUS +pci_process_acpi_device(ACPI_HANDLE hdl, UINT32 level, void *ctx, void **rv) +{ + ACPI_DEVICE_INFO *adi; + int busnum; + pci_prd_acpi_cb_t *cb = ctx; + + /* + * Use AcpiGetObjectInfo() to find the device _HID + * If not a PCI root-bus, ignore this device and continue + * the walk + */ + if (ACPI_FAILURE(AcpiGetObjectInfo(hdl, &adi))) + return (AE_OK); + + if (!(adi->Valid & ACPI_VALID_HID)) { + AcpiOsFree(adi); + return (AE_OK); + } + + if (strncmp(adi->HardwareId.String, PCI_ROOT_HID_STRING, + sizeof (PCI_ROOT_HID_STRING)) && + strncmp(adi->HardwareId.String, PCI_EXPRESS_ROOT_HID_STRING, + sizeof (PCI_EXPRESS_ROOT_HID_STRING))) { + AcpiOsFree(adi); + return (AE_OK); + } + + AcpiOsFree(adi); + + /* + * acpica_get_busno() will check the presence of _BBN and + * fail if not present. It will then use the _CRS method to + * retrieve the actual bus number assigned, it will fall back + * to _BBN should the _CRS method fail. + */ + if (ACPI_SUCCESS(acpica_get_busno(hdl, &busnum))) { + /* + * Ignore invalid _BBN return values here (rather + * than panic) and emit a warning; something else + * may suffer failure as a result of the broken BIOS. 
+ */ + if (busnum < 0) { + dcmn_err(CE_NOTE, + "pci_process_acpi_device: invalid _BBN 0x%x", + busnum); + return (AE_CTRL_DEPTH); + } + + if (cb->ppac_func((uint32_t)busnum, cb->ppac_arg)) + return (AE_CTRL_DEPTH); + return (AE_CTRL_TERMINATE); + } + + /* PCI and no _BBN, continue walk */ + return (AE_OK); +} + +void +pci_prd_root_complex_iter(pci_prd_root_complex_f func, void *arg) +{ + void *rv; + pci_prd_acpi_cb_t cb; + + cb.ppac_func = func; + cb.ppac_arg = arg; + + /* + * First scan ACPI devices for anything that might be here. After that, + * go through and check the old BIOS IRQ routing table for additional + * buses. Note, slot naming from the IRQ table comes later. + */ + (void) AcpiGetDevices(NULL, pci_process_acpi_device, &cb, &rv); + pci_bios_bus_iter(func, arg); + +} + /* - * At this point, the bus hierarchy entries do not appear to - * provide anything we can't find out from PCI config space. - * The only interesting bit is the ISA bus number, which we - * don't care. + * If there is actually a PCI IRQ routing table present, then we want to use + * this to go back and update the slot name. In particular, if we have no PCI + * IRQ routing table, then we use the existing slot names that were already set + * up for us in picex_slot_names_prop() from the capability register. Otherwise, + * we actually delete all slot-names properties from buses and instead use + * something from the IRQ routing table if it exists. + * + * Note, the property is always deleted regardless of whether or not it exists + * in the IRQ routing table. Finally, we have traditionally kept "pcie0" names + * as special as apparently that can't be represented in the IRQ routing table. 
*/ -int -mps_find_parent_bus(int bus) +void +pci_prd_slot_name(uint32_t bus, dev_info_t *dip) { - struct sasm *sasmp; - uchar_t *extp; + char slotprop[256]; + int len; + char *slotcap_name; - if (mps_extp == NULL) - return (-1); + if (pci_irq_nroutes == 0) + return; - extp = mps_extp; - while (extp < mps_ext_endp) { - bhdp = (struct bhd *)extp; - switch (*extp) { - case SYS_AS_MAPPING: - extp += SYS_AS_MAPPING_SIZE; - break; - case BUS_HIERARCHY_DESC: - if (bhdp->bhd_bus_id == bus) - return (bhdp->bhd_parent); - extp += BUS_HIERARCHY_DESC_SIZE; - break; - case COMP_BUS_AS_MODIFIER: - extp += COMP_BUS_AS_MODIFIER_SIZE; - break; - default: - cmn_err(CE_WARN, "Unknown descriptor type %d" - " in BIOS Multiprocessor Spec table.", - *extp); - return (-1); + if (dip != NULL) { + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, pci_bus_res[bus].dip, + DDI_PROP_DONTPASS, "slot-names", &slotcap_name) != + DDI_SUCCESS || strcmp(slotcap_name, "pcie0") != 0) { + (void) ndi_prop_remove(DDI_DEV_T_NONE, + pci_bus_res[bus].dip, "slot-names"); + } + } + + + len = pci_slot_names_prop(bus, slotprop, sizeof (slotprop)); + if (len > 0) { + if (dip != NULL) { + ASSERT((len % sizeof (int)) == 0); + (void) ndi_prop_update_int_array(DDI_DEV_T_NONE, + pci_bus_res[bus].dip, "slot-names", + (int *)slotprop, len / sizeof (int)); + } else { + cmn_err(CE_NOTE, "!BIOS BUG: Invalid bus number in PCI " + "IRQ routing table; Not adding slot-names " + "property for incorrect bus %d", bus); } } - return (-1); } -int -hrt_find_bus_range(int bus) +boolean_t +pci_prd_multi_root_ok(void) { - int i, max_bus, sub_bus; - struct php_entry *hpep; + return (acpi_resource_discovery > 0); +} - if (hrt_hpep == NULL || hrt_entry_cnt == 0) { - return (-1); +int +pci_prd_init(pci_prd_upcalls_t *upcalls) +{ + if (ddi_prop_exists(DDI_DEV_T_ANY, ddi_root_node(), DDI_PROP_DONTPASS, + "efi-systab")) { + pci_prd_have_bios = B_FALSE; } - hpep = hrt_hpep; - max_bus = -1; - for (i = 0; i < hrt_entry_cnt; i++, hpep++) { - if 
(hpep->php_pri_bus != bus) - continue; - sub_bus = (int)hpep->php_subord_bus; - if (sub_bus > max_bus) - max_bus = sub_bus; + + prd_upcalls = upcalls; + + return (0); +} + +void +pci_prd_fini(void) +{ + int bus; + + for (bus = 0; bus <= pci_bios_maxbus; bus++) { + memlist_free_all(&acpi_io_res[bus]); + memlist_free_all(&acpi_mem_res[bus]); + memlist_free_all(&acpi_pmem_res[bus]); + memlist_free_all(&acpi_bus_res[bus]); } - return (max_bus); } -#endif /* UNUSED_BUS_HIERARY_INFO */ +static struct modlmisc pci_prd_modlmisc_i86pc = { + .misc_modops = &mod_miscops, + .misc_linkinfo = "i86pc PCI Resource Discovery" +}; + +static struct modlinkage pci_prd_modlinkage_i86pc = { + .ml_rev = MODREV_1, + .ml_linkage = { &pci_prd_modlmisc_i86pc, NULL } +}; + +int +_init(void) +{ + return (mod_install(&pci_prd_modlinkage_i86pc)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&pci_prd_modlinkage_i86pc, modinfop)); +} + +int +_fini(void) +{ + return (mod_remove(&pci_prd_modlinkage_i86pc)); +} diff --git a/usr/src/uts/intel/io/pci/pcihrt.h b/usr/src/uts/i86pc/io/pci/pcihrt.h index 7192eca2c5..7857e0314f 100644 --- a/usr/src/uts/intel/io/pci/pcihrt.h +++ b/usr/src/uts/i86pc/io/pci/pcihrt.h @@ -31,16 +31,14 @@ #ifndef _PCIHRT_H #define _PCIHRT_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif struct hrt_hdr { /* PCI Hot-Plug Configuration Resource Table header */ - uint32_t hrt_sig; /* $HRT */ + uint32_t hrt_sig; /* $HRT */ uint16_t hrt_avail_imap; /* Bitmap of unused IRQs */ - uint16_t hrt_used_imap; /* Bitmap of IRQs used by PCI */ + uint16_t hrt_used_imap; /* Bitmap of IRQs used by PCI */ uchar_t hrt_entry_cnt; /* no. of PCI hot-plug slot entries */ uchar_t hrt_ver; /* version no. 
= 1 */ uchar_t hrt_resv0; /* reserved */ @@ -58,7 +56,7 @@ struct php_entry { /* PCI hot-plug slot entry */ uchar_t php_subord_bus; /* Max Subordinate bus of this slot */ uint16_t php_io_start; /* allocated I/O space starting addr */ uint16_t php_io_size; /* allocated I/O space size in bytes */ - uint16_t php_mem_start; /* allocated Memory space start addr */ + uint16_t php_mem_start; /* allocated Memory space start addr */ uint16_t php_mem_size; /* allocated Memory space size in 64k */ uint16_t php_pfmem_start; /* allocated Prefetchable Memory start */ uint16_t php_pfmem_size; /* allocated Prefetchable size in 64k */ diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c index cf00426300..844e8b9708 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c @@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$"); #include <machine/clock.h> #include <machine/cpufunc.h> #include <machine/md_var.h> -#include <machine/pcb.h> #include <machine/specialreg.h> #include <machine/vmm.h> diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h index 7584213d39..e94f7a876b 100644 --- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h +++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h @@ -39,7 +39,7 @@ * * Copyright 2015 Pluribus Networks Inc. * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company * Copyright 2021 OmniOS Community Edition (OmniOSce) Association. 
*/ @@ -161,6 +161,8 @@ int vm_get_run_state(struct vm *vm, int vcpuid, uint32_t *state, uint8_t *sipi_vec); int vm_set_run_state(struct vm *vm, int vcpuid, uint32_t state, uint8_t sipi_vec); +int vm_get_fpu(struct vm *vm, int vcpuid, void *buf, size_t len); +int vm_set_fpu(struct vm *vm, int vcpuid, void *buf, size_t len); int vm_run(struct vm *vm, int vcpuid, const struct vm_entry *); int vm_suspend(struct vm *vm, enum vm_suspend_how how); int vm_inject_nmi(struct vm *vm, int vcpu); diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c index 16acc1ea2c..78a810880d 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c @@ -60,8 +60,8 @@ __FBSDID("$FreeBSD$"); #include <sys/sched.h> #include <sys/systm.h> #include <sys/sunddi.h> +#include <sys/hma.h> -#include <machine/pcb.h> #include <machine/md_var.h> #include <x86/psl.h> #include <x86/apicreg.h> @@ -132,7 +132,7 @@ struct vcpu { int exc_errcode_valid; uint32_t exc_errcode; uint8_t sipi_vector; /* (i) SIPI vector */ - struct savefpu *guestfpu; /* (a,i) guest fpu state */ + hma_fpu_t *guestfpu; /* (a,i) guest fpu state */ uint64_t guest_xcr0; /* (i) guest %xcr0 register */ void *stats; /* (a,i) statistics */ struct vm_exit exitinfo; /* (x) exit reason and collateral */ @@ -318,7 +318,8 @@ vcpu_cleanup(struct vm *vm, int i, bool destroy) VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic); if (destroy) { vmm_stat_free(vcpu->stats); - fpu_save_area_free(vcpu->guestfpu); + hma_fpu_free(vcpu->guestfpu); + vcpu->guestfpu = NULL; vie_free(vcpu->vie_ctx); vcpu->vie_ctx = NULL; vmc_destroy(vcpu->vmclient); @@ -342,7 +343,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create) vcpu->state = VCPU_IDLE; vcpu->hostcpu = NOCPU; vcpu->lastloccpu = NOCPU; - vcpu->guestfpu = fpu_save_area_alloc(); + vcpu->guestfpu = hma_fpu_alloc(KM_SLEEP); vcpu->stats = vmm_stat_alloc(); vcpu->vie_ctx = vie_alloc(); @@ -369,7 +370,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create) vcpu->extint_pending = 
0; vcpu->exception_pending = 0; vcpu->guest_xcr0 = XFEATURE_ENABLED_X87; - fpu_save_area_reset(vcpu->guestfpu); + hma_fpu_init(vcpu->guestfpu); vmm_stat_init(vcpu->stats); vcpu->tsc_offset = 0; } @@ -1168,6 +1169,50 @@ vm_set_seg_desc(struct vm *vm, int vcpu, int reg, const struct seg_desc *desc) return (VMSETDESC(vm->cookie, vcpu, reg, desc)); } +static int +translate_hma_xsave_result(hma_fpu_xsave_result_t res) +{ + switch (res) { + case HFXR_OK: + return (0); + case HFXR_NO_SPACE: + return (ENOSPC); + case HFXR_BAD_ALIGN: + case HFXR_UNSUP_FMT: + case HFXR_UNSUP_FEAT: + case HFXR_INVALID_DATA: + return (EINVAL); + default: + panic("unexpected xsave result"); + } +} + +int +vm_get_fpu(struct vm *vm, int vcpuid, void *buf, size_t len) +{ + if (vcpuid < 0 || vcpuid >= vm->maxcpus) + return (EINVAL); + + struct vcpu *vcpu = &vm->vcpu[vcpuid]; + hma_fpu_xsave_result_t res; + + res = hma_fpu_get_xsave_state(vcpu->guestfpu, buf, len); + return (translate_hma_xsave_result(res)); +} + +int +vm_set_fpu(struct vm *vm, int vcpuid, void *buf, size_t len) +{ + if (vcpuid < 0 || vcpuid >= vm->maxcpus) + return (EINVAL); + + struct vcpu *vcpu = &vm->vcpu[vcpuid]; + hma_fpu_xsave_result_t res; + + res = hma_fpu_set_xsave_state(vcpu->guestfpu, buf, len); + return (translate_hma_xsave_result(res)); +} + int vm_get_run_state(struct vm *vm, int vcpuid, uint32_t *state, uint8_t *sipi_vec) { @@ -1220,13 +1265,9 @@ vm_track_dirty_pages(struct vm *vm, uint64_t gpa, size_t len, uint8_t *bitmap) static void restore_guest_fpustate(struct vcpu *vcpu) { - - /* flush host state to the pcb */ - fpuexit(curthread); - - /* restore guest FPU state */ + /* Save host FPU and restore guest FPU */ fpu_stop_emulating(); - fpurestore(vcpu->guestfpu); + hma_fpu_start_guest(vcpu->guestfpu); /* restore guest XCR0 if XSAVE is enabled in the host */ if (rcr4() & CR4_XSAVE) @@ -1252,9 +1293,9 @@ save_guest_fpustate(struct vcpu *vcpu) load_xcr(0, vmm_get_host_xcr0()); } - /* save guest FPU state */ + /* save 
guest FPU and restore host FPU */ fpu_stop_emulating(); - fpusave(vcpu->guestfpu); + hma_fpu_stop_guest(vcpu->guestfpu); /* * When the host state has been restored, we should not re-enable * CR0.TS on illumos for eager FPU. @@ -2912,7 +2953,7 @@ vcpu_arch_reset(struct vm *vm, int vcpuid, bool init_only) */ if (!init_only) { vcpu->guest_xcr0 = XFEATURE_ENABLED_X87; - fpu_save_area_reset(vcpu->guestfpu); + hma_fpu_init(vcpu->guestfpu); /* XXX: clear MSRs and other pieces */ } diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c index a83989e9eb..4ef2e5f583 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c @@ -414,6 +414,8 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, case VM_RESET_CPU: case VM_GET_RUN_STATE: case VM_SET_RUN_STATE: + case VM_GET_FPU: + case VM_SET_FPU: /* * Copy in the ID of the vCPU chosen for this operation. * Since a nefarious caller could update their struct between @@ -469,6 +471,7 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, case VM_GET_GPA_PMAP: case VM_IOAPIC_PINCOUNT: case VM_SUSPEND: + case VM_DESC_FPU_AREA: default: break; } @@ -755,6 +758,53 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, } break; } + case VM_DESC_FPU_AREA: { + struct vm_fpu_desc desc; + void *buf = NULL; + + if (ddi_copyin(datap, &desc, sizeof (desc), md)) { + error = EFAULT; + break; + } + if (desc.vfd_num_entries > 64) { + error = EINVAL; + break; + } + const size_t buf_sz = sizeof (struct vm_fpu_desc_entry) * + desc.vfd_num_entries; + if (buf_sz != 0) { + buf = kmem_zalloc(buf_sz, KM_SLEEP); + } + + /* + * For now, we are depending on vm_fpu_desc_entry and + * hma_xsave_state_desc_t having the same format. 
+ */ + CTASSERT(sizeof (struct vm_fpu_desc_entry) == + sizeof (hma_xsave_state_desc_t)); + + size_t req_size; + const uint_t max_entries = hma_fpu_describe_xsave_state( + (hma_xsave_state_desc_t *)buf, + desc.vfd_num_entries, + &req_size); + + desc.vfd_req_size = req_size; + desc.vfd_num_entries = max_entries; + if (buf_sz != 0) { + if (ddi_copyout(buf, desc.vfd_entry_data, buf_sz, md)) { + error = EFAULT; + } + kmem_free(buf, buf_sz); + } + + if (error == 0) { + if (ddi_copyout(&desc, datap, sizeof (desc), md)) { + error = EFAULT; + } + } + break; + } case VM_ISA_ASSERT_IRQ: { struct vm_isa_irq isa_irq; @@ -1040,6 +1090,51 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, vrs.sipi_vector); break; } + case VM_GET_FPU: { + struct vm_fpu_state req; + const size_t max_len = (PAGESIZE * 2); + void *kbuf; + + if (ddi_copyin(datap, &req, sizeof (req), md)) { + error = EFAULT; + break; + } + if (req.len > max_len || req.len == 0) { + error = EINVAL; + break; + } + kbuf = kmem_zalloc(req.len, KM_SLEEP); + error = vm_get_fpu(sc->vmm_vm, vcpu, kbuf, req.len); + if (error == 0) { + if (ddi_copyout(kbuf, req.buf, req.len, md)) { + error = EFAULT; + } + } + kmem_free(kbuf, req.len); + break; + } + case VM_SET_FPU: { + struct vm_fpu_state req; + const size_t max_len = (PAGESIZE * 2); + void *kbuf; + + if (ddi_copyin(datap, &req, sizeof (req), md)) { + error = EFAULT; + break; + } + if (req.len > max_len || req.len == 0) { + error = EINVAL; + break; + } + kbuf = kmem_alloc(req.len, KM_SLEEP); + if (ddi_copyin(req.buf, kbuf, req.len, md)) { + error = EFAULT; + } else { + error = vm_set_fpu(sc->vmm_vm, vcpu, kbuf, req.len); + } + kmem_free(kbuf, req.len); + break; + } case VM_SET_KERNEMU_DEV: case VM_GET_KERNEMU_DEV: { diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c index f78db731d6..cdcebc71d4 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c @@ -58,7 +58,6 @@ #include 
<sys/x86_archext.h> #include <machine/cpufunc.h> -#include <machine/fpu.h> #include <machine/md_var.h> #include <machine/specialreg.h> #include <machine/vmm.h> @@ -434,67 +433,6 @@ vmm_cpuid_init(void) cpu_exthigh = regs[0]; } -/* - * FreeBSD uses the struct savefpu for managing the FPU state. That is mimicked - * by our hypervisor multiplexor framework structure. - */ -struct savefpu * -fpu_save_area_alloc(void) -{ - return ((struct savefpu *)hma_fpu_alloc(KM_SLEEP)); -} - -void -fpu_save_area_free(struct savefpu *fsa) -{ - hma_fpu_t *fpu = (hma_fpu_t *)fsa; - hma_fpu_free(fpu); -} - -void -fpu_save_area_reset(struct savefpu *fsa) -{ - hma_fpu_t *fpu = (hma_fpu_t *)fsa; - hma_fpu_init(fpu); -} - -/* - * This glue function is supposed to save the host's FPU state. This is always - * paired in the general bhyve code with a call to fpusave. Therefore, we treat - * this as a nop and do all the work in fpusave(), which will have the context - * argument that we want anyways. - */ -void -fpuexit(kthread_t *td) -{ -} - -/* - * This glue function is supposed to restore the guest's FPU state from the save - * area back to the host. In FreeBSD, it is assumed that the host state has - * already been saved by a call to fpuexit(); however, we do both here. - */ -void -fpurestore(void *arg) -{ - hma_fpu_t *fpu = arg; - - hma_fpu_start_guest(fpu); -} - -/* - * This glue function is supposed to save the guest's FPU state. The host's FPU - * state is not expected to be restored necessarily due to the use of FPU - * emulation through CR0.TS. However, we can and do restore it here. - */ -void -fpusave(void *arg) -{ - hma_fpu_t *fpu = arg; - - hma_fpu_stop_guest(fpu); -} - void vmm_sol_glue_init(void) { diff --git a/usr/src/uts/i86pc/os/hma_fpu.c b/usr/src/uts/i86pc/os/hma_fpu.c index 14cfa8baed..138af7a32a 100644 --- a/usr/src/uts/i86pc/os/hma_fpu.c +++ b/usr/src/uts/i86pc/os/hma_fpu.c @@ -11,6 +11,7 @@ /* * Copyright (c) 2018, Joyent, Inc. 
+ * Copyright 2022 Oxide Computer Company */ /* @@ -28,6 +29,12 @@ #include <sys/hma.h> #include <sys/x86_archext.h> #include <sys/archsystm.h> +#include <sys/controlregs.h> +#include <sys/sysmacros.h> +#include <sys/stdbool.h> +#include <sys/ontrap.h> +#include <sys/cpuvar.h> +#include <sys/disp.h> struct hma_fpu { fpu_ctx_t hf_guest_fpu; @@ -57,7 +64,7 @@ hma_fpu_init(hma_fpu_t *fpu) xs = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs; bzero(xs, cpuid_get_xsave_size()); bcopy(&avx_initial, xs, sizeof (*xs)); - xs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE; + xs->xs_header.xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE; fpu->hf_guest_fpu.fpu_xsave_mask = XFEATURE_FP_ALL; break; default: @@ -140,6 +147,36 @@ hma_fpu_start_guest(hma_fpu_t *fpu) fpu->hf_guest_fpu.fpu_flags &= ~FPU_VALID; } +/* + * Since fp_save() assumes a thread-centric view of the FPU usage -- it will + * assert if attempting to save elsewhere than the thread PCB, and will elide + * action if the FPU is not enabled -- we cannot use it for the manual saving of + * FPU contents. To work around that, we call the save mechanism directly. + */ +static void +do_fp_save(fpu_ctx_t *fpu) +{ + /* + * For our manual saving, we expect that the thread PCB never be the + * landing zone for the data. 
+ */ + ASSERT(curthread->t_lwp == NULL || + fpu != &curthread->t_lwp->lwp_pcb.pcb_fpu); + + switch (fp_save_mech) { + case FP_FXSAVE: + fpxsave(fpu->fpu_regs.kfpu_u.kfpu_fx); + break; + case FP_XSAVE: + xsavep(fpu->fpu_regs.kfpu_u.kfpu_xs, fpu->fpu_xsave_mask); + break; + default: + panic("Invalid fp_save_mech"); + } + fpu->fpu_flags |= FPU_VALID; +} + + void hma_fpu_stop_guest(hma_fpu_t *fpu) { @@ -148,29 +185,232 @@ hma_fpu_stop_guest(hma_fpu_t *fpu) ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_EN, !=, 0); ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_VALID, ==, 0); + do_fp_save(&fpu->hf_guest_fpu); + + fp_restore(&curthread->t_lwp->lwp_pcb.pcb_fpu); + + fpu->hf_inguest = B_FALSE; + fpu->hf_curthread = NULL; +} + +/* + * Will output up to `ndesc` records into `descp`. The required size for an + * XSAVE area containing all of the data fields supported by the host will be + * placed in `req_sizep` (if non-NULL). Returns the number of feature bits + * supported by the host. + */ +uint_t +hma_fpu_describe_xsave_state(hma_xsave_state_desc_t *descp, uint_t ndesc, + size_t *req_sizep) +{ + uint64_t features; + + switch (fp_save_mech) { + case FP_FXSAVE: + /* + * Even without xsave support, the FPU will have legacy x87 + * float and SSE state contained within. + */ + features = XFEATURE_LEGACY_FP | XFEATURE_SSE; + break; + case FP_XSAVE: + features = get_xcr(XFEATURE_ENABLED_MASK); + break; + default: + panic("Invalid fp_save_mech"); + } + + uint_t count, pos; + uint_t max_size = MIN_XSAVE_SIZE; + for (count = 0, pos = 0; pos <= 63; pos++) { + const uint64_t bit = (1 << pos); + uint32_t size, off; + + if ((features & bit) == 0) { + continue; + } + + if (bit == XFEATURE_LEGACY_FP || bit == XFEATURE_SSE) { + size = sizeof (struct fxsave_state); + off = 0; + } else { + /* + * Size and position of data types within the XSAVE area + * is described in leaf 0xD in the subfunction + * corresponding to the bit position (for pos > 1). 
+ */ + struct cpuid_regs regs = { + .cp_eax = 0xD, + .cp_ecx = pos, + }; + + ASSERT3U(pos, >, 1); + + (void) __cpuid_insn(&regs); + size = regs.cp_eax; + off = regs.cp_ebx; + } + max_size = MAX(max_size, off + size); + + if (count < ndesc) { + hma_xsave_state_desc_t *desc = &descp[count]; + + desc->hxsd_bit = bit; + desc->hxsd_size = size; + desc->hxsd_off = off; + } + count++; + } + if (req_sizep != NULL) { + *req_sizep = max_size; + } + return (count); +} + +hma_fpu_xsave_result_t +hma_fpu_get_xsave_state(const hma_fpu_t *fpu, void *buf, size_t len) +{ + ASSERT(!fpu->hf_inguest); + + size_t valid_len; + switch (fp_save_mech) { + case FP_FXSAVE: { + if (len < MIN_XSAVE_SIZE) { + return (HFXR_NO_SPACE); + } + bcopy(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, buf, + sizeof (struct fxsave_state)); + + struct xsave_header hdr = { + .xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE, + }; + bcopy(&hdr, buf + sizeof (struct fxsave_state), sizeof (hdr)); + + break; + } + case FP_XSAVE: + (void) hma_fpu_describe_xsave_state(NULL, 0, &valid_len); + if (len < valid_len) { + return (HFXR_NO_SPACE); + } + bcopy(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, buf, + valid_len); + break; + default: + panic("Invalid fp_save_mech"); + } + + return (HFXR_OK); +} + +hma_fpu_xsave_result_t +hma_fpu_set_xsave_state(hma_fpu_t *fpu, void *buf, size_t len) +{ + ASSERT(!fpu->hf_inguest); + + if (len < MIN_XSAVE_SIZE) { + return (HFXR_NO_SPACE); + } + /* 64-byte alignment is demanded of the FPU-related operations */ + if (((uintptr_t)buf & 63) != 0) { + return (HFXR_BAD_ALIGN); + } + + struct xsave_header *hdr = buf + sizeof (struct fxsave_state); + if (hdr->xsh_xcomp_bv != 0) { + /* XSAVEC formatting not supported at this time */ + return (HFXR_UNSUP_FMT); + } + + uint64_t allowed_bits; + size_t save_area_size; + switch (fp_save_mech) { + case FP_FXSAVE: + allowed_bits = XFEATURE_LEGACY_FP | XFEATURE_SSE; + save_area_size = sizeof (struct fxsave_state); + break; + case FP_XSAVE: + 
allowed_bits = get_xcr(XFEATURE_ENABLED_MASK); + save_area_size = cpuid_get_xsave_size(); + break; + default: + panic("Invalid fp_save_mech"); + } + if ((hdr->xsh_xstate_bv & ~(allowed_bits)) != 0) { + return (HFXR_UNSUP_FEAT); + } + /* - * Note, we can't use fp_save because it assumes that we're saving to - * the thread's PCB and not somewhere else. Because this is a different - * FPU context, we instead have to do this ourselves. + * We validate the incoming state with the FPU itself prior to saving it + * into the guest FPU context area. In order to preserve any state + * currently housed in the FPU, we save it to a temporarily allocated + * FPU context. It is important to note that we are not following the + * normal rules around state management detailed in uts/intel/os/fpu.c. + * This saving is unconditional, uncaring about the state in the FPU or + * the value of CR0_TS, simplifying our process before returning to the + * caller (without needing to check for an lwp, etc). To prevent + * interrupting threads from encountering this unusual FPU state, we + * keep interrupts disabled for the duration. */ + fpu_ctx_t temp_ctx = { + .fpu_xsave_mask = XFEATURE_FP_ALL, + }; + temp_ctx.fpu_regs.kfpu_u.kfpu_generic = + kmem_cache_alloc(fpsave_cachep, KM_SLEEP); + bzero(temp_ctx.fpu_regs.kfpu_u.kfpu_generic, save_area_size); + + ulong_t iflag; + iflag = intr_clear(); + bool disable_when_done = (getcr0() & CR0_TS) != 0; + do_fp_save(&temp_ctx); + + /* + * If the provided data is invalid, it will cause a #GP when we attempt + * to load it into the FPU, so protect against that with on_trap(). + * Should the data load successfully, we can then be confident that its + * later use via hma_fpu_start_guest() will be safe. 
+ */ + on_trap_data_t otd; + volatile hma_fpu_xsave_result_t res = HFXR_OK; + if (on_trap(&otd, OT_DATA_EC) != 0) { + res = HFXR_INVALID_DATA; + goto done; + } + switch (fp_save_mech) { case FP_FXSAVE: - fpxsave(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx); + if (hdr->xsh_xstate_bv == 0) { + /* + * An empty xstate_bv means we can simply load the + * legacy FP/SSE area with their initial state. + */ + bcopy(&sse_initial, + fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx, + sizeof (sse_initial)); + } else { + fpxrestore(buf); + fpxsave(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx); + } break; case FP_XSAVE: + xrestore(buf, XFEATURE_FP_ALL); xsavep(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs, fpu->hf_guest_fpu.fpu_xsave_mask); break; default: panic("Invalid fp_save_mech"); - /*NOTREACHED*/ } - fpu->hf_guest_fpu.fpu_flags |= FPU_VALID; - fp_restore(&curthread->t_lwp->lwp_pcb.pcb_fpu); +done: + no_trap(); + fp_restore(&temp_ctx); + if (disable_when_done) { + fpdisable(); + } + intr_restore(iflag); + kmem_cache_free(fpsave_cachep, temp_ctx.fpu_regs.kfpu_u.kfpu_generic); - fpu->hf_inguest = B_FALSE; - fpu->hf_curthread = NULL; + return (res); } void @@ -214,11 +454,11 @@ hma_fpu_set_fxsave_state(hma_fpu_t *fpu, const struct fxsave_state *fx) gxs = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs; bzero(gxs, cpuid_get_xsave_size()); bcopy(fx, &gxs->xs_fxsave, sizeof (*fx)); - gxs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE; + gxs->xs_header.xsh_xstate_bv = + XFEATURE_LEGACY_FP | XFEATURE_SSE; break; default: panic("Invalid fp_save_mech"); - /* NOTREACHED */ } return (0); diff --git a/usr/src/uts/i86pc/os/pci_bios.c b/usr/src/uts/i86pc/os/pci_bios.c index a24064b8cb..84653435ad 100644 --- a/usr/src/uts/i86pc/os/pci_bios.c +++ b/usr/src/uts/i86pc/os/pci_bios.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. 
+ * Copyright 2022 Oxide Computer Company */ #include <sys/types.h> @@ -218,3 +219,23 @@ pci_slot_names_prop(int bus, char *buf, int len) *(buf + plen) = 0; return (plen); } + +/* + * This is used to discover additional PCI buses that may exist in the system in + * addition to the ACPI _BBN method. Historically these were discovered by + * asking if there was a valid slot property, e.g. pci_slot_names_prop() + * returned valid data. In this case we return any entry that has a bus number + * and a non-zero slot value. We rely on the core PCI code to do dedup for us. + */ +void +pci_bios_bus_iter(pci_prd_root_complex_f cbfunc, void *arg) +{ + int i; + for (i = 0; i < pci_irq_nroutes; i++) { + if (pci_irq_routes[i].pir_slot != 0) { + if (!cbfunc(pci_irq_routes[i].pir_bus, arg)) { + return; + } + } + } +} diff --git a/usr/src/uts/i86pc/pci_prd/Makefile b/usr/src/uts/i86pc/pci_prd/Makefile new file mode 100644 index 0000000000..ca262cc4a4 --- /dev/null +++ b/usr/src/uts/i86pc/pci_prd/Makefile @@ -0,0 +1,66 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2022 Oxide Computer Company +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = pci_prd +OBJECTS = $(PCI_PRD_OBJS:%=$(OBJS_DIR)/%) +ROOTMODULE = $(ROOT_PSM_MISC_DIR)/$(MODULE) + +# +# Include common rules. 
+# +include $(UTSBASE)/i86pc/Makefile.i86pc + +# +# Define targets +# +ALL_TARGET = $(BINARY) +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +LDFLAGS += -Nmisc/acpica + +# +# Overrides +# + +ALL_BUILDS = $(ALL_BUILDSONLY64) +DEF_BUILDS = $(DEF_BUILDSONLY64) + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/i86pc/Makefile.targ diff --git a/usr/src/uts/i86pc/sys/hma.h b/usr/src/uts/i86pc/sys/hma.h index 16ab708896..e15cd60d5e 100644 --- a/usr/src/uts/i86pc/sys/hma.h +++ b/usr/src/uts/i86pc/sys/hma.h @@ -11,6 +11,7 @@ /* * Copyright 2019 Joyent, Inc. + * Copyright 2022 Oxide Computer Company */ #ifndef _SYS_HMA_H @@ -117,6 +118,43 @@ extern void hma_fpu_start_guest(hma_fpu_t *); */ extern void hma_fpu_stop_guest(hma_fpu_t *); +typedef enum { + HFXR_OK = 0, + HFXR_NO_SPACE, /* buffer is not large enough */ + HFXR_BAD_ALIGN, /* buffer is not properly (64-byte) aligned */ + HFXR_UNSUP_FMT, /* data using unsupported (compressed) format */ + HFXR_UNSUP_FEAT, /* data has unsupported features set */ + HFXR_INVALID_DATA, /* CPU determined xsave data is invalid */ +} hma_fpu_xsave_result_t; + +/* + * Get and set the contents of the FPU save area, formatted as XSAVE-style + * information. If XSAVE is not supported by the host, the input and output + * values will be translated to and from the FXSAVE format. Attempts to set + * XSAVE values not supported by the host will result in an error. + * + * These functions cannot be called while the FPU is in use by the guest. It is + * up to callers to guarantee this invariant. 
+ */ +extern hma_fpu_xsave_result_t hma_fpu_get_xsave_state(const hma_fpu_t *, void *, + size_t); +extern hma_fpu_xsave_result_t hma_fpu_set_xsave_state(hma_fpu_t *, void *, + size_t); + +typedef struct hma_xsave_state_desc { + uint64_t hxsd_bit; + uint32_t hxsd_size; + uint32_t hxsd_off; +} hma_xsave_state_desc_t; + +/* + * Get a description of the data fields supported by the host via the XSAVE APIs + * for getting/setting guest FPU data. See the function definition for more + * detailed parameter usage. + */ +extern uint_t hma_fpu_describe_xsave_state(hma_xsave_state_desc_t *, uint_t, + size_t *); + /* * Get and set the contents of the FPU save area. This sets the fxsave style * information. In all cases when this is in use, if an XSAVE state is actually diff --git a/usr/src/uts/i86pc/sys/pci_cfgspace_impl.h b/usr/src/uts/i86pc/sys/pci_cfgspace_impl.h index 6cec63bcfd..05e0710bbe 100644 --- a/usr/src/uts/i86pc/sys/pci_cfgspace_impl.h +++ b/usr/src/uts/i86pc/sys/pci_cfgspace_impl.h @@ -21,16 +21,18 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #ifndef _SYS_PCI_CFGSPACE_IMPL_H #define _SYS_PCI_CFGSPACE_IMPL_H /* - * Routines to support particular PCI chipsets + * Routines to support particular PCI chipsets and the PCI BIOS. 
*/ +#include <sys/plat/pci_prd.h> + #ifdef __cplusplus extern "C" { #endif @@ -183,6 +185,11 @@ typedef struct pci_irq_route_hdr { } pci_irq_route_hdr_t; #pragma pack() +extern int pci_irq_nroutes; + +extern int pci_slot_names_prop(int, char *, int); +extern void pci_bios_bus_iter(pci_prd_root_complex_f, void *); + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h index 3282fa86bf..027a7da214 100644 --- a/usr/src/uts/i86pc/sys/vmm_dev.h +++ b/usr/src/uts/i86pc/sys/vmm_dev.h @@ -302,6 +302,25 @@ struct vm_run_state { uint8_t _pad[3]; }; +/* Transfer data for VM_GET_FPU and VM_SET_FPU */ +struct vm_fpu_state { + int vcpuid; + void *buf; + size_t len; +}; + +struct vm_fpu_desc_entry { + uint64_t vfde_feature; + uint32_t vfde_size; + uint32_t vfde_off; +}; + +struct vm_fpu_desc { + struct vm_fpu_desc_entry *vfd_entry_data; + size_t vfd_req_size; + uint32_t vfd_num_entries; +}; + struct vmm_resv_query { size_t vrq_free_sz; size_t vrq_alloc_sz; @@ -370,6 +389,8 @@ struct vmm_dirty_tracker { #define VM_RESET_CPU (VMM_CPU_IOC_BASE | 0x16) #define VM_GET_RUN_STATE (VMM_CPU_IOC_BASE | 0x17) #define VM_SET_RUN_STATE (VMM_CPU_IOC_BASE | 0x18) +#define VM_GET_FPU (VMM_CPU_IOC_BASE | 0x19) +#define VM_SET_FPU (VMM_CPU_IOC_BASE | 0x1a) /* Operations requiring write-locking the VM */ #define VM_REINIT (VMM_LOCK_IOC_BASE | 0x01) @@ -428,6 +449,7 @@ struct vmm_dirty_tracker { /* Note: forces a barrier on a flush operation before returning. 
*/ #define VM_TRACK_DIRTY_PAGES (VMM_IOC_BASE | 0x20) +#define VM_DESC_FPU_AREA (VMM_IOC_BASE | 0x21) #define VM_DEVMEM_GETOFFSET (VMM_IOC_BASE | 0xff) diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files index 20d66a08e4..38b1177bbe 100644 --- a/usr/src/uts/intel/Makefile.files +++ b/usr/src/uts/intel/Makefile.files @@ -165,7 +165,7 @@ PCIEB_OBJS += pcieb_x86.o PIT_BEEP_OBJS += pit_beep.o POWER_OBJS += power.o PCI_AUTOCONFIG_OBJS += pci_autoconfig.o pci_boot.o pcie_nvidia.o \ - pci_memlist.o pci_resource.o + pci_memlist.o RADEON_OBJS += r300_cmdbuf.o radeon_cp.o radeon_drv.o \ radeon_state.o radeon_irq.o radeon_mem.o SD_OBJS += sd.o sd_xbuf.o diff --git a/usr/src/uts/intel/io/pci/pci_autoconfig.c b/usr/src/uts/intel/io/pci/pci_autoconfig.c index 713493eedb..af7ba637d8 100644 --- a/usr/src/uts/intel/io/pci/pci_autoconfig.c +++ b/usr/src/uts/intel/io/pci/pci_autoconfig.c @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
+ * Copyright 2022 Oxide Computer Company */ /* @@ -40,8 +41,10 @@ #include <sys/reboot.h> #include <sys/pci_cfgspace_impl.h> #include <sys/mutex.h> +#include <sys/plat/pci_prd.h> extern int pci_boot_debug; +extern int pci_boot_maxbus; /* * Interface routines @@ -49,7 +52,7 @@ extern int pci_boot_debug; void pci_enumerate(int); void pci_setup_tree(void); void pci_reprogram(void); -void bus_res_fini(void); +dev_info_t *pci_boot_bus_to_dip(uint32_t); static struct modlmisc modlmisc = { &mod_miscops, "PCI BIOS interface" @@ -59,13 +62,23 @@ static struct modlinkage modlinkage = { MODREV_1, (void *)&modlmisc, NULL }; +static pci_prd_upcalls_t pci_upcalls = { + .pru_bus2dip_f = pci_boot_bus_to_dip +}; + int _init(void) { int err; - if ((err = mod_install(&modlinkage)) != 0) + if ((err = pci_prd_init(&pci_upcalls)) != 0) { + return (err); + } + + if ((err = mod_install(&modlinkage)) != 0) { + pci_prd_fini(); return (err); + } impl_bus_add_probe(pci_enumerate); return (0); @@ -80,7 +93,7 @@ _fini(void) return (err); impl_bus_delete_probe(pci_enumerate); - bus_res_fini(); + pci_prd_fini(); return (0); } @@ -102,6 +115,14 @@ pci_enumerate(int reprogram) extern void add_pci_fixes(void); extern void undo_pci_fixes(void); + /* + * On our first pass through here actually determine what the maximum + * bus that we should use is. + */ + if (reprogram == 0) { + pci_boot_maxbus = pci_prd_max_bus(); + } + add_pci_fixes(); if (reprogram) { diff --git a/usr/src/uts/intel/io/pci/pci_boot.c b/usr/src/uts/intel/io/pci/pci_boot.c index 203d4292ee..6d72fe6507 100644 --- a/usr/src/uts/intel/io/pci/pci_boot.c +++ b/usr/src/uts/intel/io/pci/pci_boot.c @@ -23,6 +23,7 @@ * Copyright 2019 Joyent, Inc. * Copyright 2019 Western Digital Corporation * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2022 Oxide Computer Company */ /* @@ -101,18 +102,21 @@ * add_pci_fixes() * As for first pass. 
* pci_reprogram() - * pci_scan_bbn() - * The ACPI namespace is scanned for top-level - * instances of _BBN in order to enumerate the - * root-bridges in the system. If a root bridge is - * found that has not been previously discovered - * (existence inferred through its children) then - * it is added to the system. + * pci_prd_root_complex_iter() + * The platform is asked to tell us of all root + * complexes that it knows about (e.g. using the + * _BBN method via ACPI). This will include buses + * that we've already discovered and those that we + * potentially haven't. Anything that has not been + * previously discovered (or inferred to exist) is + * then added to the system. * <foreach ROOT bus> * populate_bus_res() * Find resources associated with this root bus - * from either ACPI or BIOS tables. See - * find_bus_res() in pci_resource.c + * based on what the platform provides through the + * pci platform interfaces defined in + * sys/plat/pci_prd.h. On i86pc this is driven by + * ACPI and BIOS tables. 
* <foreach bus> * fix_ppb_res() * Reprogram pci(e) bridges which have not already @@ -158,7 +162,6 @@ #include <sys/pcie_impl.h> #include <sys/memlist.h> #include <sys/bootconf.h> -#include <io/pci/mps_table.h> #include <sys/pci_cfgacc.h> #include <sys/pci_cfgspace.h> #include <sys/pci_cfgspace_impl.h> @@ -172,6 +175,7 @@ #include <sys/iommulib.h> #include <sys/devcache.h> #include <sys/pci_cfgacc_x86.h> +#include <sys/plat/pci_prd.h> #define pci_getb (*pci_getb_func) #define pci_getw (*pci_getw_func) @@ -242,16 +246,13 @@ struct pci_devfunc { extern int apic_nvidia_io_max; extern int pseudo_isa; -extern int pci_bios_maxbus; static uchar_t max_dev_pci = 32; /* PCI standard */ +int pci_boot_maxbus; int pci_boot_debug = 0; int pci_debug_bus_start = -1; int pci_debug_bus_end = -1; -extern struct memlist *find_bus_res(int, int); static struct pci_fixundo *undolist = NULL; static int num_root_bus = 0; /* count of root buses */ -extern volatile int acpi_resource_discovery; -extern uint64_t mcfg_mem_base; extern void pci_cfgacc_add_workaround(uint16_t, uchar_t, uchar_t); extern dev_info_t *pcie_get_rc_dip(dev_info_t *); @@ -269,12 +270,11 @@ static void add_ppb_props(dev_info_t *, uchar_t, uchar_t, uchar_t, int, ushort_t); static void add_model_prop(dev_info_t *, uint_t); static void add_bus_range_prop(int); -static void add_bus_slot_names_prop(int); static void add_ranges_prop(int, int); static void add_bus_available_prop(int); static int get_pci_cap(uchar_t bus, uchar_t dev, uchar_t func, uint8_t cap_id); static void fix_ppb_res(uchar_t, boolean_t); -static void alloc_res_array(); +static void alloc_res_array(void); static void create_ioapic_node(int bus, int dev, int fn, ushort_t vendorid, ushort_t deviceid); static void pciex_slot_names_prop(dev_info_t *, ushort_t); @@ -283,7 +283,6 @@ static void memlist_remove_list(struct memlist **list, struct memlist *remove_list); static void ck804_fix_aer_ptr(dev_info_t *, pcie_req_id_t); -static void pci_scan_bbn(void); static 
int pci_unitaddr_cache_valid(void); static int pci_bus_unitaddr(int); static void pci_unitaddr_cache_create(void); @@ -292,8 +291,6 @@ static int pci_cache_unpack_nvlist(nvf_handle_t, nvlist_t *, char *); static int pci_cache_pack_nvlist(nvf_handle_t, nvlist_t **); static void pci_cache_free_list(nvf_handle_t); -extern int pci_slot_names_prop(int, char *, int); - /* set non-zero to force PCI peer-bus renumbering */ int pci_bus_always_renumber = 0; @@ -326,6 +323,13 @@ typedef struct { nvf_handle_t puafd_handle; int pua_cache_valid = 0; +dev_info_t * +pci_boot_bus_to_dip(uint32_t busno) +{ + ASSERT3U(busno, <=, pci_boot_maxbus); + return (pci_bus_res[busno].dip); +} + static void dump_memlists_impl(const char *tag, int bus) { @@ -356,80 +360,21 @@ dump_memlists_impl(const char *tag, int bus) } } -/*ARGSUSED*/ -static ACPI_STATUS -pci_process_acpi_device(ACPI_HANDLE hdl, UINT32 level, void *ctx, void **rv) +static boolean_t +pci_rc_scan_cb(uint32_t busno, void *arg) { - ACPI_DEVICE_INFO *adi; - int busnum; - - /* - * Use AcpiGetObjectInfo() to find the device _HID - * If not a PCI root-bus, ignore this device and continue - * the walk - */ - if (ACPI_FAILURE(AcpiGetObjectInfo(hdl, &adi))) - return (AE_OK); - - if (!(adi->Valid & ACPI_VALID_HID)) { - AcpiOsFree(adi); - return (AE_OK); - } - - if (strncmp(adi->HardwareId.String, PCI_ROOT_HID_STRING, - sizeof (PCI_ROOT_HID_STRING)) && - strncmp(adi->HardwareId.String, PCI_EXPRESS_ROOT_HID_STRING, - sizeof (PCI_EXPRESS_ROOT_HID_STRING))) { - AcpiOsFree(adi); - return (AE_OK); + if (busno > pci_boot_maxbus) { + dcmn_err(CE_NOTE, "platform root complex scan returned bus " + "with invalid bus id: 0x%x", busno); + return (B_TRUE); } - AcpiOsFree(adi); - - /* - * acpica_get_busno() will check the presence of _BBN and - * fail if not present. It will then use the _CRS method to - * retrieve the actual bus number assigned, it will fall back - * to _BBN should the _CRS method fail. 
- */ - if (ACPI_SUCCESS(acpica_get_busno(hdl, &busnum))) { - /* - * Ignore invalid _BBN return values here (rather - * than panic) and emit a warning; something else - * may suffer failure as a result of the broken BIOS. - */ - if ((busnum < 0) || (busnum > pci_bios_maxbus)) { - dcmn_err(CE_NOTE, - "pci_process_acpi_device: invalid _BBN 0x%x", - busnum); - return (AE_CTRL_DEPTH); - } - - /* PCI with valid _BBN */ - if (pci_bus_res[busnum].par_bus == (uchar_t)-1 && - pci_bus_res[busnum].dip == NULL) - create_root_bus_dip((uchar_t)busnum); - return (AE_CTRL_DEPTH); + if (pci_bus_res[busno].par_bus == (uchar_t)-1 && + pci_bus_res[busno].dip == NULL) { + create_root_bus_dip((uchar_t)busno); } - /* PCI and no _BBN, continue walk */ - return (AE_OK); -} - -/* - * Scan the ACPI namespace for all top-level instances of _BBN - * in order to discover childless root-bridges (which enumeration - * may not find; root-bridges are inferred by the existence of - * children). This scan should find all root-bridges that have - * been enumerated, and any childless root-bridges not enumerated. - * Root-bridge for bus 0 may not have a _BBN object. - */ -static void -pci_scan_bbn() -{ - void *rv; - - (void) AcpiGetDevices(NULL, pci_process_acpi_device, NULL, &rv); + return (B_TRUE); } static void @@ -596,7 +541,7 @@ pci_unitaddr_cache_create(void) index = 0; listp = nvf_list(puafd_handle); - for (i = 0; i <= pci_bios_maxbus; i++) { + for (i = 0; i <= pci_boot_maxbus; i++) { /* skip non-root (peer) PCI busses */ if ((pci_bus_res[i].par_bus != (uchar_t)-1) || (pci_bus_res[i].dip == NULL)) @@ -622,7 +567,7 @@ pci_setup_tree(void) uint_t i, root_bus_addr = 0; alloc_res_array(); - for (i = 0; i <= pci_bios_maxbus; i++) { + for (i = 0; i <= pci_boot_maxbus; i++) { pci_bus_res[i].par_bus = (uchar_t)-1; pci_bus_res[i].root_addr = (uchar_t)-1; pci_bus_res[i].sub_bus = i; @@ -635,7 +580,7 @@ pci_setup_tree(void) /* * Now enumerate peer busses * - * We loop till pci_bios_maxbus. 
On most systems, there is + * We loop till pci_boot_maxbus. On most systems, there is * one more bus at the high end, which implements the ISA * compatibility bus. We don't care about that. * @@ -646,144 +591,11 @@ pci_setup_tree(void) * However, we stop enumerating phantom peers with no * device below. */ - for (i = 1; i <= pci_bios_maxbus; i++) { + for (i = 1; i <= pci_boot_maxbus; i++) { if (pci_bus_res[i].dip == NULL) { pci_bus_res[i].root_addr = root_bus_addr++; } enumerate_bus_devs(i, CONFIG_INFO); - - /* add slot-names property for named pci hot-plug slots */ - add_bus_slot_names_prop(i); - } -} - -/* - * >0 = present, 0 = not present, <0 = error - */ -static int -pci_bbn_present(int bus) -{ - ACPI_HANDLE hdl; - int rv; - - /* no dip means no _BBN */ - if (pci_bus_res[bus].dip == NULL) - return (0); - - rv = -1; /* default return value in case of error below */ - if (ACPI_SUCCESS(acpica_get_handle(pci_bus_res[bus].dip, &hdl))) { - switch (AcpiEvaluateObject(hdl, "_BBN", NULL, NULL)) { - case AE_OK: - rv = 1; - break; - case AE_NOT_FOUND: - rv = 0; - break; - default: - break; - } - } - - return (rv); -} - -/* - * Return non-zero if any PCI bus in the system has an associated - * _BBN object, 0 otherwise. 
- */ -static int -pci_roots_have_bbn(void) -{ - int i; - - /* - * Scan the PCI busses and look for at least 1 _BBN - */ - for (i = 0; i <= pci_bios_maxbus; i++) { - /* skip non-root (peer) PCI busses */ - if (pci_bus_res[i].par_bus != (uchar_t)-1) - continue; - - if (pci_bbn_present(i) > 0) - return (1); - } - return (0); - -} - -/* - * return non-zero if the machine is one on which we renumber - * the internal pci unit-addresses - */ -static int -pci_bus_renumber() -{ - ACPI_TABLE_HEADER *fadt; - - if (pci_bus_always_renumber) - return (1); - - /* get the FADT */ - if (AcpiGetTable(ACPI_SIG_FADT, 1, (ACPI_TABLE_HEADER **)&fadt) != - AE_OK) - return (0); - - /* compare OEM Table ID to "SUNm31" */ - if (strncmp("SUNm31", fadt->OemId, 6)) - return (0); - else - return (1); -} - -/* - * Initial enumeration of the physical PCI bus hierarchy can - * leave 'gaps' in the order of peer PCI bus unit-addresses. - * Systems with more than one peer PCI bus *must* have an ACPI - * _BBN object associated with each peer bus; use the presence - * of this object to remove gaps in the numbering of the peer - * PCI bus unit-addresses - only peer busses with an associated - * _BBN are counted. 
- */ -static void -pci_renumber_root_busses(void) -{ - int pci_regs[] = {0, 0, 0}; - int i, root_addr = 0; - - /* - * Currently, we only enable the re-numbering on specific - * Sun machines; this is a work-around for the more complicated - * issue of upgrade changing physical device paths - */ - if (!pci_bus_renumber()) - return; - - /* - * If we find no _BBN objects at all, we either don't need - * to do anything or can't do anything anyway - */ - if (!pci_roots_have_bbn()) - return; - - for (i = 0; i <= pci_bios_maxbus; i++) { - /* skip non-root (peer) PCI busses */ - if (pci_bus_res[i].par_bus != (uchar_t)-1) - continue; - - if (pci_bbn_present(i) < 1) { - pci_bus_res[i].root_addr = (uchar_t)-1; - continue; - } - - ASSERT(pci_bus_res[i].dip != NULL); - if (pci_bus_res[i].root_addr != root_addr) { - /* update reg property for node */ - pci_bus_res[i].root_addr = root_addr; - pci_regs[0] = pci_bus_res[i].root_addr; - (void) ndi_prop_update_int_array(DDI_DEV_T_NONE, - pci_bus_res[i].dip, "reg", (int *)pci_regs, 3); - } - root_addr++; } } @@ -810,12 +622,12 @@ remove_subtractive_res() int i, j; struct memlist *list; - for (i = 0; i <= pci_bios_maxbus; i++) { + for (i = 0; i <= pci_boot_maxbus; i++) { if (pci_bus_res[i].subtractive) { /* remove used io ports */ list = pci_bus_res[i].io_used; while (list) { - for (j = 0; j <= pci_bios_maxbus; j++) + for (j = 0; j <= pci_boot_maxbus; j++) (void) memlist_remove( &pci_bus_res[j].io_avail, list->ml_address, list->ml_size); @@ -824,7 +636,7 @@ remove_subtractive_res() /* remove used mem resource */ list = pci_bus_res[i].mem_used; while (list) { - for (j = 0; j <= pci_bios_maxbus; j++) { + for (j = 0; j <= pci_boot_maxbus; j++) { (void) memlist_remove( &pci_bus_res[j].mem_avail, list->ml_address, list->ml_size); @@ -837,7 +649,7 @@ remove_subtractive_res() /* remove used prefetchable mem resource */ list = pci_bus_res[i].pmem_used; while (list) { - for (j = 0; j <= pci_bios_maxbus; j++) { + for (j = 0; j <= pci_boot_maxbus; 
j++) { (void) memlist_remove( &pci_bus_res[j].pmem_avail, list->ml_address, list->ml_size); @@ -905,7 +717,7 @@ get_parbus_res(uchar_t parbus, uchar_t bus, uint64_t size, uint64_t align, * accounted for in this case. */ if ((pci_bus_res[parbus].par_bus == (uchar_t)-1) && - (num_root_bus > 1) && (acpi_resource_discovery <= 0)) { + (num_root_bus > 1) && !pci_prd_multi_root_ok()) { return (0); } @@ -1593,10 +1405,16 @@ pci_reprogram(void) int bus; /* - * Scan ACPI namespace for _BBN objects, make sure that - * childless root-bridges appear in devinfo tree + * Ask platform code for all of the root complexes it knows about in + * case we have missed anything in the scan. This is to ensure that we + * have them show up in the devinfo tree. This scan should find any + * existing entries as well. After this, go through each bus and + * ask the platform if it wants to change the name of the slot. */ - pci_scan_bbn(); + pci_prd_root_complex_iter(pci_rc_scan_cb, NULL); + for (bus = 0; bus <= pci_boot_maxbus; bus++) { + pci_prd_slot_name(bus, pci_bus_res[bus].dip); + } pci_unitaddr_cache_init(); /* @@ -1607,7 +1425,7 @@ pci_reprogram(void) int new_addr; int index = 0; - for (bus = 0; bus <= pci_bios_maxbus; bus++) { + for (bus = 0; bus <= pci_boot_maxbus; bus++) { /* skip non-root (peer) PCI busses */ if ((pci_bus_res[bus].par_bus != (uchar_t)-1) || (pci_bus_res[bus].dip == NULL)) @@ -1626,14 +1444,13 @@ pci_reprogram(void) } } else { /* perform legacy processing */ - pci_renumber_root_busses(); pci_unitaddr_cache_create(); } /* * Do root-bus resource discovery */ - for (bus = 0; bus <= pci_bios_maxbus; bus++) { + for (bus = 0; bus <= pci_boot_maxbus; bus++) { /* skip non-root (peer) PCI busses */ if (pci_bus_res[bus].par_bus != (uchar_t)-1) continue; @@ -1683,7 +1500,7 @@ pci_reprogram(void) memlist_free_all(&isa_res.mem_used); /* add bus-range property for root/peer bus nodes */ - for (i = 0; i <= pci_bios_maxbus; i++) { + for (i = 0; i <= pci_boot_maxbus; i++) { /* create 
bus-range property on root/peer buses */ if (pci_bus_res[i].par_bus == (uchar_t)-1) add_bus_range_prop(i); @@ -1705,10 +1522,10 @@ pci_reprogram(void) /* reprogram the non-subtractive PPB */ if (pci_reconfig) - for (i = 0; i <= pci_bios_maxbus; i++) + for (i = 0; i <= pci_boot_maxbus; i++) fix_ppb_res(i, B_FALSE); - for (i = 0; i <= pci_bios_maxbus; i++) { + for (i = 0; i <= pci_boot_maxbus; i++) { /* configure devices not configured by BIOS */ if (pci_reconfig) { /* @@ -1722,7 +1539,7 @@ pci_reprogram(void) } /* All dev programmed, so we can create available prop */ - for (i = 0; i <= pci_bios_maxbus; i++) + for (i = 0; i <= pci_boot_maxbus; i++) add_bus_available_prop(i); } @@ -1734,10 +1551,11 @@ populate_bus_res(uchar_t bus) { /* scan BIOS structures */ - pci_bus_res[bus].pmem_avail = find_bus_res(bus, PREFETCH_TYPE); - pci_bus_res[bus].mem_avail = find_bus_res(bus, MEM_TYPE); - pci_bus_res[bus].io_avail = find_bus_res(bus, IO_TYPE); - pci_bus_res[bus].bus_avail = find_bus_res(bus, BUSRANGE_TYPE); + pci_bus_res[bus].pmem_avail = pci_prd_find_resource(bus, + PCI_PRD_R_PREFETCH); + pci_bus_res[bus].mem_avail = pci_prd_find_resource(bus, PCI_PRD_R_MMIO); + pci_bus_res[bus].io_avail = pci_prd_find_resource(bus, PCI_PRD_R_IO); + pci_bus_res[bus].bus_avail = pci_prd_find_resource(bus, PCI_PRD_R_BUS); /* * attempt to initialize sub_bus from the largest range-end @@ -2054,7 +1872,7 @@ add_pci_fixes(void) { int i; - for (i = 0; i <= pci_bios_maxbus; i++) { + for (i = 0; i <= pci_boot_maxbus; i++) { /* * For each bus, apply needed fixes to the appropriate devices. 
* This must be done before the main enumeration loop because @@ -3197,8 +3015,8 @@ add_ppb_props(dev_info_t *dip, uchar_t bus, uchar_t dev, uchar_t func, * Some BIOSes lie about max pci busses, we allow for * such mistakes here */ - if (subbus > pci_bios_maxbus) { - pci_bios_maxbus = subbus; + if (subbus > pci_boot_maxbus) { + pci_boot_maxbus = subbus; alloc_res_array(); } @@ -3436,66 +3254,6 @@ add_bus_range_prop(int bus) } /* - * Add slot-names property for any named pci hot-plug slots - */ -static void -add_bus_slot_names_prop(int bus) -{ - char slotprop[256]; - int len; - extern int pci_irq_nroutes; - char *slotcap_name; - - /* - * If no irq routing table, then go with the slot-names as set up - * in pciex_slot_names_prop() from slot capability register (if any). - */ - if (pci_irq_nroutes == 0) - return; - - /* - * Otherise delete the slot-names we already have and use the irq - * routing table values as returned by pci_slot_names_prop() instead, - * but keep any property of value "pcie0" as that can't be represented - * in the irq routing table. - */ - if (pci_bus_res[bus].dip != NULL) { - if (ddi_prop_lookup_string(DDI_DEV_T_ANY, pci_bus_res[bus].dip, - DDI_PROP_DONTPASS, "slot-names", &slotcap_name) != - DDI_SUCCESS || strcmp(slotcap_name, "pcie0") != 0) - (void) ndi_prop_remove(DDI_DEV_T_NONE, - pci_bus_res[bus].dip, "slot-names"); - } - - len = pci_slot_names_prop(bus, slotprop, sizeof (slotprop)); - if (len > 0) { - /* - * Only create a peer bus node if this bus may be a peer bus. - * It may be a peer bus if the dip is NULL and if par_bus is - * -1 (par_bus is -1 if this bus was not found to be - * subordinate to any PCI-PCI bridge). - * If it's not a peer bus, then the ACPI BBN-handling code - * will remove it later. 
- */ - if (pci_bus_res[bus].par_bus == (uchar_t)-1 && - pci_bus_res[bus].dip == NULL) { - - create_root_bus_dip(bus); - } - if (pci_bus_res[bus].dip != NULL) { - ASSERT((len % sizeof (int)) == 0); - (void) ndi_prop_update_int_array(DDI_DEV_T_NONE, - pci_bus_res[bus].dip, "slot-names", - (int *)slotprop, len / sizeof (int)); - } else { - cmn_err(CE_NOTE, "!BIOS BUG: Invalid bus number in PCI " - "IRQ routing table; Not adding slot-names " - "property for incorrect bus %d", bus); - } - } -} - -/* * Handle both PCI root and PCI-PCI bridge range properties; * non-zero 'ppb' argument select PCI-PCI bridges versus root. */ @@ -3652,11 +3410,11 @@ add_bus_available_prop(int bus) static void alloc_res_array(void) { - static int array_size = 0; - int old_size; + static uint_t array_size = 0; + uint_t old_size; void *old_res; - if (array_size > pci_bios_maxbus + 1) + if (array_size > pci_boot_maxbus + 1) return; /* array is big enough */ old_size = array_size; @@ -3665,7 +3423,7 @@ alloc_res_array(void) if (array_size == 0) array_size = 16; /* start with a reasonable number */ - while (array_size <= pci_bios_maxbus + 1) + while (array_size <= pci_boot_maxbus + 1) array_size <<= 1; pci_bus_res = (struct pci_bus_resource *)kmem_zalloc( array_size * sizeof (struct pci_bus_resource), KM_SLEEP); diff --git a/usr/src/uts/intel/io/pci/pci_memlist.c b/usr/src/uts/intel/io/pci/pci_memlist.c index 5786591420..4da76951e9 100644 --- a/usr/src/uts/intel/io/pci/pci_memlist.c +++ b/usr/src/uts/intel/io/pci/pci_memlist.c @@ -40,8 +40,8 @@ #include <sys/pci_impl.h> #include <sys/debug.h> -extern int pci_boot_debug; -#define dprintf if (pci_boot_debug) printf +int pci_memlist_debug; +#define dprintf if (pci_memlist_debug) printf void memlist_dump(struct memlist *listp) diff --git a/usr/src/uts/intel/os/archdep.c b/usr/src/uts/intel/os/archdep.c index bd5c72a3d8..49473fa2fa 100644 --- a/usr/src/uts/intel/os/archdep.c +++ b/usr/src/uts/intel/os/archdep.c @@ -269,7 +269,7 @@ setfpregs(klwp_t 
*lwp, fpregset_t *fp) &fpu->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave); fpu->fpu_regs.kfpu_xstatus = fp->fp_reg_set.fpchip_state.xstatus; - fpu->fpu_regs.kfpu_u.kfpu_xs->xs_xstate_bv |= + fpu->fpu_regs.kfpu_u.kfpu_xs->xs_header.xsh_xstate_bv |= (XFEATURE_LEGACY_FP | XFEATURE_SSE); break; default: diff --git a/usr/src/uts/intel/os/fpu.c b/usr/src/uts/intel/os/fpu.c index 0a9b828288..9644282429 100644 --- a/usr/src/uts/intel/os/fpu.c +++ b/usr/src/uts/intel/os/fpu.c @@ -22,7 +22,7 @@ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2021 Joyent, Inc. * Copyright 2021 RackTop Systems, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ @@ -528,23 +528,18 @@ const struct xsave_state avx_initial = { * The definition below needs to be identical with sse_initial * defined above. */ - { - FPU_CW_INIT, /* fx_fcw */ - 0, /* fx_fsw */ - 0, /* fx_fctw */ - 0, /* fx_fop */ - 0, /* fx_rip */ - 0, /* fx_rdp */ - SSE_MXCSR_INIT /* fx_mxcsr */ - /* rest of structure is zero */ + .xs_fxsave = { + .fx_fcw = FPU_CW_INIT, + .fx_mxcsr = SSE_MXCSR_INIT, + }, + .xs_header = { + /* + * bit0 = 1 for XSTATE_BV to indicate that legacy fields are + * valid, and CPU should initialize XMM/YMM. + */ + .xsh_xstate_bv = 1, + .xsh_xcomp_bv = 0, }, - /* - * bit0 = 1 for XSTATE_BV to indicate that legacy fields are valid, - * and CPU should initialize XMM/YMM. 
- */ - 1, - 0 /* xs_xcomp_bv */ - /* rest of structure is zero */ }; /* @@ -656,8 +651,8 @@ fp_new_lwp(void *parent, void *child) bcopy(&avx_initial, cxs, sizeof (*cxs)); cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS; cfx->fx_fcw = fx->fx_fcw; - cxs->xs_xstate_bv |= (get_xcr(XFEATURE_ENABLED_MASK) & - XFEATURE_FP_INITIAL); + cxs->xs_header.xsh_xstate_bv |= + (get_xcr(XFEATURE_ENABLED_MASK) & XFEATURE_FP_INITIAL); break; default: panic("Invalid fp_save_mech"); @@ -973,7 +968,8 @@ fpexterrflt(struct regs *rp) * Always set LEGACY_FP as it may have been cleared by XSAVE * instruction */ - fp->fpu_regs.kfpu_u.kfpu_xs->xs_xstate_bv |= XFEATURE_LEGACY_FP; + fp->fpu_regs.kfpu_u.kfpu_xs->xs_header.xsh_xstate_bv |= + XFEATURE_LEGACY_FP; break; default: panic("Invalid fp_save_mech"); @@ -1154,7 +1150,8 @@ fpsetcw(uint16_t fcw, uint32_t mxcsr) * Always set LEGACY_FP as it may have been cleared by XSAVE * instruction */ - fp->fpu_regs.kfpu_u.kfpu_xs->xs_xstate_bv |= XFEATURE_LEGACY_FP; + fp->fpu_regs.kfpu_u.kfpu_xs->xs_header.xsh_xstate_bv |= + XFEATURE_LEGACY_FP; break; default: panic("Invalid fp_save_mech"); @@ -1177,7 +1174,7 @@ kernel_fpu_fpstate_init(kfpu_state_t *kfpu) xs = kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_xs; bzero(xs, cpuid_get_xsave_size()); bcopy(&avx_initial, xs, sizeof (*xs)); - xs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE; + xs->xs_header.xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE; kfpu->kfpu_ctx.fpu_xsave_mask = XFEATURE_FP_ALL; break; default: diff --git a/usr/src/uts/intel/pci_autoconfig/Makefile b/usr/src/uts/intel/pci_autoconfig/Makefile index 74498aea94..f3c034cb03 100644 --- a/usr/src/uts/intel/pci_autoconfig/Makefile +++ b/usr/src/uts/intel/pci_autoconfig/Makefile @@ -57,9 +57,9 @@ ALL_TARGET = $(BINARY) INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # -# Depends on acpica ACPI CA interpreter and PCI-E framework +# Depends on the platform's resource discovery and PCI-E framework # -LDFLAGS += -Nmisc/acpica -Nmisc/pcie +LDFLAGS += -Nmisc/pcie 
-Nmisc/pci_prd # # Default build targets. diff --git a/usr/src/uts/intel/sys/fp.h b/usr/src/uts/intel/sys/fp.h index dfbcf7dc1c..7423444c60 100644 --- a/usr/src/uts/intel/sys/fp.h +++ b/usr/src/uts/intel/sys/fp.h @@ -21,6 +21,7 @@ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2018, Joyent, Inc. + * Copyright 2022 Oxide Computer Company * * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. */ @@ -230,11 +231,23 @@ struct fxsave_state { } __aligned(16); /* 512 bytes */ /* + * This structure represents the header portion of the data layout used by the + * 'xsave' instruction variants. It is documented in section 13.4.2 of the + * Intel 64 and IA-32 Architectures Software Developer’s Manual, Volume 1 + * (IASDv1). Although "header" is somewhat of a misnomer, considering the data + * begins at offset 512 of the xsave area, its contents dictate which portions + * of the area are present and how they may be formatted. + */ +struct xsave_header { + uint64_t xsh_xstate_bv; + uint64_t xsh_xcomp_bv; + uint64_t xsh_reserved[6]; +}; + +/* * This structure is written to memory by one of the 'xsave' instruction * variants. The first 512 bytes are compatible with the format of the 'fxsave' - * area. The header portion of the xsave layout is documented in section - * 13.4.2 of the Intel 64 and IA-32 Architectures Software Developer’s Manual, - * Volume 1 (IASDv1). The extended portion is documented in section 13.4.3. + * area. The extended portion is documented in section 13.4.3. * * Our size is at least AVX_XSAVE_SIZE (832 bytes), which is asserted * statically. Enabling additional xsave-related CPU features requires an @@ -245,9 +258,10 @@ struct fxsave_state { * determined dynamically by querying the CPU. See the xsave_info structure in * cpuid.c. * - * xsave component usage is tracked using bits in the xs_xstate_bv field. The - * components are documented in section 13.1 of IASDv1. 
For easy reference, - * this is a summary of the currently defined component bit definitions: + * xsave component usage is tracked using bits in the xstate_bv field of the + * header. The components are documented in section 13.1 of IASDv1. For easy + * reference, this is a summary of the currently defined component bit + * definitions: * x87 0x0001 * SSE 0x0002 * AVX 0x0004 @@ -259,21 +273,28 @@ struct fxsave_state { * PT 0x0100 * PKRU 0x0200 * When xsaveopt_ctxt is being used to save into the xsave_state area, the - * xs_xstate_bv field is updated by the xsaveopt instruction to indicate which + * xstate_bv field is updated by the xsaveopt instruction to indicate which * elements of the xsave area are active. * - * xs_xcomp_bv should always be 0, since we do not currently use the compressed - * form of xsave (xsavec). + * The xcomp_bv field should always be 0, since we do not currently use the + * compressed form of xsave (xsavec). */ struct xsave_state { struct fxsave_state xs_fxsave; /* 0-511 legacy region */ - uint64_t xs_xstate_bv; /* 512-519 start xsave header */ - uint64_t xs_xcomp_bv; /* 520-527 */ - uint64_t xs_reserved[6]; /* 528-575 end xsave header */ + struct xsave_header xs_header; /* 512-575 XSAVE header */ upad128_t xs_ymm[16]; /* 576 AVX component */ } __aligned(64); /* + * While AVX_XSAVE_SIZE is the smallest the kernel will allocate for FPU + * state-saving, other consumers may constrain themselves to the minimum + * possible xsave state structure, which features only the legacy area and the + * bare xsave header. + */ +#define MIN_XSAVE_SIZE (sizeof (struct fxsave_state) + \ + sizeof (struct xsave_header)) + +/* * Kernel's FPU save area */ typedef struct { |