diff options
author | Robert Mustacchi <rm@joyent.com> | 2019-06-08 01:35:39 +0000 |
---|---|---|
committer | Robert Mustacchi <rm@joyent.com> | 2019-06-21 17:52:24 +0000 |
commit | c6ce7a25abbb33fa38edd55230b7992c5a36ce91 (patch) | |
tree | 7141755661b79ec4feb3f61dea932f535fc4efe5 | |
parent | f866eafe56a327754d101195d6d63f024fb095a5 (diff) | |
download | illumos-joyent-c6ce7a25abbb33fa38edd55230b7992c5a36ce91.tar.gz |
OS-7719 PCIe speeds and feeds should be exposed to userland
OS-7718 pcieb should enable link bandwidth notifications
OS-7742 Clean up pcieb CERRWARN and smatch
OS-7743 Clean up pcie module -Wno-uninitialized and -Wno-parentheses
Reviewed by: Hans Rosenfeld <hans.rosenfeld@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Approved by: Rob Johnston <rob.johnston@joyent.com>
22 files changed, 1385 insertions, 96 deletions
@@ -9972,6 +9972,7 @@ f usr/lib/passwdutil.so.1 0755 root bin d usr/lib/pci 0755 root bin f usr/lib/pci/pcidr 0555 root bin f usr/lib/pci/pcidr_plugin.so 0755 root bin +f usr/lib/pci/pcieb 0555 root bin f usr/lib/pfexecd 0555 root bin d usr/lib/picl 0755 root sys f usr/lib/picl/picld 0555 root sys diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile index 426ae91336..fdbb25c9a0 100644 --- a/usr/src/cmd/Makefile +++ b/usr/src/cmd/Makefile @@ -21,7 +21,7 @@ # # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright (c) 2019, Joyent, Inc. +# Copyright 2019 Joyent, Inc. # Copyright (c) 2012, 2015 by Delphix. All rights reserved. # Copyright (c) 2013 DEY Storage Systems, Inc. All rights reserved. # Copyright 2014 Garrett D'Amore <garrett@damore.org> @@ -304,6 +304,7 @@ COMMON_SUBDIRS= \ pathchk \ pbind \ pcidr \ + pcieb \ pcitool \ pfexec \ pfexecd \ diff --git a/usr/src/cmd/pcieb/Makefile b/usr/src/cmd/pcieb/Makefile new file mode 100644 index 0000000000..6d81356692 --- /dev/null +++ b/usr/src/cmd/pcieb/Makefile @@ -0,0 +1,36 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. 
+# + +PROG= pcieb +LINTPROGS= $(PROG:%=%.ln) + +include ../Makefile.cmd + +ROOTCMDDIR = $(ROOTLIB)/pci + +CFLAGS += $(CCVERBOSE) +CPPFLAGS += -I$(SRC)/uts/common/io/pciex/ + +.KEEP_STATE: + +all: $(PROG) + +install: all $(ROOTCMD) + +clean: + +lint: $(LINTPROGS) + +include ../Makefile.targ diff --git a/usr/src/cmd/pcieb/pcieb.c b/usr/src/cmd/pcieb/pcieb.c new file mode 100644 index 0000000000..0c829e8a51 --- /dev/null +++ b/usr/src/cmd/pcieb/pcieb.c @@ -0,0 +1,169 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Joyent, Inc. + */ + +/* + * Private command to manipulate link speeds of PCIe bridges and allow + * retraining. This is designed to aid debugging. + */ + +#include <unistd.h> +#include <stdarg.h> +#include <stdio.h> +#include <libgen.h> +#include <string.h> +#include <err.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include <pcieb_ioctl.h> + +static const char *pcieb_progname; + +static void +pcieb_usage(const char *fmt, ...) 
+{ + if (fmt != NULL) { + va_list ap; + + (void) fprintf(stderr, "%s: ", pcieb_progname); + va_start(ap, fmt); + (void) vfprintf(stderr, fmt, ap); + va_end(ap); + } + + /* The adjacent literals below form a single format string. */ + (void) fprintf(stderr, "Usage: %s [-x] [-s speed] pcie-bridge\n" + "\n" + "\t-s speed Set link to speed\n" + "\t-x Retrain link\n", + pcieb_progname); +} + +static uint32_t +pcieb_parse_speed(const char *s) +{ + if (strcasecmp(s, "2.5") == 0 || strcasecmp(s, "gen1") == 0) { + return (PCIEB_LINK_SPEED_GEN1); + } else if (strcasecmp(s, "5") == 0 || strcasecmp(s, "gen2") == 0) { + return (PCIEB_LINK_SPEED_GEN2); + } else if (strcasecmp(s, "8") == 0 || strcasecmp(s, "gen3") == 0) { + return (PCIEB_LINK_SPEED_GEN3); + } else if (strcasecmp(s, "16") == 0 || strcasecmp(s, "gen4") == 0) { + return (PCIEB_LINK_SPEED_GEN4); + } else { + errx(EXIT_FAILURE, "invalid speed: %s", s); + } +} + +int +main(int argc, char *argv[]) +{ + int c; + boolean_t retrain = B_FALSE; + boolean_t set = B_FALSE; + boolean_t get = B_TRUE; + uint32_t speed = PCIEB_LINK_SPEED_UNKNOWN; + int fd; + + pcieb_progname = basename(argv[0]); + + while ((c = getopt(argc, argv, ":xs:")) != -1) { + switch (c) { + case 's': + speed = pcieb_parse_speed(optarg); + set = B_TRUE; + get = B_FALSE; + break; + case 'x': + retrain = B_TRUE; + get = B_FALSE; + break; + case ':': + pcieb_usage("option -%c requires an operand\n", optopt); + return (2); + case '?': + default: + pcieb_usage("unknown option: -%c\n", optopt); + return (2); + + } + } + + argc -= optind; + argv += optind; + + if (argc != 1) { + pcieb_usage("missing required PCIe bridge device\n"); + return (2); + } + + if ((fd = open(argv[0], O_RDWR)) < 0) { + err(EXIT_FAILURE, "failed to open %s", argv[0]); + } + + if (set) { + pcieb_ioctl_target_speed_t pits; + + pits.pits_flags = 0; + pits.pits_speed = speed; + + if (ioctl(fd, PCIEB_IOCTL_SET_TARGET_SPEED, &pits) != 0) { + err(EXIT_FAILURE, "failed to set target speed"); + } + } + + if (retrain) { + if (ioctl(fd, PCIEB_IOCTL_RETRAIN) != 
0) { + err(EXIT_FAILURE, "failed to retrain link"); + } + } + + if (get) { + pcieb_ioctl_target_speed_t pits; + + if (ioctl(fd, PCIEB_IOCTL_GET_TARGET_SPEED, &pits) != 0) { + err(EXIT_FAILURE, "failed to get target speed"); + } + + (void) printf("Bridge target speed: "); + switch (pits.pits_speed) { + case PCIEB_LINK_SPEED_GEN1: + (void) printf("2.5 GT/s (gen1)\n"); + break; + case PCIEB_LINK_SPEED_GEN2: + (void) printf("5.0 GT/s (gen2)\n"); + break; + case PCIEB_LINK_SPEED_GEN3: + (void) printf("8.0 GT/s (gen3)\n"); + break; + case PCIEB_LINK_SPEED_GEN4: + (void) printf("16.0 GT/s (gen4)\n"); + break; + default: + (void) printf("Unknown Value: 0x%x\n", pits.pits_speed); + } + + if ((pits.pits_flags & ~PCIEB_FLAGS_ADMIN_SET) != 0) { + (void) printf("Unknown flags: 0x%x\n", pits.pits_flags); + } else if ((pits.pits_flags & PCIEB_FLAGS_ADMIN_SET) != 0) { + (void) printf("Flags: Admin Set Speed\n"); + } + } + + (void) close(fd); + return (0); +} diff --git a/usr/src/lib/fm/topo/libtopo/common/topo_hc.h b/usr/src/lib/fm/topo/libtopo/common/topo_hc.h index df52b7c6e1..8f245c1cf0 100644 --- a/usr/src/lib/fm/topo/libtopo/common/topo_hc.h +++ b/usr/src/lib/fm/topo/libtopo/common/topo_hc.h @@ -124,6 +124,13 @@ extern "C" { #define TOPO_PCI_CLASS "class-code" #define TOPO_PCI_AADDR "assigned-addresses" +#define TOPO_PCI_MAX_WIDTH "link-maximum-width" +#define TOPO_PCI_CUR_WIDTH "link-current-width" +#define TOPO_PCI_MAX_SPEED "link-maximum-speed" +#define TOPO_PCI_CUR_SPEED "link-current-speed" +#define TOPO_PCI_SUP_SPEED "link-supported-speeds" +#define TOPO_PCI_ADMIN_SPEED "link-admin-target-speed" + #define TOPO_PGROUP_BINDING "binding" #define TOPO_BINDING_OCCUPANT "occupant-path" #define TOPO_BINDING_DRIVER "driver" diff --git a/usr/src/lib/fm/topo/modules/common/pcibus/did_props.c b/usr/src/lib/fm/topo/modules/common/pcibus/did_props.c index e216dec9d1..af4292830f 100644 --- a/usr/src/lib/fm/topo/modules/common/pcibus/did_props.c +++ 
b/usr/src/lib/fm/topo/modules/common/pcibus/did_props.c @@ -24,7 +24,7 @@ * Use is subject to license terms. */ /* - * Copyright (c) 2018, Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ #include <assert.h> @@ -74,6 +74,14 @@ static int AADDR_set(tnode_t *, did_t *, const char *, const char *, const char *); static int maybe_pcidb_set(tnode_t *, did_t *, const char *, const char *, const char *); +static int maybe_di_int_to_uint32(tnode_t *, did_t *, + const char *, const char *, const char *); +static int maybe_pcie_speed(tnode_t *, did_t *, + const char *, const char *, const char *); +static int maybe_pcie_supported_speed(tnode_t *, did_t *, + const char *, const char *, const char *); +static int maybe_pcie_target_speed(tnode_t *, did_t *, + const char *, const char *, const char *); /* * Arrays of "property translation routines" to set the properties a @@ -169,7 +177,18 @@ txprop_t Fn_common_props[] = { txprop_t Dev_common_props[] = { { NULL, &protocol_pgroup, TOPO_PROP_LABEL, label_set }, { NULL, &protocol_pgroup, TOPO_PROP_FRU, FRU_set }, - { NULL, &protocol_pgroup, TOPO_PROP_ASRU, ASRU_set } + { NULL, &protocol_pgroup, TOPO_PROP_ASRU, ASRU_set }, + { DI_PCIE_MAX_WIDTH, &pci_pgroup, TOPO_PCI_MAX_WIDTH, + maybe_di_int_to_uint32 }, + { DI_PCIE_CUR_WIDTH, &pci_pgroup, TOPO_PCI_CUR_WIDTH, + maybe_di_int_to_uint32 }, + { DI_PCIE_MAX_SPEED, &pci_pgroup, TOPO_PCI_MAX_SPEED, + maybe_pcie_speed }, + { DI_PCIE_CUR_SPEED, &pci_pgroup, TOPO_PCI_CUR_SPEED, + maybe_pcie_speed }, + { DI_PCIE_SUP_SPEEDS, &pci_pgroup, TOPO_PCI_SUP_SPEED, + maybe_pcie_supported_speed }, + { NULL, &pci_pgroup, TOPO_PCI_ADMIN_SPEED, maybe_pcie_target_speed } }; txprop_t Bus_common_props[] = { @@ -1057,3 +1076,94 @@ did_props_set(tnode_t *tn, did_t *pd, txprop_t txarray[], int txnum) } return (0); } + +static int +maybe_di_int_to_uint32(tnode_t *tn, did_t *pd, const char *dpnm, + const char *tpgrp, const char *tpnm) +{ + int ret, *vals; + + ret = di_prop_lookup_ints(DDI_DEV_T_ANY, 
did_dinode(pd), dpnm, &vals); + if (ret != 1) { + return (0); + } + + if (topo_prop_set_uint32(tn, tpgrp, tpnm, 0, (uint32_t)*vals, &ret) != + 0) { + return (topo_mod_seterrno(did_mod(pd), ret)); + } + + return (0); +} + +static int +maybe_pcie_speed(tnode_t *tn, did_t *pd, const char *dpnm, const char *tpgrp, + const char *tpnm) +{ + int ret; + int64_t *vals; + + ret = di_prop_lookup_int64(DDI_DEV_T_ANY, did_dinode(pd), dpnm, &vals); + if (ret != 1) { + return (0); + } + + if (topo_prop_set_uint64(tn, tpgrp, tpnm, 0, (uint64_t)*vals, &ret) != + 0) { + return (topo_mod_seterrno(did_mod(pd), ret)); + } + return (0); +} + +static int +maybe_pcie_supported_speed(tnode_t *tn, did_t *pd, const char *dpnm, + const char *tpgrp, const char *tpnm) +{ + int ret; + uint_t count; + int64_t *vals; + + ret = di_prop_lookup_int64(DDI_DEV_T_ANY, did_dinode(pd), dpnm, &vals); + if (ret < 1) { + return (0); + } + + count = (uint_t)ret; + if (topo_prop_set_uint64_array(tn, tpgrp, tpnm, 0, (uint64_t *)vals, + count, &ret) != 0) { + return (topo_mod_seterrno(did_mod(pd), ret)); + } + return (0); +} + +static int +maybe_pcie_target_speed(tnode_t *tn, did_t *pd, const char *dpnm, + const char *tpgrp, const char *tpnm) +{ + di_prop_t prop = DI_PROP_NIL; + boolean_t admin = B_FALSE; + int64_t *val = NULL; + int ret; + + while ((prop = di_prop_next(did_dinode(pd), prop)) != DI_PROP_NIL) { + const char *n = di_prop_name(prop); + + if (strcmp(DI_PCIE_ADMIN_TAG, n) == 0) { + admin = B_TRUE; + } else if (strcmp(DI_PCIE_TARG_SPEED, n) == 0) { + if (di_prop_int64(prop, &val) != 1) { + val = NULL; + } + } + } + + if (!admin || val == NULL) { + return (0); + } + + if (topo_prop_set_uint64(tn, tpgrp, tpnm, 0, (uint64_t)*val, &ret) != + 0) { + return (topo_mod_seterrno(did_mod(pd), ret)); + } + return (0); +} diff --git a/usr/src/lib/fm/topo/modules/common/pcibus/did_props.h b/usr/src/lib/fm/topo/modules/common/pcibus/did_props.h index cddf90e79d..eb42432573 100644 --- 
a/usr/src/lib/fm/topo/modules/common/pcibus/did_props.h +++ b/usr/src/lib/fm/topo/modules/common/pcibus/did_props.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018, Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ #ifndef _DID_PROPS_H @@ -73,6 +73,14 @@ typedef struct txprop { #define DI_RECEPTACLE_PHYMASK "receptacle-pm" #define DI_RECEPTACLE_LABEL "receptacle-label" +#define DI_PCIE_MAX_WIDTH "pcie-link-maximum-width" +#define DI_PCIE_CUR_WIDTH "pcie-link-current-width" +#define DI_PCIE_CUR_SPEED "pcie-link-current-speed" +#define DI_PCIE_MAX_SPEED "pcie-link-maximum-speed" +#define DI_PCIE_SUP_SPEEDS "pcie-link-supported-speeds" +#define DI_PCIE_TARG_SPEED "pcie-link-target-speed" +#define DI_PCIE_ADMIN_TAG "pcie-link-admin-target-speed" + extern int did_props_set(tnode_t *, did_t *, txprop_t[], int); extern tnode_t *find_predecessor(tnode_t *, char *); diff --git a/usr/src/uts/common/io/pciex/hotplug/pcie_hp.c b/usr/src/uts/common/io/pciex/hotplug/pcie_hp.c index 9a2163f9b0..8adfb4f6f9 100644 --- a/usr/src/uts/common/io/pciex/hotplug/pcie_hp.c +++ b/usr/src/uts/common/io/pciex/hotplug/pcie_hp.c @@ -22,6 +22,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. */ /* @@ -469,7 +470,7 @@ pcie_hp_create_occupant_props(dev_info_t *dip, dev_t dev, int pci_dev) { pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); pcie_hp_ctrl_t *ctrl_p = (pcie_hp_ctrl_t *)bus_p->bus_hp_ctrl; - pcie_hp_slot_t *slotp; + pcie_hp_slot_t *slotp = NULL; pcie_hp_cn_cfg_t cn_cfg; pcie_hp_occupant_info_t *occupant; int circular, i; diff --git a/usr/src/uts/common/io/pciex/hotplug/pciehpc.c b/usr/src/uts/common/io/pciex/hotplug/pciehpc.c index 94a32abf1b..5ce219bd2f 100644 --- a/usr/src/uts/common/io/pciex/hotplug/pciehpc.c +++ b/usr/src/uts/common/io/pciex/hotplug/pciehpc.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. 
All rights reserved. + * Copyright 2019 Joyent, Inc. */ /* @@ -1585,8 +1586,9 @@ pciehpc_slot_get_property(pcie_hp_slot_t *slot_p, ddi_hp_property_t *arg, /* for each requested property, get the value and add it to nvlist */ prop_pair = NULL; - while (prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) { + while ((prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) != NULL) { name = nvpair_name(prop_pair); + value = NULL; if (strcmp(name, PCIEHPC_PROP_LED_FAULT) == 0) { value = pcie_led_state_text( @@ -1795,7 +1797,7 @@ set_prop_cleanup1: /* Validate the request */ prop_pair = NULL; - while (prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) { + while ((prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) != NULL) { name = nvpair_name(prop_pair); if (nvpair_type(prop_pair) != DATA_TYPE_STRING) { PCIE_DBG("Unexpected data type of setting " @@ -1832,9 +1834,12 @@ set_prop_cleanup1: /* set each property */ prop_pair = NULL; - while (prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) { + while ((prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) != NULL) { name = nvpair_name(prop_pair); + /* + * The validity of the property was checked above. + */ if (strcmp(name, PCIEHPC_PROP_LED_ATTN) == 0) { if (strcmp(value, PCIEHPC_PROP_VALUE_ON) == 0) led_state = PCIE_HP_LED_ON; @@ -1842,6 +1847,8 @@ set_prop_cleanup1: led_state = PCIE_HP_LED_OFF; else if (strcmp(value, PCIEHPC_PROP_VALUE_BLINK) == 0) led_state = PCIE_HP_LED_BLINK; + else + continue; pciehpc_set_led_state(ctrl_p, PCIE_HP_ATTN_LED, led_state); diff --git a/usr/src/uts/common/io/pciex/hotplug/pcishpc.c b/usr/src/uts/common/io/pciex/hotplug/pcishpc.c index 22e7418096..d6057de1b8 100644 --- a/usr/src/uts/common/io/pciex/hotplug/pcishpc.c +++ b/usr/src/uts/common/io/pciex/hotplug/pcishpc.c @@ -21,6 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. 
*/ /* @@ -449,8 +450,9 @@ pcishpc_slot_get_property(pcie_hp_slot_t *slot_p, ddi_hp_property_t *arg, /* for each requested property, get the value and add it to nvlist */ prop_pair = NULL; - while (prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) { + while ((prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) != NULL) { name = nvpair_name(prop_pair); + value = NULL; if (strcmp(name, PCIEHPC_PROP_LED_FAULT) == 0) { value = pcie_led_state_text( @@ -661,7 +663,7 @@ set_prop_cleanup1: /* Validate the request */ prop_pair = NULL; - while (prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) { + while ((prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) != NULL) { name = nvpair_name(prop_pair); if (nvpair_type(prop_pair) != DATA_TYPE_STRING) { PCIE_DBG("Unexpected data type of setting " @@ -699,7 +701,7 @@ set_prop_cleanup1: // set each property prop_pair = NULL; - while (prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) { + while ((prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) != NULL) { name = nvpair_name(prop_pair); if (strcmp(name, PCIEHPC_PROP_LED_ATTN) == 0) { @@ -709,6 +711,8 @@ set_prop_cleanup1: led_state = PCIE_HP_LED_OFF; else if (strcmp(value, PCIEHPC_PROP_VALUE_BLINK) == 0) led_state = PCIE_HP_LED_BLINK; + else + continue; (void) pcishpc_setled(slot_p, PCIE_HP_ATTN_LED, led_state); diff --git a/usr/src/uts/common/io/pciex/pcie.c b/usr/src/uts/common/io/pciex/pcie.c index b06e750888..1922f821f3 100644 --- a/usr/src/uts/common/io/pciex/pcie.c +++ b/usr/src/uts/common/io/pciex/pcie.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019, Joyent, Inc. + * Copyright 2019 Joyent, Inc. 
*/ #include <sys/sysmacros.h> @@ -45,6 +45,9 @@ #include <sys/hotplug/pci/pcishpc.h> #include <sys/hotplug/pci/pcicfg.h> #include <sys/pci_cfgacc.h> +#include <sys/sysevent.h> +#include <sys/sysevent/eventdefs.h> +#include <sys/sysevent/pcie.h> /* Local functions prototypes */ static void pcie_init_pfd(dev_info_t *); @@ -141,12 +144,24 @@ uint32_t pcie_aer_suce_severity = PCIE_AER_SUCE_SERR_ASSERT | \ int pcie_max_mps = PCIE_DEVCTL_MAX_PAYLOAD_4096 >> 5; int pcie_disable_ari = 0; +/* + * Amount of time to wait for an in-progress retraining. The default is to try + * 500 times in 10ms chunks, thus a total of 5s. + */ +uint32_t pcie_link_retrain_count = 500; +uint32_t pcie_link_retrain_delay_ms = 10; + +taskq_t *pcie_link_tq; +kmutex_t pcie_link_tq_mutex; + static void pcie_scan_mps(dev_info_t *rc_dip, dev_info_t *dip, int *max_supported); static int pcie_get_max_supported(dev_info_t *dip, void *arg); static int pcie_map_phys(dev_info_t *dip, pci_regspec_t *phys_spec, caddr_t *addrp, ddi_acc_handle_t *handlep); static void pcie_unmap_phys(ddi_acc_handle_t *handlep, pci_regspec_t *ph); +static int pcie_link_bw_intr(dev_info_t *); +static void pcie_capture_speeds(dev_info_t *); dev_info_t *pcie_get_rc_dip(dev_info_t *dip); @@ -182,8 +197,10 @@ _init(void) pcie_nv_buf = kmem_alloc(ERPT_DATA_SZ, KM_SLEEP); pcie_nvap = fm_nva_xcreate(pcie_nv_buf, ERPT_DATA_SZ); pcie_nvl = fm_nvlist_create(pcie_nvap); + mutex_init(&pcie_link_tq_mutex, NULL, MUTEX_DRIVER, NULL); if ((rval = mod_install(&modlinkage)) != 0) { + mutex_destroy(&pcie_link_tq_mutex); fm_nvlist_destroy(pcie_nvl, FM_NVA_RETAIN); fm_nva_xdestroy(pcie_nvap); kmem_free(pcie_nv_buf, ERPT_DATA_SZ); @@ -197,6 +214,10 @@ _fini() int rval; if ((rval = mod_remove(&modlinkage)) == 0) { + if (pcie_link_tq != NULL) { + taskq_destroy(pcie_link_tq); + } + mutex_destroy(&pcie_link_tq_mutex); fm_nvlist_destroy(pcie_nvl, FM_NVA_RETAIN); fm_nva_xdestroy(pcie_nvap); kmem_free(pcie_nv_buf, ERPT_DATA_SZ); @@ -217,6 +238,18 @@ 
pcie_init(dev_info_t *dip, caddr_t arg) int ret = DDI_SUCCESS; /* + * Our _init function is too early to create a taskq. Create the pcie + * link management taskq here now instead. + */ + mutex_enter(&pcie_link_tq_mutex); + if (pcie_link_tq == NULL) { + pcie_link_tq = taskq_create("pcie_link", 1, minclsyspri, 0, 0, + 0); + } + mutex_exit(&pcie_link_tq_mutex); + + + /* * Create a "devctl" minor node to support DEVCTL_DEVICE_* * and DEVCTL_BUS_* ioctls to this bus. */ @@ -270,6 +303,10 @@ pcie_uninit(dev_info_t *dip) return (ret); } + if (pcie_link_bw_supported(dip)) { + (void) pcie_link_bw_disable(dip); + } + ddi_remove_minor_node(dip, "devctl"); return (ret); @@ -319,7 +356,16 @@ pcie_hpintr_disable(dev_info_t *dip) int pcie_intr(dev_info_t *dip) { - return (pcie_hp_intr(dip)); + int hp, lbw; + + hp = pcie_hp_intr(dip); + lbw = pcie_link_bw_intr(dip); + + if (hp == DDI_INTR_CLAIMED || lbw == DDI_INTR_CLAIMED) { + return (DDI_INTR_CLAIMED); + } + + return (DDI_INTR_UNCLAIMED); } /* ARGSUSED */ @@ -657,6 +703,8 @@ pcie_initchild(dev_info_t *cdip) pcie_enable_errors(cdip); pcie_determine_serial(cdip); + + pcie_capture_speeds(cdip); } bus_p->bus_ari = B_FALSE; @@ -939,6 +987,120 @@ pcie_rc_fini_bus(dev_info_t *dip) kmem_free(bus_p, sizeof (pcie_bus_t)); } +static int +pcie_width_to_int(pcie_link_width_t width) +{ + switch (width) { + case PCIE_LINK_WIDTH_X1: + return (1); + case PCIE_LINK_WIDTH_X2: + return (2); + case PCIE_LINK_WIDTH_X4: + return (4); + case PCIE_LINK_WIDTH_X8: + return (8); + case PCIE_LINK_WIDTH_X12: + return (12); + case PCIE_LINK_WIDTH_X16: + return (16); + case PCIE_LINK_WIDTH_X32: + return (32); + default: + return (0); + } +} + +/* + * Return the speed in Transfers / second. This is a signed quantity to match + * the ndi/ddi property interfaces. 
+ */ +static int64_t +pcie_speed_to_int(pcie_link_speed_t speed) +{ + switch (speed) { + case PCIE_LINK_SPEED_2_5: + return (2500000000LL); + case PCIE_LINK_SPEED_5: + return (5000000000LL); + case PCIE_LINK_SPEED_8: + return (8000000000LL); + case PCIE_LINK_SPEED_16: + return (16000000000LL); + default: + return (0); + } +} + +/* + * Translate the recorded speed information into devinfo properties. + */ +static void +pcie_speeds_to_devinfo(dev_info_t *dip, pcie_bus_t *bus_p) +{ + if (bus_p->bus_max_width != PCIE_LINK_WIDTH_UNKNOWN) { + (void) ndi_prop_update_int(DDI_DEV_T_NONE, dip, + "pcie-link-maximum-width", + pcie_width_to_int(bus_p->bus_max_width)); + } + + if (bus_p->bus_cur_width != PCIE_LINK_WIDTH_UNKNOWN) { + (void) ndi_prop_update_int(DDI_DEV_T_NONE, dip, + "pcie-link-current-width", + pcie_width_to_int(bus_p->bus_cur_width)); + } + + if (bus_p->bus_cur_speed != PCIE_LINK_SPEED_UNKNOWN) { + (void) ndi_prop_update_int64(DDI_DEV_T_NONE, dip, + "pcie-link-current-speed", + pcie_speed_to_int(bus_p->bus_cur_speed)); + } + + if (bus_p->bus_max_speed != PCIE_LINK_SPEED_UNKNOWN) { + (void) ndi_prop_update_int64(DDI_DEV_T_NONE, dip, + "pcie-link-maximum-speed", + pcie_speed_to_int(bus_p->bus_max_speed)); + } + + if (bus_p->bus_target_speed != PCIE_LINK_SPEED_UNKNOWN) { + (void) ndi_prop_update_int64(DDI_DEV_T_NONE, dip, + "pcie-link-target-speed", + pcie_speed_to_int(bus_p->bus_target_speed)); + } + + if ((bus_p->bus_speed_flags & PCIE_LINK_F_ADMIN_TARGET) != 0) { + (void) ndi_prop_create_boolean(DDI_DEV_T_NONE, dip, + "pcie-link-admin-target-speed"); + } + + if (bus_p->bus_sup_speed != PCIE_LINK_SPEED_UNKNOWN) { + int64_t speeds[4]; + uint_t nspeeds = 0; + + if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_2_5) { + speeds[nspeeds++] = + pcie_speed_to_int(PCIE_LINK_SPEED_2_5); + } + + if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_5) { + speeds[nspeeds++] = + pcie_speed_to_int(PCIE_LINK_SPEED_5); + } + + if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_8) { + speeds[nspeeds++] = 
+ pcie_speed_to_int(PCIE_LINK_SPEED_8); + } + + if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_16) { + speeds[nspeeds++] = + pcie_speed_to_int(PCIE_LINK_SPEED_16); + } + + (void) ndi_prop_update_int64_array(DDI_DEV_T_NONE, dip, + "pcie-link-supported-speeds", speeds, nspeeds); + } +} + /* * We need to capture the supported, maximum, and current device speed and * width. The way that this has been done has changed over time. @@ -952,18 +1114,20 @@ pcie_rc_fini_bus(dev_info_t *dip) * Now, a device may not implement some of these registers. To determine whether * or not it's here, we have to do the following. First, we need to check the * revision of the PCI express capability. The link capabilities 2 register did - * not exist prior to version 2 of this register. + * not exist prior to version 2 of this capability. If a modern device does not + * implement it, it is supposed to return zero for the register. */ static void -pcie_capture_speeds(pcie_bus_t *bus_p, pcie_req_id_t bdf, dev_info_t *rcdip) +pcie_capture_speeds(dev_info_t *dip) { uint16_t vers, status; - uint32_t val, cap, cap2; + uint32_t cap, cap2, ctl2; + pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); if (!PCIE_IS_PCIE(bus_p)) return; - vers = pci_cfgacc_get16(rcdip, bdf, bus_p->bus_pcie_off + PCIE_PCIECAP); + vers = PCIE_CAP_GET(16, bus_p, PCIE_PCIECAP); if (vers == PCI_EINVAL16) return; vers &= PCIE_PCIECAP_VER_MASK; @@ -974,24 +1138,28 @@ pcie_capture_speeds(pcie_bus_t *bus_p, pcie_req_id_t bdf, dev_info_t *rcdip) switch (vers) { case PCIE_PCIECAP_VER_1_0: cap2 = 0; + ctl2 = 0; break; case PCIE_PCIECAP_VER_2_0: - cap2 = pci_cfgacc_get32(rcdip, bdf, bus_p->bus_pcie_off + - PCIE_LINKCAP2); + cap2 = PCIE_CAP_GET(32, bus_p, PCIE_LINKCAP2); if (cap2 == PCI_EINVAL32) cap2 = 0; + ctl2 = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL2); + if (ctl2 == PCI_EINVAL16) + ctl2 = 0; break; default: /* Don't try and handle an unknown version */ return; } - status = pci_cfgacc_get16(rcdip, bdf, bus_p->bus_pcie_off + - PCIE_LINKSTS); - cap = 
pci_cfgacc_get32(rcdip, bdf, bus_p->bus_pcie_off + PCIE_LINKCAP); + status = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS); + cap = PCIE_CAP_GET(32, bus_p, PCIE_LINKCAP); if (status == PCI_EINVAL16 || cap == PCI_EINVAL32) return; + mutex_enter(&bus_p->bus_speed_mutex); + switch (status & PCIE_LINKSTS_SPEED_MASK) { case PCIE_LINKSTS_SPEED_2_5: bus_p->bus_cur_speed = PCIE_LINK_SPEED_2_5; @@ -1104,13 +1272,32 @@ pcie_capture_speeds(pcie_bus_t *bus_p, pcie_req_id_t bdf, dev_info_t *rcdip) bus_p->bus_max_speed = PCIE_LINK_SPEED_5; bus_p->bus_sup_speed = PCIE_LINK_SPEED_2_5 | PCIE_LINK_SPEED_5; - } - - if (cap & PCIE_LINKCAP_MAX_SPEED_2_5) { + } else if (cap & PCIE_LINKCAP_MAX_SPEED_2_5) { bus_p->bus_max_speed = PCIE_LINK_SPEED_2_5; bus_p->bus_sup_speed = PCIE_LINK_SPEED_2_5; } } + + switch (ctl2 & PCIE_LINKCTL2_TARGET_SPEED_MASK) { + case PCIE_LINKCTL2_TARGET_SPEED_2_5: + bus_p->bus_target_speed = PCIE_LINK_SPEED_2_5; + break; + case PCIE_LINKCTL2_TARGET_SPEED_5: + bus_p->bus_target_speed = PCIE_LINK_SPEED_5; + break; + case PCIE_LINKCTL2_TARGET_SPEED_8: + bus_p->bus_target_speed = PCIE_LINK_SPEED_8; + break; + case PCIE_LINKCTL2_TARGET_SPEED_16: + bus_p->bus_target_speed = PCIE_LINK_SPEED_16; + break; + default: + bus_p->bus_target_speed = PCIE_LINK_SPEED_UNKNOWN; + break; + } + + pcie_speeds_to_devinfo(dip, bus_p); + mutex_exit(&bus_p->bus_speed_mutex); } /* @@ -1186,7 +1373,7 @@ pcie_init_bus(dev_info_t *dip, pcie_req_id_t bdf, uint8_t flags) uint16_t status, base, baseptr, num_cap; uint32_t capid; int range_size; - pcie_bus_t *bus_p; + pcie_bus_t *bus_p = NULL; dev_info_t *rcdip; dev_info_t *pdip; const char *errstr = NULL; @@ -1406,15 +1593,15 @@ initial_done: pcie_init_plat(dip); - pcie_capture_speeds(bus_p, bdf, rcdip); - final_done: PCIE_DBG("Add %s(dip 0x%p, bdf 0x%x, secbus 0x%x)\n", ddi_driver_name(dip), (void *)dip, bus_p->bus_bdf, bus_p->bus_bdg_secbus); #ifdef DEBUG - pcie_print_bus(bus_p); + if (bus_p != NULL) { + pcie_print_bus(bus_p); + } #endif return (bus_p); 
@@ -2640,3 +2827,337 @@ pcie_check_io_mem_range(ddi_acc_handle_t cfg_hdl, boolean_t *empty_io_range, } #endif /* defined(__i386) || defined(__amd64) */ + +boolean_t +pcie_link_bw_supported(dev_info_t *dip) +{ + uint32_t linkcap; + pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); + + if (!PCIE_IS_PCIE(bus_p)) { + return (B_FALSE); + } + + if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) { + return (B_FALSE); + } + + linkcap = PCIE_CAP_GET(32, bus_p, PCIE_LINKCAP); + return ((linkcap & PCIE_LINKCAP_LINK_BW_NOTIFY_CAP) != 0); +} + +int +pcie_link_bw_enable(dev_info_t *dip) +{ + uint16_t linkctl; + pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); + + if (!pcie_link_bw_supported(dip)) { + return (DDI_FAILURE); + } + + mutex_init(&bus_p->bus_lbw_mutex, NULL, MUTEX_DRIVER, NULL); + cv_init(&bus_p->bus_lbw_cv, NULL, CV_DRIVER, NULL); + linkctl = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL); + linkctl |= PCIE_LINKCTL_LINK_BW_INTR_EN; + linkctl |= PCIE_LINKCTL_LINK_AUTO_BW_INTR_EN; + PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL, linkctl); + + bus_p->bus_lbw_pbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + bus_p->bus_lbw_cbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + bus_p->bus_lbw_state |= PCIE_LBW_S_ENABLED; + + return (DDI_SUCCESS); +} + +int +pcie_link_bw_disable(dev_info_t *dip) +{ + uint16_t linkctl; + pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); + + if ((bus_p->bus_lbw_state & PCIE_LBW_S_ENABLED) == 0) { + return (DDI_FAILURE); + } + + mutex_enter(&bus_p->bus_lbw_mutex); + while ((bus_p->bus_lbw_state & + (PCIE_LBW_S_DISPATCHED | PCIE_LBW_S_RUNNING)) != 0) { + cv_wait(&bus_p->bus_lbw_cv, &bus_p->bus_lbw_mutex); + } + mutex_exit(&bus_p->bus_lbw_mutex); + + linkctl = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL); + linkctl &= ~PCIE_LINKCTL_LINK_BW_INTR_EN; + linkctl &= ~PCIE_LINKCTL_LINK_AUTO_BW_INTR_EN; + PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL, linkctl); + + bus_p->bus_lbw_state &= ~PCIE_LBW_S_ENABLED; + kmem_free(bus_p->bus_lbw_pbuf, MAXPATHLEN); + kmem_free(bus_p->bus_lbw_cbuf, MAXPATHLEN); + bus_p->bus_lbw_pbuf = NULL; + 
bus_p->bus_lbw_cbuf = NULL; + + mutex_destroy(&bus_p->bus_lbw_mutex); + cv_destroy(&bus_p->bus_lbw_cv); + + return (DDI_SUCCESS); +} + +void +pcie_link_bw_taskq(void *arg) +{ + dev_info_t *dip = arg; + pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); + dev_info_t *cdip; + boolean_t again; + sysevent_t *se; + sysevent_value_t se_val; + sysevent_id_t eid; + sysevent_attr_list_t *ev_attr_list; + int circular; + +top: + ndi_devi_enter(dip, &circular); + se = NULL; + ev_attr_list = NULL; + mutex_enter(&bus_p->bus_lbw_mutex); + bus_p->bus_lbw_state &= ~PCIE_LBW_S_DISPATCHED; + bus_p->bus_lbw_state |= PCIE_LBW_S_RUNNING; + mutex_exit(&bus_p->bus_lbw_mutex); + + /* + * Update our own speeds as we've likely changed something. + */ + pcie_capture_speeds(dip); + + /* + * Walk our children. We only care about updating this on function 0 + * because the PCIe specification requires that these all be the same + * otherwise. + */ + for (cdip = ddi_get_child(dip); cdip != NULL; + cdip = ddi_get_next_sibling(cdip)) { + pcie_bus_t *cbus_p = PCIE_DIP2BUS(cdip); + + if (cbus_p == NULL) { + continue; + } + + if ((cbus_p->bus_bdf & PCIE_REQ_ID_FUNC_MASK) != 0) { + continue; + } + + /* + * It's possible that this can fire while a child is otherwise + * only partially constructed. Therefore, if we don't have the + * config handle, don't bother updating the child. 
+ */ + if (cbus_p->bus_cfg_hdl == NULL) { + continue; + } + + pcie_capture_speeds(cdip); + break; + } + + se = sysevent_alloc(EC_PCIE, ESC_PCIE_LINK_STATE, + ILLUMOS_KERN_PUB "pcie", SE_SLEEP); + + (void) ddi_pathname(dip, bus_p->bus_lbw_pbuf); + se_val.value_type = SE_DATA_TYPE_STRING; + se_val.value.sv_string = bus_p->bus_lbw_pbuf; + if (sysevent_add_attr(&ev_attr_list, PCIE_EV_DETECTOR_PATH, &se_val, + SE_SLEEP) != 0) { + ndi_devi_exit(dip, circular); + goto err; + } + + if (cdip != NULL) { + (void) ddi_pathname(cdip, bus_p->bus_lbw_cbuf); + + se_val.value_type = SE_DATA_TYPE_STRING; + se_val.value.sv_string = bus_p->bus_lbw_cbuf; + + /* + * If this fails, that's OK. We'd rather get the event off and + * there's a chance that there may not be anything there for us. + */ + (void) sysevent_add_attr(&ev_attr_list, PCIE_EV_CHILD_PATH, + &se_val, SE_SLEEP); + } + + ndi_devi_exit(dip, circular); + + /* + * Before we generate and send down a sysevent, we need to tell the + * system that parts of the devinfo cache need to be invalidated. While + * the function below takes several args, it ignores them all. Because + * this is a global invalidation, we don't bother trying to do much more + * than requesting a global invalidation, lest we accidentally kick off + * several in a row. 
+ */ + ddi_prop_cache_invalidate(DDI_DEV_T_NONE, NULL, NULL, 0); + + if (sysevent_attach_attributes(se, ev_attr_list) != 0) { + goto err; + } + ev_attr_list = NULL; + + if (log_sysevent(se, SE_SLEEP, &eid) != 0) { + goto err; + } + +err: + sysevent_free_attr(ev_attr_list); + sysevent_free(se); + + mutex_enter(&bus_p->bus_lbw_mutex); + bus_p->bus_lbw_state &= ~PCIE_LBW_S_RUNNING; + cv_broadcast(&bus_p->bus_lbw_cv); + again = (bus_p->bus_lbw_state & PCIE_LBW_S_DISPATCHED) != 0; + mutex_exit(&bus_p->bus_lbw_mutex); + + if (again) { + goto top; + } +} + +int +pcie_link_bw_intr(dev_info_t *dip) +{ + pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); + uint16_t linksts; + uint16_t flags = PCIE_LINKSTS_LINK_BW_MGMT | PCIE_LINKSTS_AUTO_BW; + dev_info_t *cdip; + sysevent_t *se = NULL; + sysevent_value_t se_val; + sysevent_id_t eid; + sysevent_attr_list_t *ev_attr_list = NULL; + + if ((bus_p->bus_lbw_state & PCIE_LBW_S_ENABLED) == 0) { + return (DDI_INTR_UNCLAIMED); + } + + linksts = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS); + if ((linksts & flags) == 0) { + return (DDI_INTR_UNCLAIMED); + } + + /* + * Check if we've already dispatched this event. If we have already + * dispatched it, then there's nothing else to do, we coalesce multiple + * events. 
+ */ + mutex_enter(&bus_p->bus_lbw_mutex); + bus_p->bus_lbw_nevents++; + if ((bus_p->bus_lbw_state & PCIE_LBW_S_DISPATCHED) == 0) { + if ((bus_p->bus_lbw_state & PCIE_LBW_S_RUNNING) == 0) { + taskq_dispatch_ent(pcie_link_tq, pcie_link_bw_taskq, + dip, 0, &bus_p->bus_lbw_ent); + } + + bus_p->bus_lbw_state |= PCIE_LBW_S_DISPATCHED; + } + mutex_exit(&bus_p->bus_lbw_mutex); + + PCIE_CAP_PUT(16, bus_p, PCIE_LINKSTS, flags); + return (DDI_INTR_CLAIMED); +} + +int +pcie_link_set_target(dev_info_t *dip, pcie_link_speed_t speed) +{ + uint16_t ctl2, rval; + pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); + + if (!PCIE_IS_PCIE(bus_p)) { + return (ENOTSUP); + } + + if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) { + return (ENOTSUP); + } + + switch (speed) { + case PCIE_LINK_SPEED_2_5: + rval = PCIE_LINKCTL2_TARGET_SPEED_2_5; + break; + case PCIE_LINK_SPEED_5: + rval = PCIE_LINKCTL2_TARGET_SPEED_5; + break; + case PCIE_LINK_SPEED_8: + rval = PCIE_LINKCTL2_TARGET_SPEED_8; + break; + case PCIE_LINK_SPEED_16: + rval = PCIE_LINKCTL2_TARGET_SPEED_16; + break; + default: + return (EINVAL); + } + + mutex_enter(&bus_p->bus_speed_mutex); + bus_p->bus_target_speed = speed; + bus_p->bus_speed_flags |= PCIE_LINK_F_ADMIN_TARGET; + + ctl2 = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL2); + ctl2 &= ~PCIE_LINKCTL2_TARGET_SPEED_MASK; + ctl2 |= rval; + PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL2, ctl2); + mutex_exit(&bus_p->bus_speed_mutex); + + /* + * Make sure our updates have been reflected in devinfo. + */ + pcie_capture_speeds(dip); + + return (0); +} + +int +pcie_link_retrain(dev_info_t *dip) +{ + uint16_t ctl; + pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); + + if (!PCIE_IS_PCIE(bus_p)) { + return (ENOTSUP); + } + + if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) { + return (ENOTSUP); + } + + /* + * The PCIe specification suggests that we make sure that the link isn't + * in training before issuing this command in case there was a state + * machine transition prior to when we got here. 
We wait and then go + * ahead and issue the command anyways. + */ + for (uint32_t i = 0; i < pcie_link_retrain_count; i++) { + uint16_t sts; + + sts = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS); + if ((sts & PCIE_LINKSTS_LINK_TRAINING) == 0) + break; + delay(drv_usectohz(pcie_link_retrain_delay_ms * 1000)); + } + + ctl = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL); + ctl |= PCIE_LINKCTL_RETRAIN_LINK; + PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL, ctl); + + /* + * Wait again to see if it clears before returning to the user. + */ + for (uint32_t i = 0; i < pcie_link_retrain_count; i++) { + uint16_t sts; + + sts = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS); + if ((sts & PCIE_LINKSTS_LINK_TRAINING) == 0) + break; + delay(drv_usectohz(pcie_link_retrain_delay_ms * 1000)); + } + + return (0); +} diff --git a/usr/src/uts/common/io/pciex/pcie_pwr.c b/usr/src/uts/common/io/pciex/pcie_pwr.c index 41ffa02726..9aeee8d238 100644 --- a/usr/src/uts/common/io/pciex/pcie_pwr.c +++ b/usr/src/uts/common/io/pciex/pcie_pwr.c @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. */ #include <sys/types.h> @@ -752,7 +753,7 @@ pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p) * Called when the child makes the first power management call. * sets up the counters. All the components of the child device are * assumed to be at unknown level. 
It also releases the power hold - * pwr_p - parent's pwr_t + * pwr_p - parent's pwr_t * cdip - child's dip */ int @@ -908,7 +909,8 @@ pcie_pwr_resume(dev_info_t *dip) pcie_clear_errors(cdip); /* PCIe workaround: disable errors during 4K config resore */ - if (is_pcie = pcie_is_pcie(cdip)) + is_pcie = pcie_is_pcie(cdip); + if (is_pcie) pcie_disable_errors(cdip); (void) pci_restore_config_regs(cdip); if (is_pcie) { @@ -1041,7 +1043,8 @@ pcie_pwr_suspend(dev_info_t *dip) ddi_driver_name(cdip), ddi_get_instance(cdip)); /* PCIe workaround: disable errors during 4K config save */ - if (is_pcie = pcie_is_pcie(cdip)) + is_pcie = pcie_is_pcie(cdip); + if (is_pcie) pcie_disable_errors(cdip); (void) pci_save_config_regs(cdip); if (is_pcie) { diff --git a/usr/src/uts/common/io/pciex/pcieb.c b/usr/src/uts/common/io/pciex/pcieb.c index bdf7a61016..c9d65748bb 100644 --- a/usr/src/uts/common/io/pciex/pcieb.c +++ b/usr/src/uts/common/io/pciex/pcieb.c @@ -23,10 +23,121 @@ */ /* * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved. + * Copyright 2019 Joyent, Inc. */ /* * Common x86 and SPARC PCI-E to PCI bus bridge nexus driver + * + * Background + * ---------- + * + * The PCI Express (PCIe) specification defines that all of the PCIe devices in + * the system are connected together in a series of different fabrics. A way to + * think of these fabrics is that they are small networks where there are links + * between different devices and switches that allow fan out or fan in of the + * fabric. The entry point to that fabric is called a root complex and the + * fabric terminates at what is called an endpoint, which is really just PCIe + * terminology for the common cards that are inserted into the system (HBAs, + * NICs, USB, NVMe, etc.). + * + * The PCIe specification states that every link on the system has a virtual + * PCI-to-PCI bridge. 
This allows PCIe devices to still be configured the same + * way traditional PCI devices are to the operating system and allows them to + * have a traditional PCI bus, device, and function associated with them, even + * though there is no actual shared bus. In addition, bridges are also used to + * connect traditional PCI and PCI-X devices into them. + * + * The PCIe specification refers to upstream and downstream ports. Upstream + * ports are considered closer to the root complex and downstream ports are closer + * to the endpoint. We can divide the devices that the bridge driver attaches to + * into two groups. Those that are considered upstream ports, these include root + * complexes and parts of PCIe switches. And downstream ports, which are the + * other half of PCIe switches and endpoints (which this driver does not attach + * to, normal hardware-specific or class-specific drivers attach to those). + * + * Interrupt Management + * -------------------- + * + * Upstream ports of bridges have additional things that we care about. + * Specifically they're the means through which we find out about: + * + * - Advanced Error Reporting (AERs) + * - Hotplug events + * - Link Bandwidth Events + * - Power Management Events (PME) + * + * Each of these features is an optional feature (though ones we hope are + * implemented). The features above are grouped into two different buckets based + * on which PCI capability they appear in. AER management is done through a PCI + * Express extended configuration header (it lives in extended PCI configuration + * space) called the 'Advanced Error Reporting Extended Capability'. The other + * events are all managed as part of the 'PCI Express Capability Structure'. + * This structure is found in traditional PCI configuration space. + * + * The way that the interrupts are programmed for these types of events differs + * a bit from the way one might expect a normal device to operate. 
For most + * devices, one allocates a number of interrupts based on a combination of what + * the device supports, what the OS supports per device, and the number the + * driver needs. Then the driver programs the device in a device-specific manner + * to indicate which events should trigger a specific interrupt vector. + * + * However, for both the AER and PCI capabilities, the driver has to do + * something different. The driver first allocates interrupts by programming the + * MSI or MSI-X table and then asks the device which interrupts have been + * assigned to these purposes. Because these events are only supported in + * 'upstream' devices, this does not interfere with the traditional management + * of MSI and MSI-X interrupts. At this time, the pcieb driver only supports the + * use of MSI interrupts. + * + * Once the interrupts have been allocated, we read back which vectors have been + * nominated by the device to cover the corresponding capability. The interrupt + * is allocated on a per-capability basis. Therefore, one interrupt would cover + * AERs, while another interrupt would cover the rest of the desired functions. + * + * To track which interrupts cover which behaviors, each driver state + * (pcieb_devstate_t) has a member called 'pcieb_isr_tab'. Each index represents + * an interrupt vector and there are a series of flags that represent the + * different possible interrupt sources: PCIEB_INTR_SRC_HP (hotplug), + * PCIEB_INTR_SRC_PME (power management event), PCIEB_INTR_SRC_AER (error + * reporting), PCIEB_INTR_SRC_LBW (link bandwidth). + * + * Because the hotplug, link bandwidth, and power management events all share + * the same vector, if an interrupt comes in, we must check all of the enabled + * sources that might generate this interrupt. 
It is highly likely that more + * than one will fire at the same time, for example, a hotplug event that fires + * because a device has been inserted or removed, will likely trigger a link + * bandwidth event. + * + * The pcieb driver itself does not actually have much logic to deal with and + * clear the interrupts in question. It generally speaking will vector most + * events back to the more general pcie driver or, in the case of AERs, initiate + * a scan of the fabric itself (also part of the pcie driver). + * + * Link Management + * --------------- + * + * The pcieb driver is used to take care of two different aspects of link + * management. The first of these, as described briefly above, is to monitor for + * changes to the negotiated link bandwidth. These events are managed by + * enabling support for the interrupts in the PCI Express Capability Structure. + * This is all taken care of by the pcie driver through functions like + * pcie_link_bw_enable(). + * + * The second aspect of link management the pcieb driver enables is the ability + * to retrain the link and optionally limit the speed. This is enabled through a + * series of private ioctls that are driven through a private userland utility, + * /usr/lib/pci/pcieb. Eventually, this should be more fleshed out and a more + * uniform interface based around the devctls that can be leveraged across + * different classes of devices should be used. + * + * Under the hood this basically leverages the ability of the upstream port to + * retrain a link by writing a bit to the PCIe link control register. See + * pcieb_ioctl_retrain(). From there, if the driver ever receives a request to + * change the maximum speed, that is updated in the card; however, it does not + * immediately retrain the link. A separate ioctl request is required to do so. + * Once the speed has been changed, regardless of whether or not it has been + * retrained, that fact will always be noted. 
*/ #include <sys/sysmacros.h> @@ -52,6 +163,7 @@ #include <sys/pcie_pwr.h> #include <sys/hotplug/pci/pcie_hp.h> #include "pcieb.h" +#include "pcieb_ioctl.h" #ifdef PX_PLX #include <io/pciex/pcieb_plx.h> #endif /* PX_PLX */ @@ -344,7 +456,7 @@ pcieb_41210_mps_wkrnd(dev_info_t *cdip) ~(PCIE_DEVCTL_MAX_READ_REQ_MASK | PCIE_DEVCTL_MAX_PAYLOAD_MASK)) | cdip_mrrs_mps; - PCI_CAP_PUT16(cfg_hdl, 0, cap_ptr, PCIE_DEVCTL, + (void) PCI_CAP_PUT16(cfg_hdl, 0, cap_ptr, PCIE_DEVCTL, sdip_dev_ctrl); } @@ -475,6 +587,8 @@ pcieb_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) (void) pcie_hpintr_enable(devi); + (void) pcie_link_bw_enable(devi); + /* Do any platform specific workarounds needed at this time */ pcieb_plat_attach_workaround(devi); @@ -994,7 +1108,6 @@ FAIL: * by the device. If features are not enabled first, the * device might not ask for any interrupts. */ - static int pcieb_intr_init(pcieb_devstate_t *pcieb, int intr_type) { @@ -1002,39 +1115,47 @@ pcieb_intr_init(pcieb_devstate_t *pcieb, int intr_type) int nintrs, request, count, x; int intr_cap = 0; int inum = 0; - int ret, hp_msi_off; + int ret; pcie_bus_t *bus_p = PCIE_DIP2UPBUS(dip); uint16_t vendorid = bus_p->bus_dev_ven_id & 0xFFFF; boolean_t is_hp = B_FALSE; boolean_t is_pme = B_FALSE; + boolean_t is_lbw = B_FALSE; PCIEB_DEBUG(DBG_ATTACH, dip, "pcieb_intr_init: Attaching %s handler\n", (intr_type == DDI_INTR_TYPE_MSI) ? "MSI" : "INTx"); request = 0; if (PCIE_IS_HOTPLUG_ENABLED(dip)) { - request++; is_hp = B_TRUE; } - /* - * Hotplug and PME share the same MSI vector. If hotplug is not - * supported check if MSI is needed for PME. - */ if ((intr_type == DDI_INTR_TYPE_MSI) && PCIE_IS_RP(bus_p) && (vendorid == NVIDIA_VENDOR_ID)) { is_pme = B_TRUE; - if (!is_hp) - request++; + } + + if (intr_type == DDI_INTR_TYPE_MSI && pcie_link_bw_supported(dip)) { + is_lbw = B_TRUE; } /* - * Setup MSI if this device is a Rootport and has AER. Currently no - * SPARC Root Port supports fabric errors being reported through it. 
+ * The hot-plug, link bandwidth, and power management events all are + * based on the PCI Express capability. Therefore, they all share their + * own interrupt. + */ + if (is_hp || is_pme || is_lbw) { + request++; + } + + /* + * If this device is a root port, which means it can have MSI interrupts + * enabled for AERs, then we need to request one. */ if (intr_type == DDI_INTR_TYPE_MSI) { - if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p)) + if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p)) { request++; + } } if (request == 0) @@ -1166,22 +1287,31 @@ pcieb_intr_init(pcieb_devstate_t *pcieb, int intr_type) /* Get the MSI offset for hotplug/PME from the PCIe cap reg */ if (intr_type == DDI_INTR_TYPE_MSI) { - hp_msi_off = PCI_CAP_GET16(bus_p->bus_cfg_hdl, 0, + uint16_t pcie_msi_off; + pcie_msi_off = PCI_CAP_GET16(bus_p->bus_cfg_hdl, 0, bus_p->bus_pcie_off, PCIE_PCIECAP) & PCIE_PCIECAP_INT_MSG_NUM; - if (hp_msi_off >= count) { - PCIEB_DEBUG(DBG_ATTACH, dip, "MSI number %d in PCIe " - "cap > max allocated %d\n", hp_msi_off, count); + if (pcie_msi_off >= count) { + PCIEB_DEBUG(DBG_ATTACH, dip, "MSI number %u in PCIe " + "cap > max allocated %d\n", pcie_msi_off, count); mutex_exit(&pcieb->pcieb_intr_mutex); goto FAIL; } - if (is_hp) - pcieb->pcieb_isr_tab[hp_msi_off] |= PCIEB_INTR_SRC_HP; + if (is_hp) { + pcieb->pcieb_isr_tab[pcie_msi_off] |= PCIEB_INTR_SRC_HP; + } + + if (is_pme) { + pcieb->pcieb_isr_tab[pcie_msi_off] |= + PCIEB_INTR_SRC_PME; + } - if (is_pme) - pcieb->pcieb_isr_tab[hp_msi_off] |= PCIEB_INTR_SRC_PME; + if (is_lbw) { + pcieb->pcieb_isr_tab[pcie_msi_off] |= + PCIEB_INTR_SRC_LBW; + } } else { /* INTx handles only Hotplug interrupts */ if (is_hp) @@ -1353,6 +1483,128 @@ pcieb_close(dev_t dev, int flags, int otyp, cred_t *credp) } static int +pcieb_ioctl_retrain(pcieb_devstate_t *pcieb, cred_t *credp) +{ + pcie_bus_t *bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip); + + if (drv_priv(credp) != 0) { + return (EPERM); + } + + if (!PCIE_IS_PCIE(bus_p)) { + return (ENOTSUP); + } + 
+ if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) { + return (ENOTSUP); + } + + return (pcie_link_retrain(pcieb->pcieb_dip)); +} + +static int +pcieb_ioctl_get_speed(pcieb_devstate_t *pcieb, intptr_t arg, int mode, + cred_t *credp) +{ + pcie_bus_t *bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip); + pcieb_ioctl_target_speed_t pits; + + if (drv_priv(credp) != 0) { + return (EPERM); + } + + if (!PCIE_IS_PCIE(bus_p)) { + return (ENOTSUP); + } + + if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) { + return (ENOTSUP); + } + + pits.pits_flags = 0; + pits.pits_speed = PCIEB_LINK_SPEED_UNKNOWN; + + mutex_enter(&bus_p->bus_speed_mutex); + if ((bus_p->bus_speed_flags & PCIE_LINK_F_ADMIN_TARGET) != 0) { + pits.pits_flags |= PCIEB_FLAGS_ADMIN_SET; + } + switch (bus_p->bus_target_speed) { + case PCIE_LINK_SPEED_2_5: + pits.pits_speed = PCIEB_LINK_SPEED_GEN1; + break; + case PCIE_LINK_SPEED_5: + pits.pits_speed = PCIEB_LINK_SPEED_GEN2; + break; + case PCIE_LINK_SPEED_8: + pits.pits_speed = PCIEB_LINK_SPEED_GEN3; + break; + case PCIE_LINK_SPEED_16: + pits.pits_speed = PCIEB_LINK_SPEED_GEN4; + break; + default: + pits.pits_speed = PCIEB_LINK_SPEED_UNKNOWN; + break; + } + mutex_exit(&bus_p->bus_speed_mutex); + + if (ddi_copyout(&pits, (void *)arg, sizeof (pits), + mode & FKIOCTL) != 0) { + return (EFAULT); + } + + return (0); +} + +static int +pcieb_ioctl_set_speed(pcieb_devstate_t *pcieb, intptr_t arg, int mode, + cred_t *credp) +{ + pcie_bus_t *bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip); + pcieb_ioctl_target_speed_t pits; + pcie_link_speed_t speed; + + if (drv_priv(credp) != 0) { + return (EPERM); + } + + if (!PCIE_IS_PCIE(bus_p)) { + return (ENOTSUP); + } + + if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) { + return (ENOTSUP); + } + + if (ddi_copyin((void *)arg, &pits, sizeof (pits), + mode & FKIOCTL) != 0) { + return (EFAULT); + } + + if (pits.pits_flags != 0) { + return (EINVAL); + } + + switch (pits.pits_speed) { + case PCIEB_LINK_SPEED_GEN1: + speed = PCIE_LINK_SPEED_2_5; + break; + case 
PCIEB_LINK_SPEED_GEN2: + speed = PCIE_LINK_SPEED_5; + break; + case PCIEB_LINK_SPEED_GEN3: + speed = PCIE_LINK_SPEED_8; + break; + case PCIEB_LINK_SPEED_GEN4: + speed = PCIE_LINK_SPEED_16; + break; + default: + return (EINVAL); + } + + return (pcie_link_set_target(pcieb->pcieb_dip, speed)); +} + +static int pcieb_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) { @@ -1363,8 +1615,28 @@ pcieb_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, if (pcieb == NULL) return (ENXIO); - /* To handle devctl and hotplug related ioctls */ - rv = pcie_ioctl(pcieb->pcieb_dip, dev, cmd, arg, mode, credp, rvalp); + /* + * Check if this is one of the commands that the bridge driver natively + * understands. There are only a handful of such private ioctls defined + * in pcieb_ioctl.h. Otherwise, this ioctl should be handled by the + * general pcie driver. + */ + switch (cmd) { + case PCIEB_IOCTL_RETRAIN: + rv = pcieb_ioctl_retrain(pcieb, credp); + break; + case PCIEB_IOCTL_GET_TARGET_SPEED: + rv = pcieb_ioctl_get_speed(pcieb, arg, mode, credp); + break; + case PCIEB_IOCTL_SET_TARGET_SPEED: + rv = pcieb_ioctl_set_speed(pcieb, arg, mode, credp); + break; + default: + /* To handle devctl and hotplug related ioctls */ + rv = pcie_ioctl(pcieb->pcieb_dip, dev, cmd, arg, mode, credp, + rvalp); + break; + } return (rv); } @@ -1395,7 +1667,7 @@ pcieb_intr_handler(caddr_t arg1, caddr_t arg2) if (isrc == PCIEB_INTR_SRC_UNKNOWN) goto FAIL; - if (isrc & PCIEB_INTR_SRC_HP) + if (isrc & (PCIEB_INTR_SRC_HP | PCIEB_INTR_SRC_LBW)) ret = pcie_intr(dip); if (isrc & PCIEB_INTR_SRC_PME) diff --git a/usr/src/uts/common/io/pciex/pcieb.h b/usr/src/uts/common/io/pciex/pcieb.h index 0868458805..7c1e40ecd9 100644 --- a/usr/src/uts/common/io/pciex/pcieb.h +++ b/usr/src/uts/common/io/pciex/pcieb.h @@ -23,7 +23,7 @@ */ /* - * Copyright 2019, Joyent, Inc. + * Copyright 2019 Joyent, Inc. 
*/ #ifndef _SYS_PCIEB_H @@ -139,6 +139,7 @@ extern void *pcieb_state; #define PCIEB_INTR_SRC_HP 0x1 #define PCIEB_INTR_SRC_PME 0x2 #define PCIEB_INTR_SRC_AER 0x4 +#define PCIEB_INTR_SRC_LBW 0x8 /* * Need to put vendor ids in a common file and not platform specific files diff --git a/usr/src/uts/common/io/pciex/pcieb_ioctl.h b/usr/src/uts/common/io/pciex/pcieb_ioctl.h new file mode 100644 index 0000000000..40134037a4 --- /dev/null +++ b/usr/src/uts/common/io/pciex/pcieb_ioctl.h @@ -0,0 +1,64 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Joyent, Inc. + */ + +#ifndef _IO_PCIE_PCIEB_IOCTL_H +#define _IO_PCIE_PCIEB_IOCTL_H + +/* + * These are private ioctls for PCIe bridges that are currently consumed by the + * 'pcieb' command. These should be used until we figure out how best to + * represent PCIe links in the traditional cfgadm and devctl frameworks. + */ + +#include <sys/stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define PCIEB_IOCTL (('p' << 24) | ('c' << 16) | ('b' << 8)) + +/* + * This requests that we retrain the link that the PCIe bridge has to its + * downstream component. + */ +#define PCIEB_IOCTL_RETRAIN (PCIEB_IOCTL | 0x01) + +/* + * Get and set the current target speed for a bridge. The target speed of the + * bridge will have an impact on the values that end up being used by its + * downstream components. 
+ */ +#define PCIEB_IOCTL_GET_TARGET_SPEED (PCIEB_IOCTL | 0x02) +#define PCIEB_IOCTL_SET_TARGET_SPEED (PCIEB_IOCTL | 0x03) + +typedef struct pcieb_ioctl_target_speed { + uint32_t pits_flags; + uint32_t pits_speed; +} pcieb_ioctl_target_speed_t; + +#define PCIEB_FLAGS_ADMIN_SET 0x01 + +#define PCIEB_LINK_SPEED_UNKNOWN 0x00 +#define PCIEB_LINK_SPEED_GEN1 0x01 +#define PCIEB_LINK_SPEED_GEN2 0x02 +#define PCIEB_LINK_SPEED_GEN3 0x03 +#define PCIEB_LINK_SPEED_GEN4 0x04 + +#ifdef __cplusplus +} +#endif + +#endif /* _IO_PCIE_PCIEB_IOCTL_H */ diff --git a/usr/src/uts/common/sys/pcie.h b/usr/src/uts/common/sys/pcie.h index a26729c523..e8f91a1390 100644 --- a/usr/src/uts/common/sys/pcie.h +++ b/usr/src/uts/common/sys/pcie.h @@ -23,7 +23,7 @@ * Use is subject to license terms. */ /* - * Copyright 2019, Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ #ifndef _SYS_PCIE_H @@ -494,6 +494,11 @@ extern "C" { /* * Link Control 2 Register (2 bytes) */ + +#define PCIE_LINKCTL2_TARGET_SPEED_2_5 0x1 /* 2.5 GT/s Speed */ +#define PCIE_LINKCTL2_TARGET_SPEED_5 0x2 /* 5.0 GT/s Speed */ +#define PCIE_LINKCTL2_TARGET_SPEED_8 0x3 /* 8.0 GT/s Speed */ +#define PCIE_LINKCTL2_TARGET_SPEED_16 0x4 /* 16.0 GT/s Speed */ #define PCIE_LINKCTL2_TARGET_SPEED_MASK 0x000f #define PICE_LINKCTL2_ENTER_COMPLIANCE 0x0010 #define PCIE_LINKCTL2_HW_AUTO_SPEED_DIS 0x0020 diff --git a/usr/src/uts/common/sys/pcie_impl.h b/usr/src/uts/common/sys/pcie_impl.h index faebc9d020..d1d13625c2 100644 --- a/usr/src/uts/common/sys/pcie_impl.h +++ b/usr/src/uts/common/sys/pcie_impl.h @@ -20,11 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019, Joyent, Inc. - */ - -/* - * Copyright 2019, Joyent, Inc. + * Copyright 2019 Joyent, Inc. 
*/ #ifndef _SYS_PCIE_IMPL_H @@ -36,6 +32,7 @@ extern "C" { #include <sys/pcie.h> #include <sys/pciev.h> +#include <sys/taskq_impl.h> #define PCI_GET_BDF(dip) \ PCIE_DIP2BUS(dip)->bus_bdf @@ -303,12 +300,22 @@ typedef enum pcie_link_width { */ typedef enum pcie_link_speed { PCIE_LINK_SPEED_UNKNOWN = 0x00, - PCIE_LINK_SPEED_2_5 = 0x01, - PCIE_LINK_SPEED_5 = 0x02, - PCIE_LINK_SPEED_8 = 0x04, - PCIE_LINK_SPEED_16 = 0x08 + PCIE_LINK_SPEED_2_5 = 1 << 0, + PCIE_LINK_SPEED_5 = 1 << 1, + PCIE_LINK_SPEED_8 = 1 << 2, + PCIE_LINK_SPEED_16 = 1 << 3 } pcie_link_speed_t; +typedef enum pcie_link_flags { + PCIE_LINK_F_ADMIN_TARGET = 1 << 1 +} pcie_link_flags_t; + +typedef enum { + PCIE_LBW_S_ENABLED = 1 << 0, + PCIE_LBW_S_DISPATCHED = 1 << 1, + PCIE_LBW_S_RUNNING = 1 << 2 +} pcie_lbw_state_t; + /* * For hot plugged device, these data are init'ed during during probe * For non-hotplugged device, these data are init'ed in pci_autoconfig (on x86), @@ -367,11 +374,25 @@ typedef struct pcie_bus { /* * Link speed specific fields. 
*/ + kmutex_t bus_speed_mutex; + pcie_link_flags_t bus_speed_flags; pcie_link_width_t bus_max_width; pcie_link_width_t bus_cur_width; pcie_link_speed_t bus_sup_speed; pcie_link_speed_t bus_max_speed; pcie_link_speed_t bus_cur_speed; + pcie_link_speed_t bus_target_speed; + + /* + * Link Bandwidth Monitoring + */ + kmutex_t bus_lbw_mutex; + kcondvar_t bus_lbw_cv; + pcie_lbw_state_t bus_lbw_state; + taskq_ent_t bus_lbw_ent; + uint64_t bus_lbw_nevents; + char *bus_lbw_pbuf; + char *bus_lbw_cbuf; } pcie_bus_t; /* @@ -652,6 +673,15 @@ extern pcie_bus_t *pciev_get_affected_dev(pf_impl_t *, pf_data_t *, extern void pciev_eh_exit(pf_data_t *, uint_t); extern boolean_t pcie_in_domain(pcie_bus_t *, uint_t); +/* Link Bandwidth Monitoring */ +extern boolean_t pcie_link_bw_supported(dev_info_t *); +extern int pcie_link_bw_enable(dev_info_t *); +extern int pcie_link_bw_disable(dev_info_t *); + +/* Link Management */ +extern int pcie_link_set_target(dev_info_t *, pcie_link_speed_t); +extern int pcie_link_retrain(dev_info_t *); + #define PCIE_ZALLOC(data) kmem_zalloc(sizeof (data), KM_SLEEP) diff --git a/usr/src/uts/common/sys/sysevent/eventdefs.h b/usr/src/uts/common/sys/sysevent/eventdefs.h index 8995ba4aa0..100b4c7ee9 100644 --- a/usr/src/uts/common/sys/sysevent/eventdefs.h +++ b/usr/src/uts/common/sys/sysevent/eventdefs.h @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2016 Nexenta Systems, Inc. - * Copyright 2017 Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ #ifndef _SYS_SYSEVENT_EVENTDEFS_H @@ -55,6 +55,7 @@ extern "C" { #define EC_ZFS "EC_zfs" /* ZFS event */ #define EC_DATALINK "EC_datalink" /* datalink event */ #define EC_VRRP "EC_vrrp" /* VRRP event */ +#define EC_PCIE "EC_pcie" /* PCIe event */ /* * The following event class is reserved for exclusive use @@ -224,6 +225,12 @@ extern "C" { */ #define ESC_VRRP_STATE_CHANGE "ESC_vrrp_state_change" +/* + * PCIe subclass definitions. 
Supporting attributes for PCIe state found in + * sys/sysevent/pcie.h. + */ +#define ESC_PCIE_LINK_STATE "ESC_pcie_link_state" + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/sysevent/pcie.h b/usr/src/uts/common/sys/sysevent/pcie.h new file mode 100644 index 0000000000..ff1dc71c5d --- /dev/null +++ b/usr/src/uts/common/sys/sysevent/pcie.h @@ -0,0 +1,57 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Joyent, Inc. + */ + +#ifndef _SYS_SYSEVENT_PCIE_H +#define _SYS_SYSEVENT_PCIE_H + +/* + * PCIe System Event payloads + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Event schema for ESC_PCIE_LINK_STATE + * + * Event Class - EC_PCIE + * Event Sub-Class - ESC_PCIE_LINK_STATE + * + * Attribute Name - PCIE_EV_DETECTOR_PATH + * Attribute Type - SE_DATA_TYPE_STRING + * Attribute Value - [devfs path of the node that detected the change] + * + * Attribute Name - PCIE_EV_CHILD_PATH + * Attribute Type - SE_DATA_TYPE_STRING + * Attribute Value - [devfs path of the updated child] + * + * Attribute Name - PCIE_EV_DETECTOR_FLAGS + * Attribute Type - SE_DATA_TYPE_UINT64 + * Attribute Value - [PCIe flags that indicate the type of change] + */ + +#define PCIE_EV_DETECTOR_PATH "detector_path" +#define PCIE_EV_CHILD_PATH "child_path" +#define PCIE_EV_DETECTOR_FLAGS "detector_flags" + +#define PCIE_EV_DETECTOR_FLAGS_LBMS 0x01 +#define PCIE_EV_DETECTOR_FLAGS_LABS 0x02 + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SYSEVENT_PCIE_H */ diff --git a/usr/src/uts/i86pc/pcie/Makefile b/usr/src/uts/i86pc/pcie/Makefile index b72a3772dc..1a849006f5 100644 --- 
a/usr/src/uts/i86pc/pcie/Makefile +++ b/usr/src/uts/i86pc/pcie/Makefile @@ -24,7 +24,7 @@ # Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# Copyright (c) 2018, Joyent, Inc. +# Copyright 2019 Joyent, Inc. # Copyright 2019 OmniOS Community Edition (OmniOSce) Association. # @@ -57,8 +57,6 @@ ALL_TARGET = $(BINARY) INSTALL_TARGET = $(BINARY) $(ROOTMODULE) CERRWARN += -_gcc=-Wno-unused-value -CERRWARN += -_gcc=-Wno-uninitialized -CERRWARN += -_gcc=-Wno-parentheses CERRWARN += -_gcc=-Wno-unused-variable CERRWARN += -_gcc=-Wno-unused-function # safe diff --git a/usr/src/uts/intel/io/pciex/pcieb_x86.c b/usr/src/uts/intel/io/pciex/pcieb_x86.c index 0d73ca010e..3d3906cd1b 100644 --- a/usr/src/uts/intel/io/pciex/pcieb_x86.c +++ b/usr/src/uts/intel/io/pciex/pcieb_x86.c @@ -21,6 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. */ /* x86 specific code used by the pcieb driver */ @@ -43,7 +44,8 @@ int pcieb_intel_workaround_disable = 0; void -pcieb_peekpoke_cb(dev_info_t *dip, ddi_fm_error_t *derr) { +pcieb_peekpoke_cb(dev_info_t *dip, ddi_fm_error_t *derr) +{ pf_eh_enter(PCIE_DIP2BUS(dip)); (void) pf_scan_fabric(dip, derr, NULL); pf_eh_exit(PCIE_DIP2BUS(dip)); @@ -201,7 +203,8 @@ pcieb_plat_uninitchild(dev_info_t *child) /* _OSC related */ void -pcieb_init_osc(dev_info_t *devi) { +pcieb_init_osc(dev_info_t *devi) +{ pcie_bus_t *bus_p = PCIE_DIP2UPBUS(devi); uint32_t osc_flags = OSC_CONTROL_PCIE_ADV_ERR; @@ -322,10 +325,10 @@ static x86_error_reg_t intel_5000_rp_regs[] = { {0x144, 8, 0x0, 0x0, 0xF0}, /* EMASK_UNCOR_PEX[21:0] UE mask */ - {0x148, 32, 0x0, PCIE_AER_UCE_UR, PCIE_AER_UCE_UR}, + {0x148, 32, 0x0, PCIE_AER_UCE_UR, PCIE_AER_UCE_UR}, /* EMASK_RP_PEX[2:0] FE, UE, CE message detect mask */ - {0x150, 8, 0x0, 0x0, 0x1}, + {0x150, 8, 0x0, 0x0, 0x1}, }; #define INTEL_5000_RP_REGS_LEN \ (sizeof (intel_5000_rp_regs) / sizeof 
(x86_error_reg_t)) @@ -353,16 +356,16 @@ static x86_error_reg_t intel_5400_rp_regs[] = { {0x4E, 8, 0x0, 0x1, 0x0}, /* PEX_ERR_DOCMD[11:0] */ - {0x144, 16, 0x0, 0x0, 0xFF0}, + {0x144, 16, 0x0, 0x0, 0xFF0}, /* PEX_ERR_PIN_MASK[4:0] do not mask ERR[2:0] pins used by DOCMD */ {0x146, 16, 0x0, 0x10, 0x10}, /* EMASK_UNCOR_PEX[21:0] UE mask */ - {0x148, 32, 0x0, PCIE_AER_UCE_UR, PCIE_AER_UCE_UR}, + {0x148, 32, 0x0, PCIE_AER_UCE_UR, PCIE_AER_UCE_UR}, /* EMASK_RP_PEX[2:0] FE, UE, CE message detect mask */ - {0x150, 8, 0x0, 0x0, 0x1}, + {0x150, 8, 0x0, 0x0, 0x1}, }; #define INTEL_5400_RP_REGS_LEN \ (sizeof (intel_5400_rp_regs) / sizeof (x86_error_reg_t)) @@ -569,7 +572,7 @@ pcieb_intel_rber_workaround(dev_info_t *dip) if (!rber) return; - PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_SERV, pcieb_rber_sev); + (void) PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_SERV, pcieb_rber_sev); } /* diff --git a/usr/src/uts/intel/pcieb/Makefile b/usr/src/uts/intel/pcieb/Makefile index 3b9f363520..38f4b38a98 100644 --- a/usr/src/uts/intel/pcieb/Makefile +++ b/usr/src/uts/intel/pcieb/Makefile @@ -24,7 +24,7 @@ # Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# Copyright (c) 2018, Joyent, Inc. +# Copyright 2019 Joyent, Inc. # # Path to the base of the uts directory tree (usually /usr/src/uts). @@ -64,21 +64,6 @@ MODSTUBS_DIR = $(OBJS_DIR) CLEANFILES += $(MODSTUBS_O) # -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -CERRWARN += -_gcc=-Wno-unused-value - -# needs work -SMOFF += all_func_returns - -# # Default build targets. # .KEEP_STATE: @@ -103,4 +88,3 @@ install: $(INSTALL_DEPS) # Include common targets. # include $(UTSBASE)/intel/Makefile.targ - |