summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Robert Mustacchi <rm@joyent.com> 2019-06-08 01:35:39 +0000
committer Robert Mustacchi <rm@joyent.com> 2019-06-21 17:52:24 +0000
commit c6ce7a25abbb33fa38edd55230b7992c5a36ce91 (patch)
tree 7141755661b79ec4feb3f61dea932f535fc4efe5
parent f866eafe56a327754d101195d6d63f024fb095a5 (diff)
download illumos-joyent-c6ce7a25abbb33fa38edd55230b7992c5a36ce91.tar.gz
OS-7719 PCIe speeds and feeds should be exposed to userland
OS-7718 pcieb should enable link bandwidth notifications
OS-7742 Clean up pcieb CERRWARN and smatch
OS-7743 Clean up pcie module -Wno-uninitialized and -Wno-parentheses
Reviewed by: Hans Rosenfeld <hans.rosenfeld@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Approved by: Rob Johnston <rob.johnston@joyent.com>
-rw-r--r-- manifest 1
-rw-r--r-- usr/src/cmd/Makefile 3
-rw-r--r-- usr/src/cmd/pcieb/Makefile 36
-rw-r--r-- usr/src/cmd/pcieb/pcieb.c 169
-rw-r--r-- usr/src/lib/fm/topo/libtopo/common/topo_hc.h 7
-rw-r--r-- usr/src/lib/fm/topo/modules/common/pcibus/did_props.c 114
-rw-r--r-- usr/src/lib/fm/topo/modules/common/pcibus/did_props.h 10
-rw-r--r-- usr/src/uts/common/io/pciex/hotplug/pcie_hp.c 3
-rw-r--r-- usr/src/uts/common/io/pciex/hotplug/pciehpc.c 13
-rw-r--r-- usr/src/uts/common/io/pciex/hotplug/pcishpc.c 10
-rw-r--r-- usr/src/uts/common/io/pciex/pcie.c 557
-rw-r--r-- usr/src/uts/common/io/pciex/pcie_pwr.c 9
-rw-r--r-- usr/src/uts/common/io/pciex/pcieb.c 320
-rw-r--r-- usr/src/uts/common/io/pciex/pcieb.h 3
-rw-r--r-- usr/src/uts/common/io/pciex/pcieb_ioctl.h 64
-rw-r--r-- usr/src/uts/common/sys/pcie.h 7
-rw-r--r-- usr/src/uts/common/sys/pcie_impl.h 48
-rw-r--r-- usr/src/uts/common/sys/sysevent/eventdefs.h 9
-rw-r--r-- usr/src/uts/common/sys/sysevent/pcie.h 57
-rw-r--r-- usr/src/uts/i86pc/pcie/Makefile 4
-rw-r--r-- usr/src/uts/intel/io/pciex/pcieb_x86.c 19
-rw-r--r-- usr/src/uts/intel/pcieb/Makefile 18
22 files changed, 1385 insertions, 96 deletions
diff --git a/manifest b/manifest
index d431cdbf14..d3e35d87e0 100644
--- a/manifest
+++ b/manifest
@@ -9972,6 +9972,7 @@ f usr/lib/passwdutil.so.1 0755 root bin
d usr/lib/pci 0755 root bin
f usr/lib/pci/pcidr 0555 root bin
f usr/lib/pci/pcidr_plugin.so 0755 root bin
+f usr/lib/pci/pcieb 0555 root bin
f usr/lib/pfexecd 0555 root bin
d usr/lib/picl 0755 root sys
f usr/lib/picl/picld 0555 root sys
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index 426ae91336..fdbb25c9a0 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -21,7 +21,7 @@
#
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2019, Joyent, Inc.
+# Copyright 2019 Joyent, Inc.
# Copyright (c) 2012, 2015 by Delphix. All rights reserved.
# Copyright (c) 2013 DEY Storage Systems, Inc. All rights reserved.
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
@@ -304,6 +304,7 @@ COMMON_SUBDIRS= \
pathchk \
pbind \
pcidr \
+ pcieb \
pcitool \
pfexec \
pfexecd \
diff --git a/usr/src/cmd/pcieb/Makefile b/usr/src/cmd/pcieb/Makefile
new file mode 100644
index 0000000000..6d81356692
--- /dev/null
+++ b/usr/src/cmd/pcieb/Makefile
@@ -0,0 +1,36 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+PROG= pcieb
+LINTPROGS= $(PROG:%=%.ln)
+
+include ../Makefile.cmd
+
+ROOTCMDDIR = $(ROOTLIB)/pci
+
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -I$(SRC)/uts/common/io/pciex/
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all $(ROOTCMD)
+
+clean:
+
+lint: $(LINTPROGS)
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/pcieb/pcieb.c b/usr/src/cmd/pcieb/pcieb.c
new file mode 100644
index 0000000000..0c829e8a51
--- /dev/null
+++ b/usr/src/cmd/pcieb/pcieb.c
@@ -0,0 +1,169 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+/*
+ * Private command to manipulate link speeds of PCIe bridges and allow
+ * retraining. This is designed to aid debugging.
+ */
+
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <libgen.h>
+#include <string.h>
+#include <err.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <pcieb_ioctl.h>
+
+static const char *pcieb_progname;
+
+/* Print an optional printf-style error to stderr, then the usage text. */
+static void
+pcieb_usage(const char *fmt, ...)
+{
+	if (fmt != NULL) {
+		va_list ap;
+
+		(void) fprintf(stderr, "%s: ", pcieb_progname);
+		va_start(ap, fmt);
+		(void) vfprintf(stderr, fmt, ap);
+		va_end(ap);
+	}
+
+	(void) fprintf(stderr, "Usage: %s [-x] [-s speed] pcie-bridge\n"
+	    "\n"
+	    "\t-s speed Set link to speed\n"
+	    "\t-x Retrain link\n",
+	    pcieb_progname);
+}
+
+static uint32_t
+pcieb_parse_speed(const char *s)
+{
+	/* Each generation is accepted as its GT/s rate or its "genN" alias. */
+	if (strcasecmp("2.5", s) == 0 || strcasecmp("gen1", s) == 0)
+		return (PCIEB_LINK_SPEED_GEN1);
+	if (strcasecmp("5", s) == 0 || strcasecmp("gen2", s) == 0)
+		return (PCIEB_LINK_SPEED_GEN2);
+	if (strcasecmp("8", s) == 0 || strcasecmp("gen3", s) == 0)
+		return (PCIEB_LINK_SPEED_GEN3);
+	if (strcasecmp("16", s) == 0 || strcasecmp("gen4", s) == 0)
+		return (PCIEB_LINK_SPEED_GEN4);
+
+	errx(EXIT_FAILURE, "invalid speed: %s", s);
+}
+
+/*
+ * Report the target speed by default; -s sets it and -x retrains the link.
+ */
+int
+main(int argc, char *argv[])
+{
+	int c, fd;
+	boolean_t retrain = B_FALSE, set = B_FALSE, get = B_TRUE;
+	uint32_t speed = PCIEB_LINK_SPEED_UNKNOWN;
+
+	pcieb_progname = basename(argv[0]);
+
+	while ((c = getopt(argc, argv, ":xs:")) != -1) {
+		switch (c) {
+		case 's':
+			speed = pcieb_parse_speed(optarg);
+			set = B_TRUE;
+			get = B_FALSE;
+			break;
+		case 'x':
+			retrain = B_TRUE;
+			get = B_FALSE;
+			break;
+		case ':':
+			pcieb_usage("option -%c requires an operand\n", optopt);
+			return (2);
+		case '?':
+		default:
+			pcieb_usage("unknown option: -%c\n", optopt);
+			return (2);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc != 1) {
+		pcieb_usage("exactly one PCIe bridge device is required\n");
+		return (2);
+	}
+
+	if ((fd = open(argv[0], O_RDWR)) < 0) {
+		err(EXIT_FAILURE, "failed to open %s", argv[0]);
+	}
+
+	if (set) {
+		pcieb_ioctl_target_speed_t pits;
+
+		pits.pits_flags = 0;
+		pits.pits_speed = speed;
+
+		if (ioctl(fd, PCIEB_IOCTL_SET_TARGET_SPEED, &pits) != 0) {
+			err(EXIT_FAILURE, "failed to set target speed");
+		}
+	}
+
+	if (retrain) {
+		if (ioctl(fd, PCIEB_IOCTL_RETRAIN) != 0) {
+			err(EXIT_FAILURE, "failed to retrain link");
+		}
+	}
+
+	if (get) {
+		pcieb_ioctl_target_speed_t pits;
+
+		if (ioctl(fd, PCIEB_IOCTL_GET_TARGET_SPEED, &pits) != 0) {
+			err(EXIT_FAILURE, "failed to get target speed");
+		}
+
+		(void) printf("Bridge target speed: ");
+		switch (pits.pits_speed) {
+		case PCIEB_LINK_SPEED_GEN1:
+			(void) printf("2.5 GT/s (gen1)\n");
+			break;
+		case PCIEB_LINK_SPEED_GEN2:
+			(void) printf("5.0 GT/s (gen2)\n");
+			break;
+		case PCIEB_LINK_SPEED_GEN3:
+			(void) printf("8.0 GT/s (gen3)\n");
+			break;
+		case PCIEB_LINK_SPEED_GEN4:
+			(void) printf("16.0 GT/s (gen4)\n");
+			break;
+		default:
+			(void) printf("Unknown Value: 0x%x\n", pits.pits_speed);
+			break;
+		}
+
+		if ((pits.pits_flags & ~PCIEB_FLAGS_ADMIN_SET) != 0) {
+			(void) printf("Unknown flags: 0x%x\n", pits.pits_flags);
+		} else if ((pits.pits_flags & PCIEB_FLAGS_ADMIN_SET) != 0) {
+			(void) printf("Flags: Admin Set Speed\n");
+		}
+	}
+
+	(void) close(fd);
+	return (0);
+}
diff --git a/usr/src/lib/fm/topo/libtopo/common/topo_hc.h b/usr/src/lib/fm/topo/libtopo/common/topo_hc.h
index df52b7c6e1..8f245c1cf0 100644
--- a/usr/src/lib/fm/topo/libtopo/common/topo_hc.h
+++ b/usr/src/lib/fm/topo/libtopo/common/topo_hc.h
@@ -124,6 +124,13 @@ extern "C" {
#define TOPO_PCI_CLASS "class-code"
#define TOPO_PCI_AADDR "assigned-addresses"
+#define TOPO_PCI_MAX_WIDTH "link-maximum-width"
+#define TOPO_PCI_CUR_WIDTH "link-current-width"
+#define TOPO_PCI_MAX_SPEED "link-maximum-speed"
+#define TOPO_PCI_CUR_SPEED "link-current-speed"
+#define TOPO_PCI_SUP_SPEED "link-supported-speeds"
+#define TOPO_PCI_ADMIN_SPEED "link-admin-target-speed"
+
#define TOPO_PGROUP_BINDING "binding"
#define TOPO_BINDING_OCCUPANT "occupant-path"
#define TOPO_BINDING_DRIVER "driver"
diff --git a/usr/src/lib/fm/topo/modules/common/pcibus/did_props.c b/usr/src/lib/fm/topo/modules/common/pcibus/did_props.c
index e216dec9d1..af4292830f 100644
--- a/usr/src/lib/fm/topo/modules/common/pcibus/did_props.c
+++ b/usr/src/lib/fm/topo/modules/common/pcibus/did_props.c
@@ -24,7 +24,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#include <assert.h>
@@ -74,6 +74,14 @@ static int AADDR_set(tnode_t *, did_t *,
const char *, const char *, const char *);
static int maybe_pcidb_set(tnode_t *, did_t *,
const char *, const char *, const char *);
+static int maybe_di_int_to_uint32(tnode_t *, did_t *,
+ const char *, const char *, const char *);
+static int maybe_pcie_speed(tnode_t *, did_t *,
+ const char *, const char *, const char *);
+static int maybe_pcie_supported_speed(tnode_t *, did_t *,
+ const char *, const char *, const char *);
+static int maybe_pcie_target_speed(tnode_t *, did_t *,
+ const char *, const char *, const char *);
/*
* Arrays of "property translation routines" to set the properties a
@@ -169,7 +177,18 @@ txprop_t Fn_common_props[] = {
txprop_t Dev_common_props[] = {
{ NULL, &protocol_pgroup, TOPO_PROP_LABEL, label_set },
{ NULL, &protocol_pgroup, TOPO_PROP_FRU, FRU_set },
- { NULL, &protocol_pgroup, TOPO_PROP_ASRU, ASRU_set }
+ { NULL, &protocol_pgroup, TOPO_PROP_ASRU, ASRU_set },
+ { DI_PCIE_MAX_WIDTH, &pci_pgroup, TOPO_PCI_MAX_WIDTH,
+ maybe_di_int_to_uint32 },
+ { DI_PCIE_CUR_WIDTH, &pci_pgroup, TOPO_PCI_CUR_WIDTH,
+ maybe_di_int_to_uint32 },
+ { DI_PCIE_MAX_SPEED, &pci_pgroup, TOPO_PCI_MAX_SPEED,
+ maybe_pcie_speed },
+ { DI_PCIE_CUR_SPEED, &pci_pgroup, TOPO_PCI_CUR_SPEED,
+ maybe_pcie_speed },
+ { DI_PCIE_SUP_SPEEDS, &pci_pgroup, TOPO_PCI_SUP_SPEED,
+ maybe_pcie_supported_speed },
+ { NULL, &pci_pgroup, TOPO_PCI_ADMIN_SPEED, maybe_pcie_target_speed }
};
txprop_t Bus_common_props[] = {
@@ -1057,3 +1076,94 @@ did_props_set(tnode_t *tn, did_t *pd, txprop_t txarray[], int txnum)
}
return (0);
}
+
+/* Copy a one-value int devinfo property to a uint32 topo property. */
+static int
+maybe_di_int_to_uint32(tnode_t *tn, did_t *pd, const char *dpnm,
+    const char *tpgrp, const char *tpnm)
+{
+	int *ints, err;
+
+	/* Anything other than exactly one value is quietly skipped. */
+	err = di_prop_lookup_ints(DDI_DEV_T_ANY, did_dinode(pd), dpnm, &ints);
+	if (err != 1)
+		return (0);
+
+	if (topo_prop_set_uint32(tn, tpgrp, tpnm, 0, (uint32_t)ints[0],
+	    &err) == 0)
+		return (0);
+
+	return (topo_mod_seterrno(did_mod(pd), err));
+}
+
+/* Copy a one-value int64 devinfo property to a uint64 topo property. */
+static int
+maybe_pcie_speed(tnode_t *tn, did_t *pd, const char *dpnm, const char *tpgrp,
+    const char *tpnm)
+{
+	int64_t *spd;
+	int err;
+
+	err = di_prop_lookup_int64(DDI_DEV_T_ANY, did_dinode(pd), dpnm, &spd);
+	if (err != 1)
+		return (0);
+
+	if (topo_prop_set_uint64(tn, tpgrp, tpnm, 0, (uint64_t)spd[0],
+	    &err) == 0)
+		return (0);
+
+	return (topo_mod_seterrno(did_mod(pd), err));
+}
+
+/* Copy an int64 array devinfo property to a uint64 array topo property. */
+static int
+maybe_pcie_supported_speed(tnode_t *tn, did_t *pd, const char *dpnm,
+    const char *tpgrp, const char *tpnm)
+{
+	int64_t *spds;
+	uint_t nspds;
+	int err;
+
+	err = di_prop_lookup_int64(DDI_DEV_T_ANY, did_dinode(pd), dpnm, &spds);
+	if (err < 1)
+		return (0);
+
+	/* Save the element count; err is reused as the error out-param. */
+	nspds = (uint_t)err;
+	if (topo_prop_set_uint64_array(tn, tpgrp, tpnm, 0, (uint64_t *)spds,
+	    nspds, &err) == 0)
+		return (0);
+	return (topo_mod_seterrno(did_mod(pd), err));
+}
+
+/* Publish the target speed to topo only when the admin tag is also present. */
+static int
+maybe_pcie_target_speed(tnode_t *tn, did_t *pd, const char *dpnm,
+    const char *tpgrp, const char *tpnm)
+{
+	boolean_t tagged = B_FALSE;
+	int64_t *targp = NULL;
+	di_prop_t dp;
+	int err;
+
+	for (dp = di_prop_next(did_dinode(pd), DI_PROP_NIL); dp != DI_PROP_NIL;
+	    dp = di_prop_next(did_dinode(pd), dp)) {
+		const char *nm = di_prop_name(dp);
+
+		if (strcmp(DI_PCIE_ADMIN_TAG, nm) == 0) {
+			tagged = B_TRUE;
+			continue;
+		}
+		if (strcmp(DI_PCIE_TARG_SPEED, nm) == 0 &&
+		    di_prop_int64(dp, &targp) != 1)
+			targp = NULL;
+	}
+
+	if (targp == NULL || !tagged)
+		return (0);
+
+	if (topo_prop_set_uint64(tn, tpgrp, tpnm, 0, (uint64_t)*targp,
+	    &err) == 0)
+		return (0);
+	return (topo_mod_seterrno(did_mod(pd), err));
+}
diff --git a/usr/src/lib/fm/topo/modules/common/pcibus/did_props.h b/usr/src/lib/fm/topo/modules/common/pcibus/did_props.h
index cddf90e79d..eb42432573 100644
--- a/usr/src/lib/fm/topo/modules/common/pcibus/did_props.h
+++ b/usr/src/lib/fm/topo/modules/common/pcibus/did_props.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _DID_PROPS_H
@@ -73,6 +73,14 @@ typedef struct txprop {
#define DI_RECEPTACLE_PHYMASK "receptacle-pm"
#define DI_RECEPTACLE_LABEL "receptacle-label"
+#define DI_PCIE_MAX_WIDTH "pcie-link-maximum-width"
+#define DI_PCIE_CUR_WIDTH "pcie-link-current-width"
+#define DI_PCIE_CUR_SPEED "pcie-link-current-speed"
+#define DI_PCIE_MAX_SPEED "pcie-link-maximum-speed"
+#define DI_PCIE_SUP_SPEEDS "pcie-link-supported-speeds"
+#define DI_PCIE_TARG_SPEED "pcie-link-target-speed"
+#define DI_PCIE_ADMIN_TAG "pcie-link-admin-target-speed"
+
extern int did_props_set(tnode_t *, did_t *, txprop_t[], int);
extern tnode_t *find_predecessor(tnode_t *, char *);
diff --git a/usr/src/uts/common/io/pciex/hotplug/pcie_hp.c b/usr/src/uts/common/io/pciex/hotplug/pcie_hp.c
index 9a2163f9b0..8adfb4f6f9 100644
--- a/usr/src/uts/common/io/pciex/hotplug/pcie_hp.c
+++ b/usr/src/uts/common/io/pciex/hotplug/pcie_hp.c
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2019 Joyent, Inc.
*/
/*
@@ -469,7 +470,7 @@ pcie_hp_create_occupant_props(dev_info_t *dip, dev_t dev, int pci_dev)
{
pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
pcie_hp_ctrl_t *ctrl_p = (pcie_hp_ctrl_t *)bus_p->bus_hp_ctrl;
- pcie_hp_slot_t *slotp;
+ pcie_hp_slot_t *slotp = NULL;
pcie_hp_cn_cfg_t cn_cfg;
pcie_hp_occupant_info_t *occupant;
int circular, i;
diff --git a/usr/src/uts/common/io/pciex/hotplug/pciehpc.c b/usr/src/uts/common/io/pciex/hotplug/pciehpc.c
index 94a32abf1b..5ce219bd2f 100644
--- a/usr/src/uts/common/io/pciex/hotplug/pciehpc.c
+++ b/usr/src/uts/common/io/pciex/hotplug/pciehpc.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
*/
/*
@@ -1585,8 +1586,9 @@ pciehpc_slot_get_property(pcie_hp_slot_t *slot_p, ddi_hp_property_t *arg,
/* for each requested property, get the value and add it to nvlist */
prop_pair = NULL;
- while (prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) {
+ while ((prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) != NULL) {
name = nvpair_name(prop_pair);
+ value = NULL;
if (strcmp(name, PCIEHPC_PROP_LED_FAULT) == 0) {
value = pcie_led_state_text(
@@ -1795,7 +1797,7 @@ set_prop_cleanup1:
/* Validate the request */
prop_pair = NULL;
- while (prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) {
+ while ((prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) != NULL) {
name = nvpair_name(prop_pair);
if (nvpair_type(prop_pair) != DATA_TYPE_STRING) {
PCIE_DBG("Unexpected data type of setting "
@@ -1832,9 +1834,12 @@ set_prop_cleanup1:
/* set each property */
prop_pair = NULL;
- while (prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) {
+ while ((prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) != NULL) {
name = nvpair_name(prop_pair);
+ /*
+ * The validity of the property was checked above.
+ */
if (strcmp(name, PCIEHPC_PROP_LED_ATTN) == 0) {
if (strcmp(value, PCIEHPC_PROP_VALUE_ON) == 0)
led_state = PCIE_HP_LED_ON;
@@ -1842,6 +1847,8 @@ set_prop_cleanup1:
led_state = PCIE_HP_LED_OFF;
else if (strcmp(value, PCIEHPC_PROP_VALUE_BLINK) == 0)
led_state = PCIE_HP_LED_BLINK;
+ else
+ continue;
pciehpc_set_led_state(ctrl_p, PCIE_HP_ATTN_LED,
led_state);
diff --git a/usr/src/uts/common/io/pciex/hotplug/pcishpc.c b/usr/src/uts/common/io/pciex/hotplug/pcishpc.c
index 22e7418096..d6057de1b8 100644
--- a/usr/src/uts/common/io/pciex/hotplug/pcishpc.c
+++ b/usr/src/uts/common/io/pciex/hotplug/pcishpc.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2019 Joyent, Inc.
*/
/*
@@ -449,8 +450,9 @@ pcishpc_slot_get_property(pcie_hp_slot_t *slot_p, ddi_hp_property_t *arg,
/* for each requested property, get the value and add it to nvlist */
prop_pair = NULL;
- while (prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) {
+ while ((prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) != NULL) {
name = nvpair_name(prop_pair);
+ value = NULL;
if (strcmp(name, PCIEHPC_PROP_LED_FAULT) == 0) {
value = pcie_led_state_text(
@@ -661,7 +663,7 @@ set_prop_cleanup1:
/* Validate the request */
prop_pair = NULL;
- while (prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) {
+ while ((prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) != NULL) {
name = nvpair_name(prop_pair);
if (nvpair_type(prop_pair) != DATA_TYPE_STRING) {
PCIE_DBG("Unexpected data type of setting "
@@ -699,7 +701,7 @@ set_prop_cleanup1:
// set each property
prop_pair = NULL;
- while (prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) {
+ while ((prop_pair = nvlist_next_nvpair(prop_list, prop_pair)) != NULL) {
name = nvpair_name(prop_pair);
if (strcmp(name, PCIEHPC_PROP_LED_ATTN) == 0) {
@@ -709,6 +711,8 @@ set_prop_cleanup1:
led_state = PCIE_HP_LED_OFF;
else if (strcmp(value, PCIEHPC_PROP_VALUE_BLINK) == 0)
led_state = PCIE_HP_LED_BLINK;
+ else
+ continue;
(void) pcishpc_setled(slot_p, PCIE_HP_ATTN_LED,
led_state);
diff --git a/usr/src/uts/common/io/pciex/pcie.c b/usr/src/uts/common/io/pciex/pcie.c
index b06e750888..1922f821f3 100644
--- a/usr/src/uts/common/io/pciex/pcie.c
+++ b/usr/src/uts/common/io/pciex/pcie.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2019, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#include <sys/sysmacros.h>
@@ -45,6 +45,9 @@
#include <sys/hotplug/pci/pcishpc.h>
#include <sys/hotplug/pci/pcicfg.h>
#include <sys/pci_cfgacc.h>
+#include <sys/sysevent.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/pcie.h>
/* Local functions prototypes */
static void pcie_init_pfd(dev_info_t *);
@@ -141,12 +144,24 @@ uint32_t pcie_aer_suce_severity = PCIE_AER_SUCE_SERR_ASSERT | \
int pcie_max_mps = PCIE_DEVCTL_MAX_PAYLOAD_4096 >> 5;
int pcie_disable_ari = 0;
+/*
+ * Amount of time to wait for an in-progress retraining. The default is to try
+ * 500 times in 10ms chunks, thus a total of 5s.
+ */
+uint32_t pcie_link_retrain_count = 500;
+uint32_t pcie_link_retrain_delay_ms = 10;
+
+taskq_t *pcie_link_tq;
+kmutex_t pcie_link_tq_mutex;
+
static void pcie_scan_mps(dev_info_t *rc_dip, dev_info_t *dip,
int *max_supported);
static int pcie_get_max_supported(dev_info_t *dip, void *arg);
static int pcie_map_phys(dev_info_t *dip, pci_regspec_t *phys_spec,
caddr_t *addrp, ddi_acc_handle_t *handlep);
static void pcie_unmap_phys(ddi_acc_handle_t *handlep, pci_regspec_t *ph);
+static int pcie_link_bw_intr(dev_info_t *);
+static void pcie_capture_speeds(dev_info_t *);
dev_info_t *pcie_get_rc_dip(dev_info_t *dip);
@@ -182,8 +197,10 @@ _init(void)
pcie_nv_buf = kmem_alloc(ERPT_DATA_SZ, KM_SLEEP);
pcie_nvap = fm_nva_xcreate(pcie_nv_buf, ERPT_DATA_SZ);
pcie_nvl = fm_nvlist_create(pcie_nvap);
+ mutex_init(&pcie_link_tq_mutex, NULL, MUTEX_DRIVER, NULL);
if ((rval = mod_install(&modlinkage)) != 0) {
+ mutex_destroy(&pcie_link_tq_mutex);
fm_nvlist_destroy(pcie_nvl, FM_NVA_RETAIN);
fm_nva_xdestroy(pcie_nvap);
kmem_free(pcie_nv_buf, ERPT_DATA_SZ);
@@ -197,6 +214,10 @@ _fini()
int rval;
if ((rval = mod_remove(&modlinkage)) == 0) {
+ if (pcie_link_tq != NULL) {
+ taskq_destroy(pcie_link_tq);
+ }
+ mutex_destroy(&pcie_link_tq_mutex);
fm_nvlist_destroy(pcie_nvl, FM_NVA_RETAIN);
fm_nva_xdestroy(pcie_nvap);
kmem_free(pcie_nv_buf, ERPT_DATA_SZ);
@@ -217,6 +238,18 @@ pcie_init(dev_info_t *dip, caddr_t arg)
int ret = DDI_SUCCESS;
/*
+ * Our _init function is too early to create a taskq. Create the pcie
+ * link management taskq here now instead.
+ */
+ mutex_enter(&pcie_link_tq_mutex);
+ if (pcie_link_tq == NULL) {
+ pcie_link_tq = taskq_create("pcie_link", 1, minclsyspri, 0, 0,
+ 0);
+ }
+ mutex_exit(&pcie_link_tq_mutex);
+
+
+ /*
* Create a "devctl" minor node to support DEVCTL_DEVICE_*
* and DEVCTL_BUS_* ioctls to this bus.
*/
@@ -270,6 +303,10 @@ pcie_uninit(dev_info_t *dip)
return (ret);
}
+ if (pcie_link_bw_supported(dip)) {
+ (void) pcie_link_bw_disable(dip);
+ }
+
ddi_remove_minor_node(dip, "devctl");
return (ret);
@@ -319,7 +356,16 @@ pcie_hpintr_disable(dev_info_t *dip)
int
pcie_intr(dev_info_t *dip)
{
- return (pcie_hp_intr(dip));
+ int hp, lbw;
+
+ hp = pcie_hp_intr(dip);
+ lbw = pcie_link_bw_intr(dip);
+
+ if (hp == DDI_INTR_CLAIMED || lbw == DDI_INTR_CLAIMED) {
+ return (DDI_INTR_CLAIMED);
+ }
+
+ return (DDI_INTR_UNCLAIMED);
}
/* ARGSUSED */
@@ -657,6 +703,8 @@ pcie_initchild(dev_info_t *cdip)
pcie_enable_errors(cdip);
pcie_determine_serial(cdip);
+
+ pcie_capture_speeds(cdip);
}
bus_p->bus_ari = B_FALSE;
@@ -939,6 +987,120 @@ pcie_rc_fini_bus(dev_info_t *dip)
kmem_free(bus_p, sizeof (pcie_bus_t));
}
+/* Convert a link width enumerator to its integer width; 0 if unrecognized. */
+static int
+pcie_width_to_int(pcie_link_width_t width)
+{
+	if (width == PCIE_LINK_WIDTH_X1)
+		return (1);
+	if (width == PCIE_LINK_WIDTH_X2)
+		return (2);
+	if (width == PCIE_LINK_WIDTH_X4)
+		return (4);
+	if (width == PCIE_LINK_WIDTH_X8)
+		return (8);
+	if (width == PCIE_LINK_WIDTH_X12)
+		return (12);
+	if (width == PCIE_LINK_WIDTH_X16)
+		return (16);
+	if (width == PCIE_LINK_WIDTH_X32)
+		return (32);
+
+	/* Any other value has no width to report. */
+	return (0);
+}
+
+/*
+ * Map a link speed enumerator to its rate in transfers per second.
+ * The result is a signed quantity to match the ndi/ddi property
+ * interfaces.
+ */
+static int64_t
+pcie_speed_to_int(pcie_link_speed_t speed)
+{
+	if (speed == PCIE_LINK_SPEED_2_5)
+		return (2500000000LL);
+	if (speed == PCIE_LINK_SPEED_5)
+		return (5000000000LL);
+	if (speed == PCIE_LINK_SPEED_8)
+		return (8000000000LL);
+	if (speed == PCIE_LINK_SPEED_16)
+		return (16000000000LL);
+
+	/* Anything else has no single rate to report. */
+	return (0);
+}
+
+/*
+ * Publish the link widths and speeds recorded in bus_p as devinfo properties
+ * on dip. A field still holding its UNKNOWN value is not published, and the
+ * return values of the ndi property calls are discarded.
+ */
+static void
+pcie_speeds_to_devinfo(dev_info_t *dip, pcie_bus_t *bus_p)
+{
+	static const pcie_link_speed_t each[] = {
+		PCIE_LINK_SPEED_2_5,
+		PCIE_LINK_SPEED_5,
+		PCIE_LINK_SPEED_8,
+		PCIE_LINK_SPEED_16
+	};
+	int64_t rates[4];
+	uint_t nrates = 0;
+	uint_t i;
+
+	/* Widths are published as plain ints (x4 becomes 4). */
+	if (bus_p->bus_max_width != PCIE_LINK_WIDTH_UNKNOWN) {
+		(void) ndi_prop_update_int(DDI_DEV_T_NONE, dip,
+		    "pcie-link-maximum-width",
+		    pcie_width_to_int(bus_p->bus_max_width));
+	}
+
+	if (bus_p->bus_cur_width != PCIE_LINK_WIDTH_UNKNOWN) {
+		(void) ndi_prop_update_int(DDI_DEV_T_NONE, dip,
+		    "pcie-link-current-width",
+		    pcie_width_to_int(bus_p->bus_cur_width));
+	}
+
+	/* Speeds are published as int64 values from pcie_speed_to_int(). */
+	if (bus_p->bus_cur_speed != PCIE_LINK_SPEED_UNKNOWN) {
+		(void) ndi_prop_update_int64(DDI_DEV_T_NONE, dip,
+		    "pcie-link-current-speed",
+		    pcie_speed_to_int(bus_p->bus_cur_speed));
+	}
+
+	if (bus_p->bus_max_speed != PCIE_LINK_SPEED_UNKNOWN) {
+		(void) ndi_prop_update_int64(DDI_DEV_T_NONE, dip,
+		    "pcie-link-maximum-speed",
+		    pcie_speed_to_int(bus_p->bus_max_speed));
+	}
+
+	if (bus_p->bus_target_speed != PCIE_LINK_SPEED_UNKNOWN) {
+		(void) ndi_prop_update_int64(DDI_DEV_T_NONE, dip,
+		    "pcie-link-target-speed",
+		    pcie_speed_to_int(bus_p->bus_target_speed));
+	}
+
+	/* The admin tag is a boolean property, created only when flagged. */
+	if ((bus_p->bus_speed_flags & PCIE_LINK_F_ADMIN_TARGET) != 0) {
+		(void) ndi_prop_create_boolean(DDI_DEV_T_NONE, dip,
+		    "pcie-link-admin-target-speed");
+	}
+
+	/* Nothing more to publish without a supported-speed mask. */
+	if (bus_p->bus_sup_speed == PCIE_LINK_SPEED_UNKNOWN)
+		return;
+
+	/* Expand the supported-speed bitmask into an array of rates. */
+	for (i = 0; i < sizeof (each) / sizeof (each[0]); i++) {
+		if ((bus_p->bus_sup_speed & each[i]) != 0)
+			rates[nrates++] = pcie_speed_to_int(each[i]);
+	}
+
+	(void) ndi_prop_update_int64_array(DDI_DEV_T_NONE, dip,
+	    "pcie-link-supported-speeds", rates, nrates);
+}
+
/*
* We need to capture the supported, maximum, and current device speed and
* width. The way that this has been done has changed over time.
@@ -952,18 +1114,20 @@ pcie_rc_fini_bus(dev_info_t *dip)
* Now, a device may not implement some of these registers. To determine whether
* or not it's here, we have to do the following. First, we need to check the
* revision of the PCI express capability. The link capabilities 2 register did
- * not exist prior to version 2 of this register.
+ * not exist prior to version 2 of this capability. If a modern device does not
+ * implement it, it is supposed to return zero for the register.
*/
static void
-pcie_capture_speeds(pcie_bus_t *bus_p, pcie_req_id_t bdf, dev_info_t *rcdip)
+pcie_capture_speeds(dev_info_t *dip)
{
uint16_t vers, status;
- uint32_t val, cap, cap2;
+ uint32_t cap, cap2, ctl2;
+ pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
if (!PCIE_IS_PCIE(bus_p))
return;
- vers = pci_cfgacc_get16(rcdip, bdf, bus_p->bus_pcie_off + PCIE_PCIECAP);
+ vers = PCIE_CAP_GET(16, bus_p, PCIE_PCIECAP);
if (vers == PCI_EINVAL16)
return;
vers &= PCIE_PCIECAP_VER_MASK;
@@ -974,24 +1138,28 @@ pcie_capture_speeds(pcie_bus_t *bus_p, pcie_req_id_t bdf, dev_info_t *rcdip)
switch (vers) {
case PCIE_PCIECAP_VER_1_0:
cap2 = 0;
+ ctl2 = 0;
break;
case PCIE_PCIECAP_VER_2_0:
- cap2 = pci_cfgacc_get32(rcdip, bdf, bus_p->bus_pcie_off +
- PCIE_LINKCAP2);
+ cap2 = PCIE_CAP_GET(32, bus_p, PCIE_LINKCAP2);
if (cap2 == PCI_EINVAL32)
cap2 = 0;
+ ctl2 = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL2);
+ if (ctl2 == PCI_EINVAL16)
+ ctl2 = 0;
break;
default:
/* Don't try and handle an unknown version */
return;
}
- status = pci_cfgacc_get16(rcdip, bdf, bus_p->bus_pcie_off +
- PCIE_LINKSTS);
- cap = pci_cfgacc_get32(rcdip, bdf, bus_p->bus_pcie_off + PCIE_LINKCAP);
+ status = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS);
+ cap = PCIE_CAP_GET(32, bus_p, PCIE_LINKCAP);
if (status == PCI_EINVAL16 || cap == PCI_EINVAL32)
return;
+ mutex_enter(&bus_p->bus_speed_mutex);
+
switch (status & PCIE_LINKSTS_SPEED_MASK) {
case PCIE_LINKSTS_SPEED_2_5:
bus_p->bus_cur_speed = PCIE_LINK_SPEED_2_5;
@@ -1104,13 +1272,32 @@ pcie_capture_speeds(pcie_bus_t *bus_p, pcie_req_id_t bdf, dev_info_t *rcdip)
bus_p->bus_max_speed = PCIE_LINK_SPEED_5;
bus_p->bus_sup_speed = PCIE_LINK_SPEED_2_5 |
PCIE_LINK_SPEED_5;
- }
-
- if (cap & PCIE_LINKCAP_MAX_SPEED_2_5) {
+ } else if (cap & PCIE_LINKCAP_MAX_SPEED_2_5) {
bus_p->bus_max_speed = PCIE_LINK_SPEED_2_5;
bus_p->bus_sup_speed = PCIE_LINK_SPEED_2_5;
}
}
+
+ switch (ctl2 & PCIE_LINKCTL2_TARGET_SPEED_MASK) {
+ case PCIE_LINKCTL2_TARGET_SPEED_2_5:
+ bus_p->bus_target_speed = PCIE_LINK_SPEED_2_5;
+ break;
+ case PCIE_LINKCTL2_TARGET_SPEED_5:
+ bus_p->bus_target_speed = PCIE_LINK_SPEED_5;
+ break;
+ case PCIE_LINKCTL2_TARGET_SPEED_8:
+ bus_p->bus_target_speed = PCIE_LINK_SPEED_8;
+ break;
+ case PCIE_LINKCTL2_TARGET_SPEED_16:
+ bus_p->bus_target_speed = PCIE_LINK_SPEED_16;
+ break;
+ default:
+ bus_p->bus_target_speed = PCIE_LINK_SPEED_UNKNOWN;
+ break;
+ }
+
+ pcie_speeds_to_devinfo(dip, bus_p);
+ mutex_exit(&bus_p->bus_speed_mutex);
}
/*
@@ -1186,7 +1373,7 @@ pcie_init_bus(dev_info_t *dip, pcie_req_id_t bdf, uint8_t flags)
uint16_t status, base, baseptr, num_cap;
uint32_t capid;
int range_size;
- pcie_bus_t *bus_p;
+ pcie_bus_t *bus_p = NULL;
dev_info_t *rcdip;
dev_info_t *pdip;
const char *errstr = NULL;
@@ -1406,15 +1593,15 @@ initial_done:
pcie_init_plat(dip);
- pcie_capture_speeds(bus_p, bdf, rcdip);
-
final_done:
PCIE_DBG("Add %s(dip 0x%p, bdf 0x%x, secbus 0x%x)\n",
ddi_driver_name(dip), (void *)dip, bus_p->bus_bdf,
bus_p->bus_bdg_secbus);
#ifdef DEBUG
- pcie_print_bus(bus_p);
+ if (bus_p != NULL) {
+ pcie_print_bus(bus_p);
+ }
#endif
return (bus_p);
@@ -2640,3 +2827,337 @@ pcie_check_io_mem_range(ddi_acc_handle_t cfg_hdl, boolean_t *empty_io_range,
}
#endif /* defined(__i386) || defined(__amd64) */
+
+/*
+ * Report whether the device behind dip can generate link bandwidth
+ * notifications. B_FALSE is returned for anything that is not PCIe, or that
+ * passes neither PCIE_IS_RP() nor PCIE_IS_SWD(). Otherwise the answer is
+ * whether the link capabilities register advertises
+ * PCIE_LINKCAP_LINK_BW_NOTIFY_CAP.
+ */
+boolean_t
+pcie_link_bw_supported(dev_info_t *dip)
+{
+ uint32_t linkcap;
+ pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
+
+ if (!PCIE_IS_PCIE(bus_p)) {
+ return (B_FALSE);
+ }
+
+ if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
+ return (B_FALSE);
+ }
+
+ linkcap = PCIE_CAP_GET(32, bus_p, PCIE_LINKCAP);
+ return ((linkcap & PCIE_LINKCAP_LINK_BW_NOTIFY_CAP) != 0);
+}
+
+/*
+ * Turn on link bandwidth notifications for the bridge dip. Returns
+ * DDI_FAILURE if pcie_link_bw_supported() says the device cannot generate
+ * them, DDI_SUCCESS otherwise.
+ *
+ * On success this initializes the lbw mutex and condition variable, sets both
+ * bandwidth interrupt enable bits in the link control register, allocates the
+ * two MAXPATHLEN path buffers used when building sysevents, and marks the bus
+ * as PCIE_LBW_S_ENABLED. pcie_link_bw_disable() undoes all of this.
+ */
+int
+pcie_link_bw_enable(dev_info_t *dip)
+{
+ uint16_t linkctl;
+ pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
+
+ if (!pcie_link_bw_supported(dip)) {
+ return (DDI_FAILURE);
+ }
+
+ mutex_init(&bus_p->bus_lbw_mutex, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&bus_p->bus_lbw_cv, NULL, CV_DRIVER, NULL);
+ linkctl = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL);
+ linkctl |= PCIE_LINKCTL_LINK_BW_INTR_EN;
+ linkctl |= PCIE_LINKCTL_LINK_AUTO_BW_INTR_EN;
+ PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL, linkctl);
+
+ /*
+ * The interrupt handler ignores events until PCIE_LBW_S_ENABLED is set,
+ * so the buffers are in place before any taskq work can be dispatched.
+ */
+ bus_p->bus_lbw_pbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+ bus_p->bus_lbw_cbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+ bus_p->bus_lbw_state |= PCIE_LBW_S_ENABLED;
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * Turn off link bandwidth notifications for the bridge dip and release
+ * everything pcie_link_bw_enable() set up. Returns DDI_FAILURE if
+ * notifications were never enabled on this bus, DDI_SUCCESS otherwise.
+ */
+int
+pcie_link_bw_disable(dev_info_t *dip)
+{
+ uint16_t linkctl;
+ pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
+
+ if ((bus_p->bus_lbw_state & PCIE_LBW_S_ENABLED) == 0) {
+ return (DDI_FAILURE);
+ }
+
+ /*
+ * Wait for any dispatched or running taskq pass to finish; the taskq
+ * broadcasts on bus_lbw_cv when it clears PCIE_LBW_S_RUNNING.
+ *
+ * NOTE(review): the interrupt sources are still enabled here and
+ * PCIE_LBW_S_ENABLED is still set, so it looks like a new event could be
+ * dispatched between the end of this wait and the teardown below.
+ * Confirm that callers quiesce the interrupt before calling this.
+ */
+ mutex_enter(&bus_p->bus_lbw_mutex);
+ while ((bus_p->bus_lbw_state &
+ (PCIE_LBW_S_DISPATCHED | PCIE_LBW_S_RUNNING)) != 0) {
+ cv_wait(&bus_p->bus_lbw_cv, &bus_p->bus_lbw_mutex);
+ }
+ mutex_exit(&bus_p->bus_lbw_mutex);
+
+ linkctl = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL);
+ linkctl &= ~PCIE_LINKCTL_LINK_BW_INTR_EN;
+ linkctl &= ~PCIE_LINKCTL_LINK_AUTO_BW_INTR_EN;
+ PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL, linkctl);
+
+ bus_p->bus_lbw_state &= ~PCIE_LBW_S_ENABLED;
+ kmem_free(bus_p->bus_lbw_pbuf, MAXPATHLEN);
+ kmem_free(bus_p->bus_lbw_cbuf, MAXPATHLEN);
+ bus_p->bus_lbw_pbuf = NULL;
+ bus_p->bus_lbw_cbuf = NULL;
+
+ mutex_destroy(&bus_p->bus_lbw_mutex);
+ cv_destroy(&bus_p->bus_lbw_cv);
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * Taskq callback that processes a link bandwidth notification for the bridge
+ * passed in arg (a dev_info_t *). Each pass re-captures the link speeds of the
+ * bridge and of the first function-0 child that has a config handle,
+ * invalidates the devinfo property cache, and logs an
+ * EC_PCIE/ESC_PCIE_LINK_STATE sysevent carrying the detector path and, when a
+ * child was updated, the child path.
+ *
+ * PCIE_LBW_S_RUNNING is set for the duration of each pass. If
+ * pcie_link_bw_intr() sets PCIE_LBW_S_DISPATCHED while a pass is running, the
+ * work is repeated from the top instead of being dispatched a second time.
+ */
+void
+pcie_link_bw_taskq(void *arg)
+{
+ dev_info_t *dip = arg;
+ pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
+ dev_info_t *cdip;
+ boolean_t again;
+ sysevent_t *se;
+ sysevent_value_t se_val;
+ sysevent_id_t eid;
+ sysevent_attr_list_t *ev_attr_list;
+ int circular;
+
+top:
+ ndi_devi_enter(dip, &circular);
+ se = NULL;
+ ev_attr_list = NULL;
+ mutex_enter(&bus_p->bus_lbw_mutex);
+ bus_p->bus_lbw_state &= ~PCIE_LBW_S_DISPATCHED;
+ bus_p->bus_lbw_state |= PCIE_LBW_S_RUNNING;
+ mutex_exit(&bus_p->bus_lbw_mutex);
+
+ /*
+ * Update our own speeds as we've likely changed something.
+ */
+ pcie_capture_speeds(dip);
+
+ /*
+ * Walk our children. We only care about updating this on function 0
+ * because the PCIe specification requires that these all be the same
+ * otherwise.
+ */
+ for (cdip = ddi_get_child(dip); cdip != NULL;
+ cdip = ddi_get_next_sibling(cdip)) {
+ pcie_bus_t *cbus_p = PCIE_DIP2BUS(cdip);
+
+ if (cbus_p == NULL) {
+ continue;
+ }
+
+ if ((cbus_p->bus_bdf & PCIE_REQ_ID_FUNC_MASK) != 0) {
+ continue;
+ }
+
+ /*
+ * It's possible that this can fire while a child is otherwise
+ * only partially constructed. Therefore, if we don't have the
+ * config handle, don't bother updating the child.
+ */
+ if (cbus_p->bus_cfg_hdl == NULL) {
+ continue;
+ }
+
+ pcie_capture_speeds(cdip);
+ break;
+ }
+
+ /*
+ * From here on, cdip is non-NULL only if the loop above stopped at a
+ * child whose speeds were just updated.
+ */
+ se = sysevent_alloc(EC_PCIE, ESC_PCIE_LINK_STATE,
+ ILLUMOS_KERN_PUB "pcie", SE_SLEEP);
+
+ (void) ddi_pathname(dip, bus_p->bus_lbw_pbuf);
+ se_val.value_type = SE_DATA_TYPE_STRING;
+ se_val.value.sv_string = bus_p->bus_lbw_pbuf;
+ if (sysevent_add_attr(&ev_attr_list, PCIE_EV_DETECTOR_PATH, &se_val,
+ SE_SLEEP) != 0) {
+ ndi_devi_exit(dip, circular);
+ goto err;
+ }
+
+ if (cdip != NULL) {
+ (void) ddi_pathname(cdip, bus_p->bus_lbw_cbuf);
+
+ se_val.value_type = SE_DATA_TYPE_STRING;
+ se_val.value.sv_string = bus_p->bus_lbw_cbuf;
+
+ /*
+ * If this fails, that's OK. We'd rather get the event off and
+ * there's a chance that there may not be anything there for us.
+ */
+ (void) sysevent_add_attr(&ev_attr_list, PCIE_EV_CHILD_PATH,
+ &se_val, SE_SLEEP);
+ }
+
+ ndi_devi_exit(dip, circular);
+
+ /*
+ * Before we generate and send down a sysevent, we need to tell the
+ * system that parts of the devinfo cache need to be invalidated. While
+ * the function below takes several args, it ignores them all. Because
+ * this is a global invalidation, we don't bother trying to do much more
+ * than requesting a global invalidation, lest we accidentally kick off
+ * several in a row.
+ */
+ ddi_prop_cache_invalidate(DDI_DEV_T_NONE, NULL, NULL, 0);
+
+ if (sysevent_attach_attributes(se, ev_attr_list) != 0) {
+ goto err;
+ }
+ /*
+ * The attribute list is cleared here after a successful attach so that
+ * the cleanup below does not free it separately from the event.
+ */
+ ev_attr_list = NULL;
+
+ if (log_sysevent(se, SE_SLEEP, &eid) != 0) {
+ goto err;
+ }
+
+ /*
+ * Both the success and failure paths fall through to the cleanup below.
+ */
+err:
+ sysevent_free_attr(ev_attr_list);
+ sysevent_free(se);
+
+ mutex_enter(&bus_p->bus_lbw_mutex);
+ bus_p->bus_lbw_state &= ~PCIE_LBW_S_RUNNING;
+ cv_broadcast(&bus_p->bus_lbw_cv);
+ again = (bus_p->bus_lbw_state & PCIE_LBW_S_DISPATCHED) != 0;
+ mutex_exit(&bus_p->bus_lbw_mutex);
+
+ if (again) {
+ goto top;
+ }
+}
+
+/*
+ * Interrupt-time handler for link bandwidth notifications on the bridge dip.
+ *
+ * Returns DDI_INTR_UNCLAIMED if notifications are not enabled on this bus or
+ * if neither PCIE_LINKSTS_LINK_BW_MGMT nor PCIE_LINKSTS_AUTO_BW is set in the
+ * link status register. Otherwise the event is counted, the taskq is
+ * dispatched unless a pass is already dispatched or running, both status
+ * flags are written back to the link status register (presumably to
+ * acknowledge them; confirm against the PCIe specification), and
+ * DDI_INTR_CLAIMED is returned.
+ *
+ * All of the sysevent work happens in pcie_link_bw_taskq(); nothing here
+ * needs sysevent state.
+ */
+int
+pcie_link_bw_intr(dev_info_t *dip)
+{
+ pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
+ uint16_t linksts;
+ uint16_t flags = PCIE_LINKSTS_LINK_BW_MGMT | PCIE_LINKSTS_AUTO_BW;
+
+ if ((bus_p->bus_lbw_state & PCIE_LBW_S_ENABLED) == 0) {
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ linksts = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS);
+ if ((linksts & flags) == 0) {
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ /*
+ * Check if we've already dispatched this event. If we have already
+ * dispatched it, then there's nothing else to do, we coalesce multiple
+ * events.
+ */
+ mutex_enter(&bus_p->bus_lbw_mutex);
+ bus_p->bus_lbw_nevents++;
+ if ((bus_p->bus_lbw_state & PCIE_LBW_S_DISPATCHED) == 0) {
+ /*
+ * If a pass is currently running we only set the flag; the
+ * taskq checks it on the way out and loops again by itself.
+ */
+ if ((bus_p->bus_lbw_state & PCIE_LBW_S_RUNNING) == 0) {
+ taskq_dispatch_ent(pcie_link_tq, pcie_link_bw_taskq,
+ dip, 0, &bus_p->bus_lbw_ent);
+ }
+
+ bus_p->bus_lbw_state |= PCIE_LBW_S_DISPATCHED;
+ }
+ mutex_exit(&bus_p->bus_lbw_mutex);
+
+ PCIE_CAP_PUT(16, bus_p, PCIE_LINKSTS, flags);
+ return (DDI_INTR_CLAIMED);
+}
+
+/*
+ * Set the target link speed of the bridge dip to speed.
+ *
+ * Returns ENOTSUP if the device is not PCIe or passes neither PCIE_IS_RP() nor
+ * PCIE_IS_SWD(), EINVAL if speed is not one of the four speeds handled below,
+ * and 0 otherwise. On success the new target is recorded in the bus structure,
+ * flagged as administratively set, and written into the target speed field of
+ * the link control 2 register. This function does not set the retrain bit;
+ * see pcie_link_retrain().
+ */
+int
+pcie_link_set_target(dev_info_t *dip, pcie_link_speed_t speed)
+{
+ uint16_t ctl2, rval;
+ pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
+
+ if (!PCIE_IS_PCIE(bus_p)) {
+ return (ENOTSUP);
+ }
+
+ if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
+ return (ENOTSUP);
+ }
+
+ /*
+ * Translate the software speed value into the register encoding.
+ */
+ switch (speed) {
+ case PCIE_LINK_SPEED_2_5:
+ rval = PCIE_LINKCTL2_TARGET_SPEED_2_5;
+ break;
+ case PCIE_LINK_SPEED_5:
+ rval = PCIE_LINKCTL2_TARGET_SPEED_5;
+ break;
+ case PCIE_LINK_SPEED_8:
+ rval = PCIE_LINKCTL2_TARGET_SPEED_8;
+ break;
+ case PCIE_LINK_SPEED_16:
+ rval = PCIE_LINKCTL2_TARGET_SPEED_16;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ mutex_enter(&bus_p->bus_speed_mutex);
+ bus_p->bus_target_speed = speed;
+ bus_p->bus_speed_flags |= PCIE_LINK_F_ADMIN_TARGET;
+
+ ctl2 = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL2);
+ ctl2 &= ~PCIE_LINKCTL2_TARGET_SPEED_MASK;
+ ctl2 |= rval;
+ PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL2, ctl2);
+ mutex_exit(&bus_p->bus_speed_mutex);
+
+ /*
+ * Make sure our updates have been reflected in devinfo.
+ */
+ pcie_capture_speeds(dip);
+
+ return (0);
+}
+
+/*
+ * Ask the bridge dip to retrain its link by setting
+ * PCIE_LINKCTL_RETRAIN_LINK in the link control register.
+ *
+ * Returns ENOTSUP if the device is not PCIe or passes neither PCIE_IS_RP() nor
+ * PCIE_IS_SWD(), and 0 otherwise. Both wait loops below are bounded by
+ * pcie_link_retrain_count iterations of pcie_link_retrain_delay_ms each. Note
+ * that 0 is returned even if the link still reports that it is training when
+ * the second loop runs out.
+ */
+int
+pcie_link_retrain(dev_info_t *dip)
+{
+ uint16_t ctl;
+ pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
+
+ if (!PCIE_IS_PCIE(bus_p)) {
+ return (ENOTSUP);
+ }
+
+ if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
+ return (ENOTSUP);
+ }
+
+ /*
+ * The PCIe specification suggests that we make sure that the link isn't
+ * in training before issuing this command in case there was a state
+ * machine transition prior to when we got here. We wait and then go
+ * ahead and issue the command anyways.
+ */
+ for (uint32_t i = 0; i < pcie_link_retrain_count; i++) {
+ uint16_t sts;
+
+ sts = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS);
+ if ((sts & PCIE_LINKSTS_LINK_TRAINING) == 0)
+ break;
+ delay(drv_usectohz(pcie_link_retrain_delay_ms * 1000));
+ }
+
+ ctl = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL);
+ ctl |= PCIE_LINKCTL_RETRAIN_LINK;
+ PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL, ctl);
+
+ /*
+ * Wait again to see if it clears before returning to the user.
+ */
+ for (uint32_t i = 0; i < pcie_link_retrain_count; i++) {
+ uint16_t sts;
+
+ sts = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS);
+ if ((sts & PCIE_LINKSTS_LINK_TRAINING) == 0)
+ break;
+ delay(drv_usectohz(pcie_link_retrain_delay_ms * 1000));
+ }
+
+ return (0);
+}
diff --git a/usr/src/uts/common/io/pciex/pcie_pwr.c b/usr/src/uts/common/io/pciex/pcie_pwr.c
index 41ffa02726..9aeee8d238 100644
--- a/usr/src/uts/common/io/pciex/pcie_pwr.c
+++ b/usr/src/uts/common/io/pciex/pcie_pwr.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2019 Joyent, Inc.
*/
#include <sys/types.h>
@@ -752,7 +753,7 @@ pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p)
* Called when the child makes the first power management call.
* sets up the counters. All the components of the child device are
* assumed to be at unknown level. It also releases the power hold
- * pwr_p - parent's pwr_t
+ * pwr_p - parent's pwr_t
* cdip - child's dip
*/
int
@@ -908,7 +909,8 @@ pcie_pwr_resume(dev_info_t *dip)
pcie_clear_errors(cdip);
/* PCIe workaround: disable errors during 4K config restore */
- if (is_pcie = pcie_is_pcie(cdip))
+ is_pcie = pcie_is_pcie(cdip);
+ if (is_pcie)
pcie_disable_errors(cdip);
(void) pci_restore_config_regs(cdip);
if (is_pcie) {
@@ -1041,7 +1043,8 @@ pcie_pwr_suspend(dev_info_t *dip)
ddi_driver_name(cdip), ddi_get_instance(cdip));
/* PCIe workaround: disable errors during 4K config save */
- if (is_pcie = pcie_is_pcie(cdip))
+ is_pcie = pcie_is_pcie(cdip);
+ if (is_pcie)
pcie_disable_errors(cdip);
(void) pci_save_config_regs(cdip);
if (is_pcie) {
diff --git a/usr/src/uts/common/io/pciex/pcieb.c b/usr/src/uts/common/io/pciex/pcieb.c
index bdf7a61016..c9d65748bb 100644
--- a/usr/src/uts/common/io/pciex/pcieb.c
+++ b/usr/src/uts/common/io/pciex/pcieb.c
@@ -23,10 +23,121 @@
*/
/*
* Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
*/
/*
* Common x86 and SPARC PCI-E to PCI bus bridge nexus driver
+ *
+ * Background
+ * ----------
+ *
+ * The PCI Express (PCIe) specification defines that all of the PCIe devices in
+ * the system are connected together in a series of different fabrics. A way to
+ * think of these fabrics is that they are small networks where there are links
+ * between different devices and switches that allow fan out or fan in of the
+ * fabric. The entry point to that fabric is called a root complex and the
+ * fabric terminates at what is called an endpoint, which is really just PCIe
+ * terminology for the common cards that are inserted into the system (HBAs,
+ * NICs, USB, NVMe, etc.).
+ *
+ * The PCIe specification states that every link on the system has a virtual
+ * PCI-to-PCI bridge. This allows PCIe devices to still be configured the same
+ * way traditional PCI devices are to the operating system and allows them to
+ * have a traditional PCI bus, device, and function associated with them, even
+ * though there is no actual shared bus. In addition, bridges are also used to
+ * connect traditional PCI and PCI-X devices into them.
+ *
+ * The PCIe specification refers to upstream and downstream ports. Upstream
+ * ports are considered closer to the root complex and downstream ports are
+ * closer to the endpoint. We can divide the devices that the bridge driver
+ * attaches to into two groups: those that are considered upstream ports, which
+ * include root complexes and parts of PCIe switches, and downstream ports,
+ * which are the other half of PCIe switches and endpoints (which this driver
+ * does not attach to; normal hardware-specific or class-specific drivers
+ * attach to those).
+ *
+ * Interrupt Management
+ * --------------------
+ *
+ * Upstream ports of bridges have additional things that we care about.
+ * Specifically they're the means through which we find out about:
+ *
+ * - Advanced Error Reporting (AERs)
+ * - Hotplug events
+ * - Link Bandwidth Events
+ * - Power Management Events (PME)
+ *
+ * Each of these features is an optional feature (though ones we hope are
+ * implemented). The features above are grouped into two different buckets based
+ * on which PCI capability they appear in. AER management is done through a PCI
+ * Express extended configuration header (it lives in extended PCI configuration
+ * space) called the 'Advanced Error Reporting Extended Capability'. The other
+ * events are all managed as part of the 'PCI Express Capability Structure'.
+ * This structure is found in traditional PCI configuration space.
+ *
+ * The way that the interrupts are programmed for these types of events differs
+ * a bit from the way one might expect a normal device to operate. For most
+ * devices, one allocates a number of interrupts based on a combination of what
+ * the device supports, what the OS supports per device, and the number the
+ * driver needs. Then the driver programs the device in a device-specific manner
+ * to indicate which events should trigger a specific interrupt vector.
+ *
+ * However, for both the AER and PCI capabilities, the driver has to do
+ * something different. The driver first allocates interrupts by programming the
+ * MSI or MSI-X table and then asks the device which interrupts have been
+ * assigned to these purposes. Because these events are only supported in
+ * 'upstream' devices, this does not interfere with the traditional management
+ * of MSI and MSI-X interrupts. At this time, the pcieb driver only supports the
+ * use of MSI interrupts.
+ *
+ * Once the interrupts have been allocated, we read back which vectors have been
+ * nominated by the device to cover the corresponding capability. The interrupt
+ * is allocated on a per-capability basis. Therefore, one interrupt would cover
+ * AERs, while another interrupt would cover the rest of the desired functions.
+ *
+ * To track which interrupts cover which behaviors, each driver state
+ * (pcieb_devstate_t) has a member called 'pcieb_isr_tab'. Each index represents
+ * an interrupt vector and there are a series of flags that represent the
+ * different possible interrupt sources: PCIEB_INTR_SRC_HP (hotplug),
+ * PCIEB_INTR_SRC_PME (power management event), PCIEB_INTR_SRC_AER (error
+ * reporting), PCIEB_INTR_SRC_LBW (link bandwidth).
+ *
+ * Because the hotplug, link bandwidth, and power management events all share
+ * the same vector, if an interrupt comes in, we must check all of the enabled
+ * sources that might generate this interrupt. It is highly likely that more
+ * than one will fire at the same time, for example, a hotplug event that fires
+ * because a device has been inserted or removed, will likely trigger a link
+ * bandwidth event.
+ *
+ * The pcieb driver itself does not actually have much logic to deal with and
+ * clear the interrupts in question. It generally speaking will vector most
+ * events back to the more general pcie driver or, in the case of AERs, initiate
+ * a scan of the fabric itself (also part of the pcie driver).
+ *
+ * Link Management
+ * ---------------
+ *
+ * The pcieb driver is used to take care of two different aspects of link
+ * management. The first of these, as described briefly above, is to monitor for
+ * changes to the negotiated link bandwidth. These events are managed by
+ * enabling support for the interrupts in the PCI Express Capability Structure.
+ * This is all taken care of by the pcie driver through functions like
+ * pcie_link_bw_enable().
+ *
+ * The second aspect of link management the pcieb driver enables is the ability
+ * to retrain the link and optionally limit the speed. This is enabled through a
+ * series of private ioctls that are driven through a private userland utility,
+ * /usr/lib/pci/pcieb. Eventually, this should be more fleshed out and a more
+ * uniform interface based around the devctls that can be leveraged across
+ * different classes of devices should be used.
+ *
+ * Under the hood this basically leverages the ability of the upstream port to
+ * retrain a link by writing a bit to the PCIe link control register. See
+ * pcieb_ioctl_retrain(). From there, if the driver ever receives a request to
+ * change the maximum speed, that is updated in the card; however, it does not
+ * immediately retrain the link. A separate ioctl request is required to do so.
+ * Once the speed has been changed, regardless of whether or not it has been
+ * retrained, that fact will always be noted.
*/
#include <sys/sysmacros.h>
@@ -52,6 +163,7 @@
#include <sys/pcie_pwr.h>
#include <sys/hotplug/pci/pcie_hp.h>
#include "pcieb.h"
+#include "pcieb_ioctl.h"
#ifdef PX_PLX
#include <io/pciex/pcieb_plx.h>
#endif /* PX_PLX */
@@ -344,7 +456,7 @@ pcieb_41210_mps_wkrnd(dev_info_t *cdip)
~(PCIE_DEVCTL_MAX_READ_REQ_MASK |
PCIE_DEVCTL_MAX_PAYLOAD_MASK)) | cdip_mrrs_mps;
- PCI_CAP_PUT16(cfg_hdl, 0, cap_ptr, PCIE_DEVCTL,
+ (void) PCI_CAP_PUT16(cfg_hdl, 0, cap_ptr, PCIE_DEVCTL,
sdip_dev_ctrl);
}
@@ -475,6 +587,8 @@ pcieb_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
(void) pcie_hpintr_enable(devi);
+ (void) pcie_link_bw_enable(devi);
+
/* Do any platform specific workarounds needed at this time */
pcieb_plat_attach_workaround(devi);
@@ -994,7 +1108,6 @@ FAIL:
* by the device. If features are not enabled first, the
* device might not ask for any interrupts.
*/
-
static int
pcieb_intr_init(pcieb_devstate_t *pcieb, int intr_type)
{
@@ -1002,39 +1115,47 @@ pcieb_intr_init(pcieb_devstate_t *pcieb, int intr_type)
int nintrs, request, count, x;
int intr_cap = 0;
int inum = 0;
- int ret, hp_msi_off;
+ int ret;
pcie_bus_t *bus_p = PCIE_DIP2UPBUS(dip);
uint16_t vendorid = bus_p->bus_dev_ven_id & 0xFFFF;
boolean_t is_hp = B_FALSE;
boolean_t is_pme = B_FALSE;
+ boolean_t is_lbw = B_FALSE;
PCIEB_DEBUG(DBG_ATTACH, dip, "pcieb_intr_init: Attaching %s handler\n",
(intr_type == DDI_INTR_TYPE_MSI) ? "MSI" : "INTx");
request = 0;
if (PCIE_IS_HOTPLUG_ENABLED(dip)) {
- request++;
is_hp = B_TRUE;
}
- /*
- * Hotplug and PME share the same MSI vector. If hotplug is not
- * supported check if MSI is needed for PME.
- */
if ((intr_type == DDI_INTR_TYPE_MSI) && PCIE_IS_RP(bus_p) &&
(vendorid == NVIDIA_VENDOR_ID)) {
is_pme = B_TRUE;
- if (!is_hp)
- request++;
+ }
+
+ if (intr_type == DDI_INTR_TYPE_MSI && pcie_link_bw_supported(dip)) {
+ is_lbw = B_TRUE;
}
/*
- * Setup MSI if this device is a Rootport and has AER. Currently no
- * SPARC Root Port supports fabric errors being reported through it.
+ * The hot-plug, link bandwidth, and power management events all are
+ * based on the PCI Express capability. Therefore, they all share their
+ * own interrupt.
+ */
+ if (is_hp || is_pme || is_lbw) {
+ request++;
+ }
+
+ /*
+ * If this device is a root port, which means it can have MSI interrupts
+ * enabled for AERs, then we need to request one.
*/
if (intr_type == DDI_INTR_TYPE_MSI) {
- if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p))
+ if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p)) {
request++;
+ }
}
if (request == 0)
@@ -1166,22 +1287,31 @@ pcieb_intr_init(pcieb_devstate_t *pcieb, int intr_type)
/* Get the MSI offset for hotplug/PME from the PCIe cap reg */
if (intr_type == DDI_INTR_TYPE_MSI) {
- hp_msi_off = PCI_CAP_GET16(bus_p->bus_cfg_hdl, 0,
+ uint16_t pcie_msi_off;
+ pcie_msi_off = PCI_CAP_GET16(bus_p->bus_cfg_hdl, 0,
bus_p->bus_pcie_off, PCIE_PCIECAP) &
PCIE_PCIECAP_INT_MSG_NUM;
- if (hp_msi_off >= count) {
- PCIEB_DEBUG(DBG_ATTACH, dip, "MSI number %d in PCIe "
- "cap > max allocated %d\n", hp_msi_off, count);
+ if (pcie_msi_off >= count) {
+ PCIEB_DEBUG(DBG_ATTACH, dip, "MSI number %u in PCIe "
+ "cap > max allocated %d\n", pcie_msi_off, count);
mutex_exit(&pcieb->pcieb_intr_mutex);
goto FAIL;
}
- if (is_hp)
- pcieb->pcieb_isr_tab[hp_msi_off] |= PCIEB_INTR_SRC_HP;
+ if (is_hp) {
+ pcieb->pcieb_isr_tab[pcie_msi_off] |= PCIEB_INTR_SRC_HP;
+ }
+
+ if (is_pme) {
+ pcieb->pcieb_isr_tab[pcie_msi_off] |=
+ PCIEB_INTR_SRC_PME;
+ }
- if (is_pme)
- pcieb->pcieb_isr_tab[hp_msi_off] |= PCIEB_INTR_SRC_PME;
+ if (is_lbw) {
+ pcieb->pcieb_isr_tab[pcie_msi_off] |=
+ PCIEB_INTR_SRC_LBW;
+ }
} else {
/* INTx handles only Hotplug interrupts */
if (is_hp)
@@ -1353,6 +1483,128 @@ pcieb_close(dev_t dev, int flags, int otyp, cred_t *credp)
}
+/*
+ * Handle PCIEB_IOCTL_RETRAIN: ask the pcie module to retrain the link below
+ * this bridge. Returns EPERM if drv_priv() rejects the caller's credentials,
+ * ENOTSUP if the bridge is not PCIe or passes neither PCIE_IS_RP() nor
+ * PCIE_IS_SWD(), and otherwise whatever pcie_link_retrain() returns.
+ */
static int
+pcieb_ioctl_retrain(pcieb_devstate_t *pcieb, cred_t *credp)
+{
+ pcie_bus_t *bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
+
+ if (drv_priv(credp) != 0) {
+ return (EPERM);
+ }
+
+ if (!PCIE_IS_PCIE(bus_p)) {
+ return (ENOTSUP);
+ }
+
+ if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
+ return (ENOTSUP);
+ }
+
+ return (pcie_link_retrain(pcieb->pcieb_dip));
+}
+
+/*
+ * Handle PCIEB_IOCTL_GET_TARGET_SPEED: copy the bridge's current target link
+ * speed out to the pcieb_ioctl_target_speed_t that arg points to.
+ *
+ * Returns EPERM if drv_priv() rejects the caller's credentials, ENOTSUP if the
+ * bridge is not PCIe or passes neither PCIE_IS_RP() nor PCIE_IS_SWD(), EFAULT
+ * if the copyout fails, and 0 on success. PCIEB_FLAGS_ADMIN_SET is reported in
+ * pits_flags when the target was set through pcie_link_set_target().
+ */
+static int
+pcieb_ioctl_get_speed(pcieb_devstate_t *pcieb, intptr_t arg, int mode,
+ cred_t *credp)
+{
+ pcie_bus_t *bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
+ pcieb_ioctl_target_speed_t pits;
+
+ if (drv_priv(credp) != 0) {
+ return (EPERM);
+ }
+
+ if (!PCIE_IS_PCIE(bus_p)) {
+ return (ENOTSUP);
+ }
+
+ if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
+ return (ENOTSUP);
+ }
+
+ pits.pits_flags = 0;
+ pits.pits_speed = PCIEB_LINK_SPEED_UNKNOWN;
+
+ /*
+ * Snapshot the flags and target speed under the speed mutex, translating
+ * the kernel's speed value into the ioctl's PCIEB_LINK_SPEED_* encoding.
+ */
+ mutex_enter(&bus_p->bus_speed_mutex);
+ if ((bus_p->bus_speed_flags & PCIE_LINK_F_ADMIN_TARGET) != 0) {
+ pits.pits_flags |= PCIEB_FLAGS_ADMIN_SET;
+ }
+ switch (bus_p->bus_target_speed) {
+ case PCIE_LINK_SPEED_2_5:
+ pits.pits_speed = PCIEB_LINK_SPEED_GEN1;
+ break;
+ case PCIE_LINK_SPEED_5:
+ pits.pits_speed = PCIEB_LINK_SPEED_GEN2;
+ break;
+ case PCIE_LINK_SPEED_8:
+ pits.pits_speed = PCIEB_LINK_SPEED_GEN3;
+ break;
+ case PCIE_LINK_SPEED_16:
+ pits.pits_speed = PCIEB_LINK_SPEED_GEN4;
+ break;
+ default:
+ pits.pits_speed = PCIEB_LINK_SPEED_UNKNOWN;
+ break;
+ }
+ mutex_exit(&bus_p->bus_speed_mutex);
+
+ if (ddi_copyout(&pits, (void *)arg, sizeof (pits),
+ mode & FKIOCTL) != 0) {
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+/*
+ * Handle PCIEB_IOCTL_SET_TARGET_SPEED: read a pcieb_ioctl_target_speed_t from
+ * arg and make its pits_speed the bridge's new target link speed.
+ *
+ * Returns EPERM if drv_priv() rejects the caller's credentials, ENOTSUP if the
+ * bridge is not PCIe or passes neither PCIE_IS_RP() nor PCIE_IS_SWD(), EFAULT
+ * if the copyin fails, EINVAL if pits_flags is non-zero or pits_speed is not
+ * one of PCIEB_LINK_SPEED_GEN1 through GEN4, and otherwise whatever
+ * pcie_link_set_target() returns.
+ */
+static int
+pcieb_ioctl_set_speed(pcieb_devstate_t *pcieb, intptr_t arg, int mode,
+ cred_t *credp)
+{
+ pcie_bus_t *bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
+ pcieb_ioctl_target_speed_t pits;
+ pcie_link_speed_t speed;
+
+ if (drv_priv(credp) != 0) {
+ return (EPERM);
+ }
+
+ if (!PCIE_IS_PCIE(bus_p)) {
+ return (ENOTSUP);
+ }
+
+ if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
+ return (ENOTSUP);
+ }
+
+ if (ddi_copyin((void *)arg, &pits, sizeof (pits),
+ mode & FKIOCTL) != 0) {
+ return (EFAULT);
+ }
+
+ /*
+ * No flags are accepted on a set request.
+ */
+ if (pits.pits_flags != 0) {
+ return (EINVAL);
+ }
+
+ switch (pits.pits_speed) {
+ case PCIEB_LINK_SPEED_GEN1:
+ speed = PCIE_LINK_SPEED_2_5;
+ break;
+ case PCIEB_LINK_SPEED_GEN2:
+ speed = PCIE_LINK_SPEED_5;
+ break;
+ case PCIEB_LINK_SPEED_GEN3:
+ speed = PCIE_LINK_SPEED_8;
+ break;
+ case PCIEB_LINK_SPEED_GEN4:
+ speed = PCIE_LINK_SPEED_16;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ return (pcie_link_set_target(pcieb->pcieb_dip, speed));
+}
+
+static int
pcieb_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
int *rvalp)
{
@@ -1363,8 +1615,28 @@ pcieb_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
if (pcieb == NULL)
return (ENXIO);
- /* To handle devctl and hotplug related ioctls */
- rv = pcie_ioctl(pcieb->pcieb_dip, dev, cmd, arg, mode, credp, rvalp);
+ /*
+ * Check if this is one of the commands that the bridge driver natively
+ * understands. There are only a handful of such private ioctls defined
+ * in pcieb_ioctl.h. Otherwise, this ioctl should be handled by the
+ * general pcie driver.
+ */
+ switch (cmd) {
+ case PCIEB_IOCTL_RETRAIN:
+ rv = pcieb_ioctl_retrain(pcieb, credp);
+ break;
+ case PCIEB_IOCTL_GET_TARGET_SPEED:
+ rv = pcieb_ioctl_get_speed(pcieb, arg, mode, credp);
+ break;
+ case PCIEB_IOCTL_SET_TARGET_SPEED:
+ rv = pcieb_ioctl_set_speed(pcieb, arg, mode, credp);
+ break;
+ default:
+ /* To handle devctl and hotplug related ioctls */
+ rv = pcie_ioctl(pcieb->pcieb_dip, dev, cmd, arg, mode, credp,
+ rvalp);
+ break;
+ }
return (rv);
}
@@ -1395,7 +1667,7 @@ pcieb_intr_handler(caddr_t arg1, caddr_t arg2)
if (isrc == PCIEB_INTR_SRC_UNKNOWN)
goto FAIL;
- if (isrc & PCIEB_INTR_SRC_HP)
+ if (isrc & (PCIEB_INTR_SRC_HP | PCIEB_INTR_SRC_LBW))
ret = pcie_intr(dip);
if (isrc & PCIEB_INTR_SRC_PME)
diff --git a/usr/src/uts/common/io/pciex/pcieb.h b/usr/src/uts/common/io/pciex/pcieb.h
index 0868458805..7c1e40ecd9 100644
--- a/usr/src/uts/common/io/pciex/pcieb.h
+++ b/usr/src/uts/common/io/pciex/pcieb.h
@@ -23,7 +23,7 @@
*/
/*
- * Copyright 2019, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _SYS_PCIEB_H
@@ -139,6 +139,7 @@ extern void *pcieb_state;
#define PCIEB_INTR_SRC_HP 0x1
#define PCIEB_INTR_SRC_PME 0x2
#define PCIEB_INTR_SRC_AER 0x4
+#define PCIEB_INTR_SRC_LBW 0x8
/*
* Need to put vendor ids in a common file and not platform specific files
diff --git a/usr/src/uts/common/io/pciex/pcieb_ioctl.h b/usr/src/uts/common/io/pciex/pcieb_ioctl.h
new file mode 100644
index 0000000000..40134037a4
--- /dev/null
+++ b/usr/src/uts/common/io/pciex/pcieb_ioctl.h
@@ -0,0 +1,64 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#ifndef _IO_PCIE_PCIEB_IOCTL_H
+#define _IO_PCIE_PCIEB_IOCTL_H
+
+/*
+ * These are private ioctls for PCIe bridges that are currently consumed by the
+ * 'pcieb' command. These should be used until we figure out how best to
+ * represent PCIe links in the traditional cfgadm and devctl frameworks.
+ */
+
+#include <sys/stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PCIEB_IOCTL (('p' << 24) | ('c' << 16) | ('b' << 8))
+
+/*
+ * This requests that we retrain the link that the PCIe bridge has to its
+ * downstream component.
+ */
+#define PCIEB_IOCTL_RETRAIN (PCIEB_IOCTL | 0x01)
+
+/*
+ * Get and set the current target speed for a bridge. The target speed of the
+ * bridge will have an impact on the values that end up being used by its
+ * downstream components.
+ */
+#define PCIEB_IOCTL_GET_TARGET_SPEED (PCIEB_IOCTL | 0x02)
+#define PCIEB_IOCTL_SET_TARGET_SPEED (PCIEB_IOCTL | 0x03)
+
+typedef struct pcieb_ioctl_target_speed {
+ uint32_t pits_flags;
+ uint32_t pits_speed;
+} pcieb_ioctl_target_speed_t;
+
+#define PCIEB_FLAGS_ADMIN_SET 0x01
+
+#define PCIEB_LINK_SPEED_UNKNOWN 0x00
+#define PCIEB_LINK_SPEED_GEN1 0x01
+#define PCIEB_LINK_SPEED_GEN2 0x02
+#define PCIEB_LINK_SPEED_GEN3 0x03
+#define PCIEB_LINK_SPEED_GEN4 0x04
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _IO_PCIE_PCIEB_IOCTL_H */
diff --git a/usr/src/uts/common/sys/pcie.h b/usr/src/uts/common/sys/pcie.h
index a26729c523..e8f91a1390 100644
--- a/usr/src/uts/common/sys/pcie.h
+++ b/usr/src/uts/common/sys/pcie.h
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright 2019, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _SYS_PCIE_H
@@ -494,6 +494,11 @@ extern "C" {
/*
* Link Control 2 Register (2 bytes)
*/
+
+#define PCIE_LINKCTL2_TARGET_SPEED_2_5 0x1 /* 2.5 GT/s Speed */
+#define PCIE_LINKCTL2_TARGET_SPEED_5 0x2 /* 5.0 GT/s Speed */
+#define PCIE_LINKCTL2_TARGET_SPEED_8 0x3 /* 8.0 GT/s Speed */
+#define PCIE_LINKCTL2_TARGET_SPEED_16 0x4 /* 16.0 GT/s Speed */
#define PCIE_LINKCTL2_TARGET_SPEED_MASK 0x000f
#define PICE_LINKCTL2_ENTER_COMPLIANCE 0x0010
#define PCIE_LINKCTL2_HW_AUTO_SPEED_DIS 0x0020
diff --git a/usr/src/uts/common/sys/pcie_impl.h b/usr/src/uts/common/sys/pcie_impl.h
index faebc9d020..d1d13625c2 100644
--- a/usr/src/uts/common/sys/pcie_impl.h
+++ b/usr/src/uts/common/sys/pcie_impl.h
@@ -20,11 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2019, Joyent, Inc.
- */
-
-/*
- * Copyright 2019, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _SYS_PCIE_IMPL_H
@@ -36,6 +32,7 @@ extern "C" {
#include <sys/pcie.h>
#include <sys/pciev.h>
+#include <sys/taskq_impl.h>
#define PCI_GET_BDF(dip) \
PCIE_DIP2BUS(dip)->bus_bdf
@@ -303,12 +300,22 @@ typedef enum pcie_link_width {
*/
typedef enum pcie_link_speed {
PCIE_LINK_SPEED_UNKNOWN = 0x00,
- PCIE_LINK_SPEED_2_5 = 0x01,
- PCIE_LINK_SPEED_5 = 0x02,
- PCIE_LINK_SPEED_8 = 0x04,
- PCIE_LINK_SPEED_16 = 0x08
+ PCIE_LINK_SPEED_2_5 = 1 << 0,
+ PCIE_LINK_SPEED_5 = 1 << 1,
+ PCIE_LINK_SPEED_8 = 1 << 2,
+ PCIE_LINK_SPEED_16 = 1 << 3
} pcie_link_speed_t;
+typedef enum pcie_link_flags {
+ PCIE_LINK_F_ADMIN_TARGET = 1 << 1
+} pcie_link_flags_t;
+
+typedef enum {
+ PCIE_LBW_S_ENABLED = 1 << 0,
+ PCIE_LBW_S_DISPATCHED = 1 << 1,
+ PCIE_LBW_S_RUNNING = 1 << 2
+} pcie_lbw_state_t;
+
/*
* For hot plugged device, these data are init'ed during probe
* For non-hotplugged device, these data are init'ed in pci_autoconfig (on x86),
@@ -367,11 +374,25 @@ typedef struct pcie_bus {
/*
* Link speed specific fields.
*/
+ kmutex_t bus_speed_mutex;
+ pcie_link_flags_t bus_speed_flags;
pcie_link_width_t bus_max_width;
pcie_link_width_t bus_cur_width;
pcie_link_speed_t bus_sup_speed;
pcie_link_speed_t bus_max_speed;
pcie_link_speed_t bus_cur_speed;
+ pcie_link_speed_t bus_target_speed;
+
+ /*
+ * Link Bandwidth Monitoring
+ */
+ kmutex_t bus_lbw_mutex;
+ kcondvar_t bus_lbw_cv;
+ pcie_lbw_state_t bus_lbw_state;
+ taskq_ent_t bus_lbw_ent;
+ uint64_t bus_lbw_nevents;
+ char *bus_lbw_pbuf;
+ char *bus_lbw_cbuf;
} pcie_bus_t;
/*
@@ -652,6 +673,15 @@ extern pcie_bus_t *pciev_get_affected_dev(pf_impl_t *, pf_data_t *,
extern void pciev_eh_exit(pf_data_t *, uint_t);
extern boolean_t pcie_in_domain(pcie_bus_t *, uint_t);
+/* Link Bandwidth Monitoring */
+extern boolean_t pcie_link_bw_supported(dev_info_t *);
+extern int pcie_link_bw_enable(dev_info_t *);
+extern int pcie_link_bw_disable(dev_info_t *);
+
+/* Link Management */
+extern int pcie_link_set_target(dev_info_t *, pcie_link_speed_t);
+extern int pcie_link_retrain(dev_info_t *);
+
#define PCIE_ZALLOC(data) kmem_zalloc(sizeof (data), KM_SLEEP)
diff --git a/usr/src/uts/common/sys/sysevent/eventdefs.h b/usr/src/uts/common/sys/sysevent/eventdefs.h
index 8995ba4aa0..100b4c7ee9 100644
--- a/usr/src/uts/common/sys/sysevent/eventdefs.h
+++ b/usr/src/uts/common/sys/sysevent/eventdefs.h
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2016 Nexenta Systems, Inc.
- * Copyright 2017 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _SYS_SYSEVENT_EVENTDEFS_H
@@ -55,6 +55,7 @@ extern "C" {
#define EC_ZFS "EC_zfs" /* ZFS event */
#define EC_DATALINK "EC_datalink" /* datalink event */
#define EC_VRRP "EC_vrrp" /* VRRP event */
+#define EC_PCIE "EC_pcie" /* PCIe event */
/*
* The following event class is reserved for exclusive use
@@ -224,6 +225,12 @@ extern "C" {
*/
#define ESC_VRRP_STATE_CHANGE "ESC_vrrp_state_change"
+/*
+ * PCIe subclass definitions. Supporting attributes for PCIe state found in
+ * sys/sysevent/pcie.h.
+ */
+#define ESC_PCIE_LINK_STATE "ESC_pcie_link_state"
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/sysevent/pcie.h b/usr/src/uts/common/sys/sysevent/pcie.h
new file mode 100644
index 0000000000..ff1dc71c5d
--- /dev/null
+++ b/usr/src/uts/common/sys/sysevent/pcie.h
@@ -0,0 +1,57 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#ifndef _SYS_SYSEVENT_PCIE_H
+#define _SYS_SYSEVENT_PCIE_H
+
+/*
+ * PCIe System Event payloads
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Event schema for ESC_PCIE_LINK_STATE
+ *
+ * Event Class - EC_PCIE
+ * Event Sub-Class - ESC_PCIE_LINK_STATE
+ *
+ * Attribute Name - PCIE_EV_DETECTOR_PATH
+ * Attribute Type - SE_DATA_TYPE_STRING
+ * Attribute Value - [devfs path of the node that detected the change]
+ *
+ * Attribute Name - PCIE_EV_CHILD_PATH
+ * Attribute Type - SE_DATA_TYPE_STRING
+ * Attribute Value - [devfs path of the updated child]
+ *
+ * Attribute Name - PCIE_EV_DETECTOR_FLAGS
+ * Attribute Type - SE_DATA_TYPE_UINT64
+ * Attribute Value - [PCIe flags that indicate the type of change]
+ */
+
+#define PCIE_EV_DETECTOR_PATH "detector_path"
+#define PCIE_EV_CHILD_PATH "child_path"
+#define PCIE_EV_DETECTOR_FLAGS "detector_flags"
+
+#define PCIE_EV_DETECTOR_FLAGS_LBMS 0x01
+#define PCIE_EV_DETECTOR_FLAGS_LABS 0x02
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SYSEVENT_PCIE_H */
diff --git a/usr/src/uts/i86pc/pcie/Makefile b/usr/src/uts/i86pc/pcie/Makefile
index b72a3772dc..1a849006f5 100644
--- a/usr/src/uts/i86pc/pcie/Makefile
+++ b/usr/src/uts/i86pc/pcie/Makefile
@@ -24,7 +24,7 @@
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# Copyright (c) 2018, Joyent, Inc.
+# Copyright 2019 Joyent, Inc.
# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
#
@@ -57,8 +57,6 @@ ALL_TARGET = $(BINARY)
INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
CERRWARN += -_gcc=-Wno-unused-value
-CERRWARN += -_gcc=-Wno-uninitialized
-CERRWARN += -_gcc=-Wno-parentheses
CERRWARN += -_gcc=-Wno-unused-variable
CERRWARN += -_gcc=-Wno-unused-function # safe
diff --git a/usr/src/uts/intel/io/pciex/pcieb_x86.c b/usr/src/uts/intel/io/pciex/pcieb_x86.c
index 0d73ca010e..3d3906cd1b 100644
--- a/usr/src/uts/intel/io/pciex/pcieb_x86.c
+++ b/usr/src/uts/intel/io/pciex/pcieb_x86.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2019 Joyent, Inc.
*/
/* x86 specific code used by the pcieb driver */
@@ -43,7 +44,8 @@
int pcieb_intel_workaround_disable = 0;
void
-pcieb_peekpoke_cb(dev_info_t *dip, ddi_fm_error_t *derr) {
+pcieb_peekpoke_cb(dev_info_t *dip, ddi_fm_error_t *derr)
+{
pf_eh_enter(PCIE_DIP2BUS(dip));
(void) pf_scan_fabric(dip, derr, NULL);
pf_eh_exit(PCIE_DIP2BUS(dip));
@@ -201,7 +203,8 @@ pcieb_plat_uninitchild(dev_info_t *child)
/* _OSC related */
void
-pcieb_init_osc(dev_info_t *devi) {
+pcieb_init_osc(dev_info_t *devi)
+{
pcie_bus_t *bus_p = PCIE_DIP2UPBUS(devi);
uint32_t osc_flags = OSC_CONTROL_PCIE_ADV_ERR;
@@ -322,10 +325,10 @@ static x86_error_reg_t intel_5000_rp_regs[] = {
{0x144, 8, 0x0, 0x0, 0xF0},
/* EMASK_UNCOR_PEX[21:0] UE mask */
- {0x148, 32, 0x0, PCIE_AER_UCE_UR, PCIE_AER_UCE_UR},
+ {0x148, 32, 0x0, PCIE_AER_UCE_UR, PCIE_AER_UCE_UR},
/* EMASK_RP_PEX[2:0] FE, UE, CE message detect mask */
- {0x150, 8, 0x0, 0x0, 0x1},
+ {0x150, 8, 0x0, 0x0, 0x1},
};
#define INTEL_5000_RP_REGS_LEN \
(sizeof (intel_5000_rp_regs) / sizeof (x86_error_reg_t))
@@ -353,16 +356,16 @@ static x86_error_reg_t intel_5400_rp_regs[] = {
{0x4E, 8, 0x0, 0x1, 0x0},
/* PEX_ERR_DOCMD[11:0] */
- {0x144, 16, 0x0, 0x0, 0xFF0},
+ {0x144, 16, 0x0, 0x0, 0xFF0},
/* PEX_ERR_PIN_MASK[4:0] do not mask ERR[2:0] pins used by DOCMD */
{0x146, 16, 0x0, 0x10, 0x10},
/* EMASK_UNCOR_PEX[21:0] UE mask */
- {0x148, 32, 0x0, PCIE_AER_UCE_UR, PCIE_AER_UCE_UR},
+ {0x148, 32, 0x0, PCIE_AER_UCE_UR, PCIE_AER_UCE_UR},
/* EMASK_RP_PEX[2:0] FE, UE, CE message detect mask */
- {0x150, 8, 0x0, 0x0, 0x1},
+ {0x150, 8, 0x0, 0x0, 0x1},
};
#define INTEL_5400_RP_REGS_LEN \
(sizeof (intel_5400_rp_regs) / sizeof (x86_error_reg_t))
@@ -569,7 +572,7 @@ pcieb_intel_rber_workaround(dev_info_t *dip)
if (!rber)
return;
- PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_SERV, pcieb_rber_sev);
+ (void) PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_SERV, pcieb_rber_sev);
}
/*
diff --git a/usr/src/uts/intel/pcieb/Makefile b/usr/src/uts/intel/pcieb/Makefile
index 3b9f363520..38f4b38a98 100644
--- a/usr/src/uts/intel/pcieb/Makefile
+++ b/usr/src/uts/intel/pcieb/Makefile
@@ -24,7 +24,7 @@
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# Copyright (c) 2018, Joyent, Inc.
+# Copyright 2019 Joyent, Inc.
#
# Path to the base of the uts directory tree (usually /usr/src/uts).
@@ -64,21 +64,6 @@ MODSTUBS_DIR = $(OBJS_DIR)
CLEANFILES += $(MODSTUBS_O)
#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON
-LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
-LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
-LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
-
-CERRWARN += -_gcc=-Wno-unused-value
-
-# needs work
-SMOFF += all_func_returns
-
-#
# Default build targets.
#
.KEEP_STATE:
@@ -103,4 +88,3 @@ install: $(INSTALL_DEPS)
# Include common targets.
#
include $(UTSBASE)/intel/Makefile.targ
-