summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bryan Cantrill <bryan@joyent.com> 2017-02-17 19:43:15 +0000
committer Bryan Cantrill <bryan@joyent.com> 2017-02-22 20:31:29 +0000
commit f8fc8f4b458c9b816775f6a3e1673719a05bf84c (patch)
tree 7797e855180ea7967b92a1c07e95805c39d74a6a
parent 167fee23fbf71f65bd79f8dee6ad84a04d618eb5 (diff)
download illumos-joyent-f8fc8f4b458c9b816775f6a3e1673719a05bf84c.tar.gz
OS-5890 disappearing ixgbe turns correctable error into panic
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
-rw-r--r-- usr/src/uts/common/io/pciex/pcie_fault.c | 37
-rw-r--r-- usr/src/uts/common/io/pciex/pciev.c | 6
-rw-r--r-- usr/src/uts/common/sys/pcie_impl.h | 6
3 files changed, 30 insertions, 19 deletions
diff --git a/usr/src/uts/common/io/pciex/pcie_fault.c b/usr/src/uts/common/io/pciex/pcie_fault.c
index f4a2e9190e..41c046fe66 100644
--- a/usr/src/uts/common/io/pciex/pcie_fault.c
+++ b/usr/src/uts/common/io/pciex/pcie_fault.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc.
*/
#include <sys/sysmacros.h>
@@ -164,8 +165,8 @@ int pcie_disable_scan = 0; /* Disable fabric scan */
/* Inform interested parties that error handling is about to begin. */
/* ARGSUSED */
void
-pf_eh_enter(pcie_bus_t *bus_p) {
-}
+pf_eh_enter(pcie_bus_t *bus_p)
+{}
/* Inform interested parties that error handling has ended. */
void
@@ -304,7 +305,8 @@ done:
}
void
-pcie_force_fullscan() {
+pcie_force_fullscan()
+{
pcie_full_scan = B_TRUE;
}
@@ -917,10 +919,11 @@ pf_default_hdl(dev_info_t *dip, pf_impl_t *impl)
}
/*
- * Read vendor/device ID and check with cached data, if it doesn't match
- * could very well be a device that isn't responding anymore. Just
- * stop. Save the basic info in the error q for post mortem debugging
- * purposes.
+ * Read vendor/device ID and check with cached data; if it doesn't
+ * match, it could very well mean that the device is no longer
+ * responding. In this case, we return PF_SCAN_BAD_RESPONSE; should
+ * the caller choose to panic in this case, we will have the basic
+ * info in the error queue for the purposes of postmortem debugging.
*/
if (PCIE_GET(32, bus_p, PCI_CONF_VENID) != bus_p->bus_dev_ven_id) {
char buf[FM_MAX_CLASS];
@@ -931,12 +934,12 @@ pf_default_hdl(dev_info_t *dip, pf_impl_t *impl)
DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, NULL);
/*
- * For IOV/Hotplug purposes skip gathering info fo this device,
+ * For IOV/Hotplug purposes skip gathering info for this device,
* but populate affected info and severity. Clear out any data
* that maybe been saved in the last fabric scan.
*/
pf_reset_pfd(pfd_p);
- pfd_p->pe_severity_flags = PF_ERR_PANIC_BAD_RESPONSE;
+ pfd_p->pe_severity_flags = PF_ERR_BAD_RESPONSE;
PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF;
/* Add the snapshot to the error q */
@@ -1377,7 +1380,7 @@ pf_analyse_error(ddi_fm_error_t *derr, pf_impl_t *impl)
sts_flags = 0;
/* skip analysing error when no error info is gathered */
- if (pfd_p->pe_severity_flags == PF_ERR_PANIC_BAD_RESPONSE)
+ if (pfd_p->pe_severity_flags == PF_ERR_BAD_RESPONSE)
goto done;
switch (PCIE_PFD2BUS(pfd_p)->bus_dev_type) {
@@ -2165,7 +2168,8 @@ pf_matched_in_rc(pf_data_t *dq_head_p, pf_data_t *pfd_p,
*/
static void
pf_pci_find_trans_type(pf_data_t *pfd_p, uint64_t *addr, uint32_t *trans_type,
- pcie_req_id_t *bdf) {
+ pcie_req_id_t *bdf)
+{
pf_data_t *rc_pfd_p;
/* Could be DMA or PIO. Find out by look at error type. */
@@ -2216,7 +2220,8 @@ pf_pci_find_trans_type(pf_data_t *pfd_p, uint64_t *addr, uint32_t *trans_type,
*/
/* ARGSUSED */
int
-pf_pci_decode(pf_data_t *pfd_p, uint16_t *cmd) {
+pf_pci_decode(pf_data_t *pfd_p, uint16_t *cmd)
+{
pcix_attr_t *attr;
uint64_t addr;
uint32_t trans_type;
@@ -2415,7 +2420,8 @@ done:
static int
pf_hdl_compare(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag,
- uint64_t addr, pcie_req_id_t bdf, ndi_fmc_t *fcp) {
+ uint64_t addr, pcie_req_id_t bdf, ndi_fmc_t *fcp)
+{
ndi_fmcentry_t *fep;
int found = 0;
int status;
@@ -2490,7 +2496,7 @@ pf_hdl_compare(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag,
/* ARGSUSED */
static int
pf_log_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *pfd_p,
- boolean_t is_primary)
+ boolean_t is_primary)
{
/*
* Disabling this function temporarily until errors can be handled
@@ -2537,7 +2543,8 @@ pf_log_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *pfd_p,
* accessed via the bus_p.
*/
int
-pf_tlp_decode(pcie_bus_t *bus_p, pf_pcie_adv_err_regs_t *adv_reg_p) {
+pf_tlp_decode(pcie_bus_t *bus_p, pf_pcie_adv_err_regs_t *adv_reg_p)
+{
pcie_tlp_hdr_t *tlp_hdr = (pcie_tlp_hdr_t *)adv_reg_p->pcie_ue_hdr;
pcie_req_id_t my_bdf, tlp_bdf, flt_bdf = PCIE_INVALID_BDF;
uint64_t flt_addr = 0;
diff --git a/usr/src/uts/common/io/pciex/pciev.c b/usr/src/uts/common/io/pciex/pciev.c
index b442aae5b5..c8b2cfdda7 100644
--- a/usr/src/uts/common/io/pciex/pciev.c
+++ b/usr/src/uts/common/io/pciex/pciev.c
@@ -23,6 +23,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2017, Joyent, Inc.
+ */
+
#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/dditypes.h>
@@ -302,7 +306,7 @@ pciev_eh(pf_data_t *pfd_p, pf_impl_t *impl)
pcie_faulty_all = B_TRUE;
} else if (severity & (PF_ERR_NO_PANIC | PF_ERR_MATCHED_DEVICE |
- PF_ERR_PANIC | PF_ERR_PANIC_BAD_RESPONSE)) {
+ PF_ERR_PANIC | PF_ERR_BAD_RESPONSE)) {
uint16_t affected_flag, dev_affected_flags;
uint_t is_panic = 0, is_aff_dev_found = 0;
diff --git a/usr/src/uts/common/sys/pcie_impl.h b/usr/src/uts/common/sys/pcie_impl.h
index 7be80c49f1..7f199a18ec 100644
--- a/usr/src/uts/common/sys/pcie_impl.h
+++ b/usr/src/uts/common/sys/pcie_impl.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc.
*/
#ifndef _SYS_PCIE_IMPL_H
@@ -417,11 +418,10 @@ typedef struct pf_impl {
#define PF_ERR_MATCHED_PARENT (1 << 5) /* Error Handled By Parent */
#define PF_ERR_PANIC (1 << 6) /* Error should panic system */
#define PF_ERR_PANIC_DEADLOCK (1 << 7) /* deadlock detected */
-#define PF_ERR_PANIC_BAD_RESPONSE (1 << 8) /* Device no response */
+#define PF_ERR_BAD_RESPONSE (1 << 8) /* Device bad/no response */
#define PF_ERR_MATCH_DOM (1 << 9) /* Error Handled By IO domain */
-#define PF_ERR_FATAL_FLAGS \
- (PF_ERR_PANIC | PF_ERR_PANIC_DEADLOCK | PF_ERR_PANIC_BAD_RESPONSE)
+#define PF_ERR_FATAL_FLAGS (PF_ERR_PANIC | PF_ERR_PANIC_DEADLOCK)
#define PF_HDL_FOUND 1
#define PF_HDL_NOTFOUND 2