diff options
author | Bryan Cantrill <bryan@joyent.com> | 2017-02-17 19:43:15 +0000 |
---|---|---|
committer | Bryan Cantrill <bryan@joyent.com> | 2017-02-22 20:31:29 +0000 |
commit | f8fc8f4b458c9b816775f6a3e1673719a05bf84c (patch) | |
tree | 7797e855180ea7967b92a1c07e95805c39d74a6a | |
parent | 167fee23fbf71f65bd79f8dee6ad84a04d618eb5 (diff) | |
download | illumos-joyent-f8fc8f4b458c9b816775f6a3e1673719a05bf84c.tar.gz |
OS-5890 disappearing ixgbe turns correctable error into panic
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
-rw-r--r-- | usr/src/uts/common/io/pciex/pcie_fault.c | 37 |
-rw-r--r-- | usr/src/uts/common/io/pciex/pciev.c | 6 |
-rw-r--r-- | usr/src/uts/common/sys/pcie_impl.h | 6 |
3 files changed, 30 insertions, 19 deletions
diff --git a/usr/src/uts/common/io/pciex/pcie_fault.c b/usr/src/uts/common/io/pciex/pcie_fault.c index f4a2e9190e..41c046fe66 100644 --- a/usr/src/uts/common/io/pciex/pcie_fault.c +++ b/usr/src/uts/common/io/pciex/pcie_fault.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, Joyent, Inc. */ #include <sys/sysmacros.h> @@ -164,8 +165,8 @@ int pcie_disable_scan = 0; /* Disable fabric scan */ /* Inform interested parties that error handling is about to begin. */ /* ARGSUSED */ void -pf_eh_enter(pcie_bus_t *bus_p) { -} +pf_eh_enter(pcie_bus_t *bus_p) +{} /* Inform interested parties that error handling has ended. */ void @@ -304,7 +305,8 @@ done: } void -pcie_force_fullscan() { +pcie_force_fullscan() +{ pcie_full_scan = B_TRUE; } @@ -917,10 +919,11 @@ pf_default_hdl(dev_info_t *dip, pf_impl_t *impl) } /* - * Read vendor/device ID and check with cached data, if it doesn't match - * could very well be a device that isn't responding anymore. Just - * stop. Save the basic info in the error q for post mortem debugging - * purposes. + * Read vendor/device ID and check with cached data; if it doesn't + * match, it could very well mean that the device is no longer + * responding. In this case, we return PF_SCAN_BAD_RESPONSE; should + * the caller choose to panic in this case, we will have the basic + * info in the error queue for the purposes of postmortem debugging. */ if (PCIE_GET(32, bus_p, PCI_CONF_VENID) != bus_p->bus_dev_ven_id) { char buf[FM_MAX_CLASS]; @@ -931,12 +934,12 @@ pf_default_hdl(dev_info_t *dip, pf_impl_t *impl) DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, NULL); /* - * For IOV/Hotplug purposes skip gathering info fo this device, + * For IOV/Hotplug purposes skip gathering info for this device, * but populate affected info and severity. Clear out any data * that maybe been saved in the last fabric scan. 
*/ pf_reset_pfd(pfd_p); - pfd_p->pe_severity_flags = PF_ERR_PANIC_BAD_RESPONSE; + pfd_p->pe_severity_flags = PF_ERR_BAD_RESPONSE; PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_SELF; /* Add the snapshot to the error q */ @@ -1377,7 +1380,7 @@ pf_analyse_error(ddi_fm_error_t *derr, pf_impl_t *impl) sts_flags = 0; /* skip analysing error when no error info is gathered */ - if (pfd_p->pe_severity_flags == PF_ERR_PANIC_BAD_RESPONSE) + if (pfd_p->pe_severity_flags == PF_ERR_BAD_RESPONSE) goto done; switch (PCIE_PFD2BUS(pfd_p)->bus_dev_type) { @@ -2165,7 +2168,8 @@ pf_matched_in_rc(pf_data_t *dq_head_p, pf_data_t *pfd_p, */ static void pf_pci_find_trans_type(pf_data_t *pfd_p, uint64_t *addr, uint32_t *trans_type, - pcie_req_id_t *bdf) { + pcie_req_id_t *bdf) +{ pf_data_t *rc_pfd_p; /* Could be DMA or PIO. Find out by look at error type. */ @@ -2216,7 +2220,8 @@ pf_pci_find_trans_type(pf_data_t *pfd_p, uint64_t *addr, uint32_t *trans_type, */ /* ARGSUSED */ int -pf_pci_decode(pf_data_t *pfd_p, uint16_t *cmd) { +pf_pci_decode(pf_data_t *pfd_p, uint16_t *cmd) +{ pcix_attr_t *attr; uint64_t addr; uint32_t trans_type; @@ -2415,7 +2420,8 @@ done: static int pf_hdl_compare(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag, - uint64_t addr, pcie_req_id_t bdf, ndi_fmc_t *fcp) { + uint64_t addr, pcie_req_id_t bdf, ndi_fmc_t *fcp) +{ ndi_fmcentry_t *fep; int found = 0; int status; @@ -2490,7 +2496,7 @@ pf_hdl_compare(dev_info_t *dip, ddi_fm_error_t *derr, uint32_t flag, /* ARGSUSED */ static int pf_log_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *pfd_p, - boolean_t is_primary) + boolean_t is_primary) { /* * Disabling this function temporarily until errors can be handled @@ -2537,7 +2543,8 @@ pf_log_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, pf_data_t *pfd_p, * accessed via the bus_p. 
*/ int -pf_tlp_decode(pcie_bus_t *bus_p, pf_pcie_adv_err_regs_t *adv_reg_p) { +pf_tlp_decode(pcie_bus_t *bus_p, pf_pcie_adv_err_regs_t *adv_reg_p) +{ pcie_tlp_hdr_t *tlp_hdr = (pcie_tlp_hdr_t *)adv_reg_p->pcie_ue_hdr; pcie_req_id_t my_bdf, tlp_bdf, flt_bdf = PCIE_INVALID_BDF; uint64_t flt_addr = 0; diff --git a/usr/src/uts/common/io/pciex/pciev.c b/usr/src/uts/common/io/pciex/pciev.c index b442aae5b5..c8b2cfdda7 100644 --- a/usr/src/uts/common/io/pciex/pciev.c +++ b/usr/src/uts/common/io/pciex/pciev.c @@ -23,6 +23,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2017, Joyent, Inc. + */ + #include <sys/types.h> #include <sys/ddi.h> #include <sys/dditypes.h> @@ -302,7 +306,7 @@ pciev_eh(pf_data_t *pfd_p, pf_impl_t *impl) pcie_faulty_all = B_TRUE; } else if (severity & (PF_ERR_NO_PANIC | PF_ERR_MATCHED_DEVICE | - PF_ERR_PANIC | PF_ERR_PANIC_BAD_RESPONSE)) { + PF_ERR_PANIC | PF_ERR_BAD_RESPONSE)) { uint16_t affected_flag, dev_affected_flags; uint_t is_panic = 0, is_aff_dev_found = 0; diff --git a/usr/src/uts/common/sys/pcie_impl.h b/usr/src/uts/common/sys/pcie_impl.h index 7be80c49f1..7f199a18ec 100644 --- a/usr/src/uts/common/sys/pcie_impl.h +++ b/usr/src/uts/common/sys/pcie_impl.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, Joyent, Inc. 
*/ #ifndef _SYS_PCIE_IMPL_H @@ -417,11 +418,10 @@ typedef struct pf_impl { #define PF_ERR_MATCHED_PARENT (1 << 5) /* Error Handled By Parent */ #define PF_ERR_PANIC (1 << 6) /* Error should panic system */ #define PF_ERR_PANIC_DEADLOCK (1 << 7) /* deadlock detected */ -#define PF_ERR_PANIC_BAD_RESPONSE (1 << 8) /* Device no response */ +#define PF_ERR_BAD_RESPONSE (1 << 8) /* Device bad/no response */ #define PF_ERR_MATCH_DOM (1 << 9) /* Error Handled By IO domain */ -#define PF_ERR_FATAL_FLAGS \ - (PF_ERR_PANIC | PF_ERR_PANIC_DEADLOCK | PF_ERR_PANIC_BAD_RESPONSE) +#define PF_ERR_FATAL_FLAGS (PF_ERR_PANIC | PF_ERR_PANIC_DEADLOCK) #define PF_HDL_FOUND 1 #define PF_HDL_NOTFOUND 2 |