diff options
author | et142600 <none@none> | 2006-12-18 17:33:27 -0800 |
---|---|---|
committer | et142600 <none@none> | 2006-12-18 17:33:27 -0800 |
commit | bf8fc2340620695a402331e5da7c7db43264174d (patch) | |
tree | 9b23a124fe91273377ea5adbd2b56d8eba5778dc | |
parent | 60ab199ec2cf89967053c7b7c15b36530083b272 (diff) | |
download | illumos-joyent-bf8fc2340620695a402331e5da7c7db43264174d.tar.gz |
PSARC/2006/599 SPARC PCI Express Fabric FMA Support
6345809 SPARC PCIe systems should not call pci_ereport_post
6404017 PX Error handling code should match error philosophy spec
6418713 Failed PIOs in PCIe sparc platforms may go undetected
6466248 Support PCIe Fabric FMA for SPARC Platforms
--HG--
rename : deleted_files/usr/src/uts/common/io/pcie_fault.c => usr/src/uts/common/io/pcie_fault.c
rename : deleted_files/usr/src/uts/sun4v/io/px/px_err_gen.c => usr/src/uts/sun4v/io/px/px_err_gen.c
31 files changed, 2499 insertions, 1550 deletions
diff --git a/usr/src/cmd/fm/eversholt/files/sparc/sun4/fire.esc b/usr/src/cmd/fm/eversholt/files/sparc/sun4/fire.esc index 68ff6c2da2..fd1d8bcdd9 100644 --- a/usr/src/cmd/fm/eversholt/files/sparc/sun4/fire.esc +++ b/usr/src/cmd/fm/eversholt/files/sparc/sun4/fire.esc @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,10 +18,10 @@ * * CDDL HEADER END */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ #pragma ident "%Z%%M% %I% %E% SMI" @@ -39,10 +38,18 @@ #define EBUS_FIT 1000 #define LINK_EVENTS_COUNT 10 #define LINK_EVENTS_TIME 1h - +#define CE_EVENTS_COUNT 10 +#define CE_EVENTS_TIME 1h + #define PROP_PLAT_FRU "PLAT-FRU" /* + * Macros for CE Fabric ereports + */ +#define PF_CE (1 << 1) +#define MATCH_CE (payloadprop("severity") == PF_CE) + +/* * Test for primary or secondary ereports */ #define IS_PRIMARY (payloadprop("primary")) @@ -195,7 +202,8 @@ event ereport.io.fire.pec.rto@hostbridge/pciexrc {within(5s)}; * Whenever a leaf device sends an error message (fatal, non-fatal, or CE) to * root complex, the nexus driver publishes this ereport to log the ereport. 
*/ -event ereport.io.fire.fabric@hostbridge/pciexrc {within(5s)}; +event ereport.io.fire.fabric@pciexbus/pciexdev/pciexfn {within(1s)}; +event ereport.io.fire.fabric@pcibus/pcidev/pcifn {within(1s)}; /* * A faulty Fire hostbridge may cause (* may cause PCI-E abort): @@ -722,7 +730,56 @@ prop upset.io.fire.nodiag@hostbridge (0)-> ereport.io.fire.pec.lin@hostbridge/pciexrc, ereport.io.fire.pec.lup@hostbridge/pciexrc, error.io.fire.pec.secondary@hostbridge/pciexrc, - error.io.fire.pec.fabric_error@hostbridge/pciexrc, - ereport.io.fire.fabric@hostbridge/pciexrc + error.io.fire.pec.fabric_error@hostbridge/pciexrc ; +event error.io.fire.fabric@hostbridge/pciexrc; + +prop fault.io.fire.pciex.device@pciexbus/pciexdev/pciexfn (1) -> + ereport.io.fire.fabric@pciexbus/pciexdev/pciexfn { !MATCH_CE }; + +prop fault.io.fire.pci.device@pcibus/pcidev/pcifn (1) -> + ereport.io.fire.fabric@pcibus/pcidev/pcifn; + +/* + * Below rules are so we get a single suspect list in 1 fault with percentage + * of indiction being equal among all the suspect FRUs + */ +prop error.io.fire.fabric@hostbridge/pciexrc (0) -> + ereport.io.fire.fabric@pciexbus/pciexdev/pciexfn { + is_under(hostbridge/pciexrc, pciexbus/pciexdev/pciexfn) && !MATCH_CE + }; + +prop error.io.fire.fabric@hostbridge/pciexrc (0) -> + ereport.io.fire.fabric@pcibus/pcidev/pcifn { + is_under(hostbridge/pciexrc, pcibus/pcidev/pcifn) + }; + +prop fault.io.fire.pciex.device@pciexbus/pciexdev/pciexfn (1) -> + error.io.fire.fabric@hostbridge/pciexrc { + is_under(hostbridge/pciexrc, pciexbus/pciexdev/pciexfn) + }; + +prop fault.io.fire.pci.device@pcibus/pcidev/pcifn (1) -> + error.io.fire.fabric@hostbridge/pciexrc { + is_under(hostbridge/pciexrc, pcibus/pcidev/pcifn) + }; + +event upset.io.fire.fabric@pciexbus/pciexdev/pciexfn{within(1s)}; +event ereport.io.fire.pciex.ce@pciexbus/pciexdev/pciexfn{within(1s)}; + +/* SERD CEs */ +prop upset.io.fire.fabric@pciexbus[b]/pciexdev[d]/pciexfn[f] (0) -> + 
ereport.io.fire.fabric@pciexbus[b]/pciexdev[d]/pciexfn[f] + { MATCH_CE }; + +event upset.io.fire.fabric@pciexbus/pciexdev/pciexfn, + engine=serd.io.fire.fabric@pciexbus/pciexdev/pciexfn; + +engine serd.io.fire.fabric@pciexbus/pciexdev/pciexfn, + N=CE_EVENTS_COUNT, T=CE_EVENTS_TIME, method=persistent, + trip=ereport.io.fire.pciex.ce@pciexbus/pciexdev/pciexfn; + +prop fault.io.fire.pciex.device@pciexbus/pciexdev/pciexfn (0) -> + ereport.io.fire.pciex.ce@pciexbus/pciexdev/pciexfn; + diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 2a1a888dc1..05146cb881 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -1301,7 +1301,7 @@ AS_INC_PATH += $(INC_PATH) -I$(UTSBASE)/common INCLUDE_PATH += $(INC_PATH) $(CCYFLAG)$(UTSBASE)/common # -PCIE_OBJS += pcie.o +PCIE_OBJS += pcie.o pcie_fault.o # Chelsio N110 10G NIC driver module # diff --git a/usr/src/uts/common/io/pcie.c b/usr/src/uts/common/io/pcie.c index 2d44c59d53..9b7374ba79 100644 --- a/usr/src/uts/common/io/pcie.c +++ b/usr/src/uts/common/io/pcie.c @@ -32,28 +32,28 @@ #include <sys/ddi.h> #include <sys/sunddi.h> #include <sys/sunndi.h> -#include <sys/promif.h> /* prom_printf */ -#include <sys/disp.h> /* prom_printf */ +#include <sys/fm/protocol.h> +#include <sys/fm/util.h> +#include <sys/promif.h> +#include <sys/disp.h> #include <sys/pcie.h> #include <sys/pci_cap.h> #include <sys/pcie_impl.h> -#include <sys/pci_impl.h> -static int pcie_get_bdf_from_dip(dev_info_t *dip, uint32_t *bdf); dev_info_t *pcie_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip); uint32_t pcie_get_bdf_for_dma_xfer(dev_info_t *dip, dev_info_t *rdip); -#ifdef DEBUG +#ifdef DEBUG uint_t pcie_debug_flags = 0; #define PCIE_DBG pcie_dbg static void pcie_dbg(char *fmt, ...); -#else /* DEBUG */ +#else /* DEBUG */ #define PCIE_DBG 0 && -#endif /* DEBUG */ +#endif /* DEBUG */ /* Variable to control default PCI-Express config settings */ ushort_t pcie_command_default = 
PCI_COMM_SERR_ENABLE | @@ -126,95 +126,159 @@ _info(struct modinfo *modinfop) int pcie_initchild(dev_info_t *cdip) { - ddi_acc_handle_t config_handle; uint8_t header_type; uint8_t bcr; uint16_t command_reg, status_reg; - uint16_t cap_ptr; - pci_parent_data_t *pd_p; + pcie_ppd_t *ppd_p; + ddi_acc_handle_t eh; - if (pci_config_setup(cdip, &config_handle) != DDI_SUCCESS) + ppd_p = pcie_init_ppd(cdip); + if (ppd_p == NULL) return (DDI_FAILURE); - /* Allocate memory for pci parent data */ - pd_p = kmem_zalloc(sizeof (pci_parent_data_t), KM_SLEEP); + eh = ppd_p->ppd_cfg_hdl; - /* - * Retrieve and save BDF and PCIE2PCI bridge's secondary bus - * information in the parent private data structure. - */ - if (pcie_get_bdf_from_dip(cdip, &pd_p->pci_bdf) != DDI_SUCCESS) - goto fail; - - pd_p->pci_sec_bus = ddi_prop_get_int(DDI_DEV_T_ANY, cdip, 0, - "pcie2pci-sec-bus", 0); - - /* - * Determine the configuration header type. - */ - header_type = pci_config_get8(config_handle, PCI_CONF_HEADER); - PCIE_DBG("%s: header_type=%x\n", ddi_driver_name(cdip), header_type); - - /* - * Setup the device's command register - */ - status_reg = pci_config_get16(config_handle, PCI_CONF_STAT); - pci_config_put16(config_handle, PCI_CONF_STAT, status_reg); - command_reg = pci_config_get16(config_handle, PCI_CONF_COMM); + /* setup the device's command register */ + header_type = ppd_p->ppd_hdr_type; + status_reg = pci_config_get16(eh, PCI_CONF_STAT); + pci_config_put16(eh, PCI_CONF_STAT, status_reg); + command_reg = pci_config_get16(eh, PCI_CONF_COMM); command_reg |= pcie_command_default; - pci_config_put16(config_handle, PCI_CONF_COMM, command_reg); + pci_config_put16(eh, PCI_CONF_COMM, command_reg); - PCIE_DBG("%s: command=%x\n", ddi_driver_name(cdip), - pci_config_get16(config_handle, PCI_CONF_COMM)); + PCIE_DBG("pcie_initchild: %s(dip 0x%p), header_type=%x, " + "command=%x\n", ddi_driver_name(cdip), (void *)cdip, + header_type, pci_config_get16(eh, PCI_CONF_COMM)); /* * If the device has a bus 
control register then program it * based on the settings in the command register. */ - if ((header_type & PCI_HEADER_TYPE_M) == PCI_HEADER_ONE) { - status_reg = pci_config_get16(config_handle, + if (header_type == PCI_HEADER_ONE) { + status_reg = pci_config_get16(eh, PCI_BCNF_SEC_STATUS); - pci_config_put16(config_handle, PCI_BCNF_SEC_STATUS, + pci_config_put16(eh, PCI_BCNF_SEC_STATUS, status_reg); - bcr = pci_config_get8(config_handle, PCI_BCNF_BCNTRL); + bcr = pci_config_get8(eh, PCI_BCNF_BCNTRL); if (pcie_command_default & PCI_COMM_PARITY_DETECT) bcr |= PCI_BCNF_BCNTRL_PARITY_ENABLE; if (pcie_command_default & PCI_COMM_SERR_ENABLE) bcr |= PCI_BCNF_BCNTRL_SERR_ENABLE; bcr |= PCI_BCNF_BCNTRL_MAST_AB_MODE; - pci_config_put8(config_handle, PCI_BCNF_BCNTRL, bcr); + pci_config_put8(eh, PCI_BCNF_BCNTRL, bcr); + } + + if (ppd_p->ppd_pcie_off) + pcie_enable_errors(cdip, eh); + + return (DDI_SUCCESS); +} + +/* Initialize PCIe Parent Private Data */ +pcie_ppd_t * +pcie_init_ppd(dev_info_t *cdip) +{ + pcie_ppd_t *ppd_p = 0; + ddi_acc_handle_t eh; + int range_size; + + /* allocate memory for pcie parent data */ + ppd_p = kmem_zalloc(sizeof (pcie_ppd_t), KM_SLEEP); + + /* Create an config access special to error handling */ + if (pci_config_setup(cdip, &eh) != DDI_SUCCESS) { + kmem_free(ppd_p, sizeof (pcie_ppd_t)); + return (NULL); } + ppd_p->ppd_cfg_hdl = eh; + + /* get device's bus/dev/function number */ + if (pcie_get_bdf_from_dip(cdip, &ppd_p->ppd_bdf) != DDI_SUCCESS) + goto fail; + + /* Save the Vendor Id Device Id */ + ppd_p->ppd_dev_ven_id = pci_config_get32(eh, PCI_CONF_VENID); + + /* Save the Header Type */ + ppd_p->ppd_hdr_type = pci_config_get8(eh, PCI_CONF_HEADER); + ppd_p->ppd_hdr_type &= PCI_HEADER_TYPE_M; + + /* Save the Range information if device is a switch/bridge */ + if (ppd_p->ppd_hdr_type == PCI_HEADER_ONE) { + /* get "bus_range" property */ + range_size = sizeof (pci_bus_range_t); + if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, + 
"bus-range", (caddr_t)&ppd_p->ppd_bus_range, &range_size) + != DDI_PROP_SUCCESS) + goto fail; + + /* get secondary bus number */ + ppd_p->ppd_bdg_secbus = pci_config_get8(eh, PCI_BCNF_SECBUS); + + /* Get "ranges" property */ + if (ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, + "ranges", (caddr_t)&ppd_p->ppd_addr_ranges, + &ppd_p->ppd_addr_entries) != DDI_PROP_SUCCESS) + ppd_p->ppd_addr_entries = 0; + ppd_p->ppd_addr_entries /= sizeof (ppb_ranges_t); + } + + /* save "assigned-addresses" property array, ignore failues */ + if (ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, + "assigned-addresses", (caddr_t)&ppd_p->ppd_assigned_addr, + &ppd_p->ppd_assigned_entries) == DDI_PROP_SUCCESS) + ppd_p->ppd_assigned_entries /= sizeof (pci_regspec_t); + else + ppd_p->ppd_assigned_entries = 0; - if ((PCI_CAP_LOCATE(config_handle, PCI_CAP_ID_PCI_E, &cap_ptr)) + if ((PCI_CAP_LOCATE(eh, PCI_CAP_ID_PCI_E, &ppd_p->ppd_pcie_off)) != DDI_FAILURE) { - pcie_enable_errors(cdip, config_handle); + ppd_p->ppd_dev_type = PCI_CAP_GET16(eh, NULL, + ppd_p->ppd_pcie_off, PCIE_PCIECAP) & + PCIE_PCIECAP_DEV_TYPE_MASK; - pd_p->pci_phfun = (pci_config_get8(config_handle, - cap_ptr + PCIE_DEVCAP) & PCIE_DEVCAP_PHTM_FUNC_MASK) >> 3; + ppd_p->ppd_pcie_phfun = (pci_config_get8(eh, + ppd_p->ppd_pcie_off + PCIE_DEVCAP) & + PCIE_DEVCAP_PHTM_FUNC_MASK) >> 3; + + if (PCI_CAP_LOCATE(eh, PCI_CAP_XCFG_SPC(PCIE_EXT_CAP_ID_AER), + &ppd_p->ppd_aer_off) != DDI_SUCCESS) + ppd_p->ppd_aer_off = NULL; + } else { + ppd_p->ppd_pcie_off = NULL; + ppd_p->ppd_pcie_phfun = NULL; + ppd_p->ppd_dev_type = PCIE_PCIECAP_DEV_TYPE_PCI_DEV; } - ddi_set_parent_data(cdip, (void *)pd_p); - pci_config_teardown(&config_handle); - return (DDI_SUCCESS); + ppd_p->ppd_dip = cdip; + ppd_p->ppd_fm_flags = 0; + ddi_set_parent_data(cdip, (void *)ppd_p); + + PCIE_DBG("Add %s(dip 0x%p, bdf 0x%x, secbus 0x%x)\n", + ddi_driver_name(cdip), (void *)cdip, ppd_p->ppd_bdf, + ppd_p->ppd_bdg_secbus); + + return (ppd_p); fail: - 
cmn_err(CE_WARN, "PCIE init child failed\n"); - kmem_free(pd_p, sizeof (pci_parent_data_t)); - pci_config_teardown(&config_handle); - return (DDI_FAILURE); + cmn_err(CE_WARN, "PCIE init err info failed BDF 0x%x\n", + ppd_p->ppd_bdf); + pci_config_teardown(&eh); + kmem_free(ppd_p, sizeof (pcie_ppd_t)); + return (NULL); } int pcie_postattach_child(dev_info_t *dip) { - ddi_acc_handle_t config_handle; + ddi_acc_handle_t cfg_hdl; int rval = DDI_FAILURE; - if (pci_config_setup(dip, &config_handle) != DDI_SUCCESS) + if (pci_config_setup(dip, &cfg_hdl) != DDI_SUCCESS) return (DDI_FAILURE); - rval = pcie_enable_ce(dip, config_handle); + rval = pcie_enable_ce(dip, cfg_hdl); - pci_config_teardown(&config_handle); + pci_config_teardown(&cfg_hdl); return (rval); } @@ -222,83 +286,87 @@ pcie_postattach_child(dev_info_t *dip) * PCI-Express child device de-initialization. * This function disables generic pci-express interrupts and error * handling. - * - * @param pdip parent dip (root nexus's dip) - * @param cdip child's dip (device's dip) - * @param arg pcie private data */ -/* ARGSUSED */ void pcie_uninitchild(dev_info_t *cdip) { - ddi_acc_handle_t config_handle; - pci_parent_data_t *pd_p; + pcie_ppd_t *ppd_p; + ppd_p = pcie_get_ppd(cdip); - if (pd_p = ddi_get_parent_data(cdip)) { - ddi_set_parent_data(cdip, NULL); - kmem_free(pd_p, sizeof (pci_parent_data_t)); - } - - if (pci_config_setup(cdip, &config_handle) != DDI_SUCCESS) - return; - - pcie_disable_errors(cdip, config_handle); + pcie_disable_errors(cdip, ppd_p->ppd_cfg_hdl); + pcie_uninit_ppd(cdip); +} - pci_config_teardown(&config_handle); +void +pcie_uninit_ppd(dev_info_t *cdip) +{ + pcie_ppd_t *ppd_p; + + ppd_p = pcie_get_ppd(cdip); + ASSERT(ppd_p); + pci_config_teardown(&ppd_p->ppd_cfg_hdl); + kmem_free(ppd_p->ppd_assigned_addr, + (sizeof (pci_regspec_t) * ppd_p->ppd_assigned_entries)); + kmem_free(ppd_p->ppd_addr_ranges, + (sizeof (ppb_ranges_t) * ppd_p->ppd_addr_entries)); + + kmem_free(ppd_p, sizeof (pcie_ppd_t)); + 
ddi_set_parent_data(cdip, NULL); } /* ARGSUSED */ void -pcie_clear_errors(dev_info_t *dip, ddi_acc_handle_t config_handle) +pcie_clear_errors(dev_info_t *dip, ddi_acc_handle_t cfg_hdl) { uint16_t cap_ptr, aer_ptr, dev_type, device_sts; int rval = DDI_FAILURE; /* 1. clear the Legacy PCI Errors */ - device_sts = pci_config_get16(config_handle, PCI_CONF_STAT); - pci_config_put16(config_handle, PCI_CONF_STAT, device_sts); + device_sts = pci_config_get16(cfg_hdl, PCI_CONF_STAT); + pci_config_put16(cfg_hdl, PCI_CONF_STAT, device_sts); - if ((PCI_CAP_LOCATE(config_handle, PCI_CAP_ID_PCI_E, &cap_ptr)) - == DDI_FAILURE) + if ((PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_ID_PCI_E, &cap_ptr)) == + DDI_FAILURE) return; - rval = PCI_CAP_LOCATE(config_handle, PCI_CAP_XCFG_SPC + rval = PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_XCFG_SPC (PCIE_EXT_CAP_ID_AER), &aer_ptr); - dev_type = PCI_CAP_GET16(config_handle, NULL, cap_ptr, + dev_type = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, PCIE_PCIECAP) & PCIE_PCIECAP_DEV_TYPE_MASK; + /* 1.1 clear the Legacy PCI Secondary Bus Errors */ + if (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { + device_sts = pci_config_get16(cfg_hdl, + PCI_BCNF_SEC_STATUS); + pci_config_put16(cfg_hdl, PCI_BCNF_SEC_STATUS, + device_sts); + } + /* * Clear any pending errors */ /* 2. clear the Advanced PCIe Errors */ if (rval != DDI_FAILURE) { - PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_CE_STS, + PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_CE_STS, -1); - PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_UCE_STS, + PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_UCE_STS, -1); - if (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { - PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, + if (dev_type == PCIE_PCIECAP_DEV_TYPE_PCI2PCIE) { + PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_SUCE_STS, -1); } } /* 3. 
clear the PCIe Errors */ - if ((device_sts = PCI_CAP_GET16(config_handle, NULL, cap_ptr, + if ((device_sts = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, PCIE_DEVSTS)) != PCI_CAP_EINVAL16) - PCI_CAP_PUT16(config_handle, PCI_CAP_ID_PCI_E, cap_ptr, + PCI_CAP_PUT16(cfg_hdl, PCI_CAP_ID_PCI_E, cap_ptr, PCIE_DEVSTS, device_sts); - - if (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { - device_sts = pci_config_get16(config_handle, - PCI_BCNF_SEC_STATUS); - pci_config_put16(config_handle, PCI_BCNF_SEC_STATUS, - device_sts); - } } void -pcie_enable_errors(dev_info_t *dip, ddi_acc_handle_t config_handle) +pcie_enable_errors(dev_info_t *dip, ddi_acc_handle_t cfg_hdl) { uint16_t cap_ptr, aer_ptr, dev_type, device_ctl; uint32_t aer_reg; @@ -307,29 +375,29 @@ pcie_enable_errors(dev_info_t *dip, ddi_acc_handle_t config_handle) /* * Clear any pending errors */ - pcie_clear_errors(dip, config_handle); + pcie_clear_errors(dip, cfg_hdl); - if ((PCI_CAP_LOCATE(config_handle, PCI_CAP_ID_PCI_E, &cap_ptr)) + if ((PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_ID_PCI_E, &cap_ptr)) == DDI_FAILURE) return; - rval = PCI_CAP_LOCATE(config_handle, PCI_CAP_XCFG_SPC + rval = PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_XCFG_SPC (PCIE_EXT_CAP_ID_AER), &aer_ptr); - dev_type = PCI_CAP_GET16(config_handle, NULL, cap_ptr, - PCIE_PCIECAP) & PCIE_PCIECAP_DEV_TYPE_MASK; + dev_type = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, PCIE_PCIECAP) & + PCIE_PCIECAP_DEV_TYPE_MASK; /* * Enable Baseline Error Handling but leave CE reporting off (poweron * default). 
*/ - if ((device_ctl = PCI_CAP_GET16(config_handle, NULL, cap_ptr, + if ((device_ctl = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, PCIE_DEVCTL)) != PCI_CAP_EINVAL16) { - PCI_CAP_PUT16(config_handle, NULL, cap_ptr, PCIE_DEVCTL, + PCI_CAP_PUT16(cfg_hdl, NULL, cap_ptr, PCIE_DEVCTL, pcie_devctl_default | (pcie_base_err_default & (~PCIE_DEVCTL_CE_REPORTING_EN))); PCIE_DBG("%s%d: devctl 0x%x -> 0x%x\n", ddi_node_name(dip), ddi_get_instance(dip), device_ctl, - PCI_CAP_GET16(config_handle, NULL, cap_ptr, + PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, PCIE_DEVCTL)); } @@ -341,22 +409,22 @@ pcie_enable_errors(dev_info_t *dip, ddi_acc_handle_t config_handle) } /* Enable Uncorrectable errors */ - if ((aer_reg = PCI_XCAP_GET32(config_handle, NULL, aer_ptr, + if ((aer_reg = PCI_XCAP_GET32(cfg_hdl, NULL, aer_ptr, PCIE_AER_UCE_MASK)) != PCI_CAP_EINVAL32) { - PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, + PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_UCE_MASK, pcie_aer_uce_mask); PCIE_DBG("%s: AER UCE=0x%x->0x%x\n", ddi_driver_name(dip), - aer_reg, PCI_XCAP_GET32(config_handle, NULL, aer_ptr, + aer_reg, PCI_XCAP_GET32(cfg_hdl, NULL, aer_ptr, PCIE_AER_UCE_MASK)); } /* Enable ECRC generation and checking */ - if ((aer_reg = PCI_XCAP_GET32(config_handle, NULL, aer_ptr, + if ((aer_reg = PCI_XCAP_GET32(cfg_hdl, NULL, aer_ptr, PCIE_AER_CTL)) != PCI_CAP_EINVAL32) { aer_reg |= (PCIE_AER_CTL_ECRC_GEN_ENA | PCIE_AER_CTL_ECRC_CHECK_ENA); - PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_CTL, + PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_CTL, aer_reg); } @@ -369,12 +437,12 @@ pcie_enable_errors(dev_info_t *dip, ddi_acc_handle_t config_handle) /* * Enable secondary bus errors */ - if ((aer_reg = PCI_XCAP_GET32(config_handle, NULL, aer_ptr, + if ((aer_reg = PCI_XCAP_GET32(cfg_hdl, NULL, aer_ptr, PCIE_AER_SUCE_MASK)) != PCI_CAP_EINVAL32) { - PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_SUCE_MASK, + PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_SUCE_MASK, pcie_aer_suce_mask); 
PCIE_DBG("%s: AER SUCE=0x%x->0x%x\n", ddi_driver_name(dip), - aer_reg, PCI_XCAP_GET32(config_handle, + aer_reg, PCI_XCAP_GET32(cfg_hdl, PCIE_EXT_CAP_ID_AER, aer_ptr, PCIE_AER_SUCE_MASK)); } } @@ -385,12 +453,12 @@ pcie_enable_errors(dev_info_t *dip, ddi_acc_handle_t config_handle) * a call to pcie_enable_errors. */ int -pcie_enable_ce(dev_info_t *dip, ddi_acc_handle_t config_handle) +pcie_enable_ce(dev_info_t *dip, ddi_acc_handle_t cfg_hdl) { uint16_t cap_ptr, aer_ptr, device_sts, device_ctl; uint32_t tmp_pcie_aer_ce_mask; - if ((PCI_CAP_LOCATE(config_handle, PCI_CAP_ID_PCI_E, &cap_ptr)) + if ((PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_ID_PCI_E, &cap_ptr)) == DDI_FAILURE) return (DDI_FAILURE); @@ -410,60 +478,60 @@ pcie_enable_ce(dev_info_t *dip, ddi_acc_handle_t config_handle) return (DDI_SUCCESS); } - if (PCI_CAP_LOCATE(config_handle, PCI_CAP_XCFG_SPC + if (PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_XCFG_SPC (PCIE_EXT_CAP_ID_AER), &aer_ptr) != DDI_FAILURE) { /* Enable AER CE */ - PCI_XCAP_PUT32(config_handle, PCIE_EXT_CAP_ID_AER, + PCI_XCAP_PUT32(cfg_hdl, PCIE_EXT_CAP_ID_AER, aer_ptr, PCIE_AER_CE_MASK, tmp_pcie_aer_ce_mask); PCIE_DBG("%s: AER CE set to 0x%x\n", - ddi_driver_name(dip), PCI_XCAP_GET32(config_handle, NULL, + ddi_driver_name(dip), PCI_XCAP_GET32(cfg_hdl, NULL, aer_ptr, PCIE_AER_CE_MASK)); /* Clear any pending AER CE errors */ - PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_CE_STS, + PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_CE_STS, -1); } /* clear any pending CE errors */ - if ((device_sts = PCI_CAP_GET16(config_handle, NULL, cap_ptr, + if ((device_sts = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, PCIE_DEVSTS)) != PCI_CAP_EINVAL16) - PCI_CAP_PUT16(config_handle, PCI_CAP_ID_PCI_E, cap_ptr, + PCI_CAP_PUT16(cfg_hdl, PCI_CAP_ID_PCI_E, cap_ptr, PCIE_DEVSTS, device_sts & (~PCIE_DEVSTS_CE_DETECTED)); /* Enable CE reporting */ - device_ctl = PCI_CAP_GET16(config_handle, NULL, cap_ptr, PCIE_DEVCTL); - PCI_CAP_PUT16(config_handle, NULL, cap_ptr, PCIE_DEVCTL, + device_ctl = 
PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, PCIE_DEVCTL); + PCI_CAP_PUT16(cfg_hdl, NULL, cap_ptr, PCIE_DEVCTL, (device_ctl & (~PCIE_DEVCTL_ERR_MASK)) | pcie_base_err_default); PCIE_DBG("%s%d: devctl 0x%x -> 0x%x\n", ddi_node_name(dip), ddi_get_instance(dip), device_ctl, - PCI_CAP_GET16(config_handle, NULL, cap_ptr, PCIE_DEVCTL)); + PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, PCIE_DEVCTL)); return (DDI_SUCCESS); } /* ARGSUSED */ void -pcie_disable_errors(dev_info_t *dip, ddi_acc_handle_t config_handle) +pcie_disable_errors(dev_info_t *dip, ddi_acc_handle_t cfg_hdl) { uint16_t cap_ptr, aer_ptr, dev_type, device_ctl; uint32_t aer_reg; int rval = DDI_FAILURE; - if ((PCI_CAP_LOCATE(config_handle, PCI_CAP_ID_PCI_E, &cap_ptr)) + if ((PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_ID_PCI_E, &cap_ptr)) == DDI_FAILURE) return; - rval = PCI_CAP_LOCATE(config_handle, PCI_CAP_XCFG_SPC + rval = PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_XCFG_SPC (PCIE_EXT_CAP_ID_AER), &aer_ptr); - dev_type = PCI_CAP_GET16(config_handle, NULL, cap_ptr, + dev_type = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, PCIE_PCIECAP) & PCIE_PCIECAP_DEV_TYPE_MASK; /* * Disable PCI-Express Baseline Error Handling */ - device_ctl = PCI_CAP_GET16(config_handle, NULL, cap_ptr, PCIE_DEVCTL); + device_ctl = PCI_CAP_GET16(cfg_hdl, NULL, cap_ptr, PCIE_DEVCTL); device_ctl &= ~PCIE_DEVCTL_ERR_MASK; - PCI_CAP_PUT16(config_handle, NULL, cap_ptr, PCIE_DEVCTL, device_ctl); + PCI_CAP_PUT16(cfg_hdl, NULL, cap_ptr, PCIE_DEVCTL, device_ctl); /* * Disable PCI-Express Advanced Error Handling if Exists @@ -473,20 +541,20 @@ pcie_disable_errors(dev_info_t *dip, ddi_acc_handle_t config_handle) } /* Disable Uncorrectable errors */ - PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_UCE_MASK, + PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_UCE_MASK, PCIE_AER_UCE_BITS); /* Disable Correctable errors */ - PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_CE_MASK, + PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_CE_MASK, PCIE_AER_CE_BITS); /* Disable ECRC generation 
and checking */ - if ((aer_reg = PCI_XCAP_GET32(config_handle, NULL, aer_ptr, + if ((aer_reg = PCI_XCAP_GET32(cfg_hdl, NULL, aer_ptr, PCIE_AER_CTL)) != PCI_CAP_EINVAL32) { aer_reg &= ~(PCIE_AER_CTL_ECRC_GEN_ENA | PCIE_AER_CTL_ECRC_CHECK_ENA); - PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_CTL, + PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_CTL, aer_reg); } /* @@ -498,12 +566,15 @@ pcie_disable_errors(dev_info_t *dip, ddi_acc_handle_t config_handle) /* * Disable secondary bus errors */ - PCI_XCAP_PUT32(config_handle, NULL, aer_ptr, PCIE_AER_SUCE_MASK, + PCI_XCAP_PUT32(cfg_hdl, NULL, aer_ptr, PCIE_AER_SUCE_MASK, PCIE_AER_SUCE_BITS); } -static int -pcie_get_bdf_from_dip(dev_info_t *dip, uint32_t *bdf) +/* + * Extract bdf from "reg" property. + */ +int +pcie_get_bdf_from_dip(dev_info_t *dip, pcie_req_id_t *bdf) { pci_regspec_t *regspec; int reglen; @@ -560,14 +631,33 @@ pcie_get_bdf_for_dma_xfer(dev_info_t *dip, dev_info_t *rdip) PCI_GET_SEC_BUS(cdip) : PCI_GET_BDF(cdip)); } +/* + * Returns Parent Private Data for PCIe devices and PCI devices that are in PCIe + * systems + */ +pcie_ppd_t * +pcie_get_ppd(dev_info_t *dip) +{ + return ((pcie_ppd_t *)ddi_get_parent_data(dip)); +} + +/* + * Is the rdip a child of dip. Used for checking certain CTLOPS from bubbling + * up erronously. Ex. ISA ctlops to a PCI-PCI Bridge. + */ +boolean_t +pcie_is_child(dev_info_t *dip, dev_info_t *rdip) +{ + dev_info_t *cdip = ddi_get_child(dip); + for (; cdip; cdip = ddi_get_next_sibling(cdip)) + if (cdip == rdip) + break; + return (cdip != NULL); +} + #ifdef DEBUG /* - * This is a temporary stop gap measure. - * PX runs at PIL 14, which is higher than the clock's PIL. - * As a results we cannot safely print while servicing interrupts using - * cmn_err or prom_printf. - * - * For debugging purposes set px_dbg_print != 0 to see printf messages + * For debugging purposes set pcie_dbg_print != 0 to see printf messages * during interrupt. 
* * When a proper solution is in place this code will disappear. diff --git a/deleted_files/usr/src/uts/common/io/pcie_fault.c b/usr/src/uts/common/io/pcie_fault.c index 4d1f94f9c0..4d1f94f9c0 100644 --- a/deleted_files/usr/src/uts/common/io/pcie_fault.c +++ b/usr/src/uts/common/io/pcie_fault.c diff --git a/usr/src/uts/common/sys/pci.h b/usr/src/uts/common/sys/pci.h index 8eecb28c77..0aa406df04 100644 --- a/usr/src/uts/common/sys/pci.h +++ b/usr/src/uts/common/sys/pci.h @@ -695,8 +695,19 @@ extern "C" { /* * PCI-X bridge capability related definitions */ -#define PCI_PCIX_SEC_STATUS 0x2 /* Secondary status register offset */ -#define PCI_PCIX_BDG_STATUS 0x4 /* Bridge Status register offset */ +#define PCI_PCIX_SEC_STATUS 0x2 /* Secondary Status offset */ +#define PCI_PCIX_SEC_STATUS_SCD 0x4 /* Split Completion Discarded */ +#define PCI_PCIX_SEC_STATUS_USC 0x8 /* Unexpected Split Complete */ +#define PCI_PCIX_SEC_STATUS_SCO 0x10 /* Split Completion Overrun */ +#define PCI_PCIX_SEC_STATUS_SRD 0x20 /* Split Completion Delayed */ +#define PCI_PCIX_SEC_STATUS_ERR_MASK 0x3C + +#define PCI_PCIX_BDG_STATUS 0x4 /* Bridge Status offset */ +#define PCI_PCIX_BDG_STATUS_USC 0x80000 +#define PCI_PCIX_BDG_STATUS_SCO 0x100000 +#define PCI_PCIX_BDG_STATUS_SRD 0x200000 +#define PCI_PCIX_BDG_STATUS_ERR_MASK 0x380000 + #define PCI_PCIX_UP_SPL_CTL 0x8 /* Upstream split ctrl reg offset */ #define PCI_PCIX_DOWN_SPL_CTL 0xC /* Downstream split ctrl reg offset */ #define PCI_PCIX_BDG_ECC_STATUS 0x10 /* ECC Status register offset */ diff --git a/usr/src/uts/common/sys/pci_impl.h b/usr/src/uts/common/sys/pci_impl.h index 3e492e52ed..5dbf97ba30 100644 --- a/usr/src/uts/common/sys/pci_impl.h +++ b/usr/src/uts/common/sys/pci_impl.h @@ -123,22 +123,6 @@ extern int memlist_count(struct memlist *); #endif /* __i386 || __amd64 */ /* - * Parent private data structure for PCI/PCI-X/PCIe devices. 
- */ -typedef struct pci_parent_data { - uint32_t pci_bdf; /* BDF for pci/pci-x/pcie */ - uint8_t pci_sec_bus; /* PCIE2PCI bridge's secondary bus */ - uint8_t pci_phfun; /* Phantom funs for pci-x/pcie */ -} pci_parent_data_t; - -#define PCI_GET_BDF(dip) \ - ((pci_parent_data_t *)DEVI((dip))->devi_parent_data)->pci_bdf -#define PCI_GET_SEC_BUS(dip) \ - ((pci_parent_data_t *)DEVI((dip))->devi_parent_data)->pci_sec_bus -#define PCI_GET_PHFUN(dip) \ - ((pci_parent_data_t *)DEVI((dip))->devi_parent_data)->pci_phfun - -/* * PCI capability related definitions. */ diff --git a/usr/src/uts/common/sys/pcie.h b/usr/src/uts/common/sys/pcie.h index 98acb6ccd1..624d3f9468 100644 --- a/usr/src/uts/common/sys/pcie.h +++ b/usr/src/uts/common/sys/pcie.h @@ -592,6 +592,11 @@ typedef uint16_t pcie_req_id_t; #define PCIE_REQ_ID_FUNC_SHIFT 0 #define PCIE_REQ_ID_FUNC_MASK 0x0007 +#define PCIE_CPL_STS_SUCCESS 0 +#define PCIE_CPL_STS_UR 1 +#define PCIE_CPL_STS_CRS 2 +#define PCIE_CPL_STS_CA 4 + #if defined(_BIT_FIELDS_LTOH) /* * PCI Express little-endian common TLP header format diff --git a/usr/src/uts/common/sys/pcie_impl.h b/usr/src/uts/common/sys/pcie_impl.h index 30910e2318..56e1b5be6c 100644 --- a/usr/src/uts/common/sys/pcie_impl.h +++ b/usr/src/uts/common/sys/pcie_impl.h @@ -32,28 +32,171 @@ extern "C" { #endif +#include <sys/pcie.h> + +/* PCI-E config space data for error handling and ereport */ +typedef struct pf_data { + dev_info_t *dip; + pcie_req_id_t bdf; + uint32_t severity_flags; + int parent_index; + pcie_req_id_t fault_bdf; + uint32_t fault_addr; + int send_erpt; + + /* 0-3Fh. PCI */ + uint16_t vendor_id; + uint16_t device_id; + uint8_t hdr_type; + uint16_t command; /* command */ + uint16_t status; /* status */ + uint8_t rev_id; + uint16_t s_status; /* Bridge secondary status */ + pcie_req_id_t bdg_secbus; /* Bridge secondary bus num */ + + /* 40h-FFh. 
PCI-X Capability */ + uint16_t pcix_s_status; /* PCI-X Secondary status */ + uint32_t pcix_bdg_status; /* PCI-X Bridge status */ + + /* 40h-FFh. PCI-E Capability */ + uint16_t pcie_off; /* PCI-E capability offset */ + uint8_t dev_type; /* device/port type */ + uint16_t dev_status; /* device status */ + + /* 100h-FFFh. Extended PCI-E */ + uint16_t aer_off; /* AER offset */ + + uint32_t aer_ce_status; /* AER Correctable Errors */ + + uint32_t aer_ue_status; /* AER Uncorrectable Errors */ + uint32_t aer_severity; + uint32_t aer_control; + uint32_t aer_h0; + uint32_t aer_h1; + uint32_t aer_h2; + uint32_t aer_h3; + + uint32_t s_aer_ue_status; /* Secondary AER UEs */ + uint32_t s_aer_control; + uint32_t s_aer_severity; + uint32_t s_aer_h0; + uint32_t s_aer_h1; + uint32_t s_aer_h2; + uint32_t s_aer_h3; +} pf_data_t; + +/* Information used while handling errors in the fabric. */ +typedef struct pf_impl { + dev_info_t *pf_rpdip; + pcie_req_id_t pf_fbdf; /* captured fault bdf to scan */ + uint32_t pf_faddr; /* captured fault addr to scan */ + ddi_fm_error_t *pf_derr; + pf_data_t *pf_dq_p; /* ptr to pcie fault data queue */ + int *pf_dq_tail_p; /* last valid index of fault data q */ +} pf_impl_t; + +/* Parent Private data of PCI/PCIe devices in a PCIe system */ +typedef struct pcie_ppd { + dev_info_t *ppd_dip; + ddi_acc_handle_t ppd_cfg_hdl; /* error handling acc handle */ + kmutex_t ppd_fm_lock; /* error handling lock */ + uint_t ppd_fm_flags; + + /* Static PCI/PCIe information */ + pcie_req_id_t ppd_bdf; + uint32_t ppd_dev_ven_id; /* device/vendor ID */ + uint8_t ppd_hdr_type; /* pci header type, see pci.h */ + uint8_t ppd_dev_type; /* PCI-E dev type, see pcie.h */ + uint8_t ppd_bdg_secbus; /* Bridge secondary bus num */ + uint16_t ppd_pcie_off; /* PCIe Capability Offset */ + uint16_t ppd_aer_off; /* PCIe Advanced Error Offset */ + uint16_t ppd_pcix_off; /* PCIx Capability Offset */ + uint8_t ppd_pcie_phfun; /* Phantom funs for pcix/pcie */ + pci_bus_range_t ppd_bus_range; 
/* pci bus-range property */ + ppb_ranges_t *ppd_addr_ranges; /* pci range property */ + int ppd_addr_entries; /* number of range prop */ + pci_regspec_t *ppd_assigned_addr; /* "assigned-address" prop */ + int ppd_assigned_entries; /* number of prop entries */ +} pcie_ppd_t; + +#define PCI_GET_BDF(dip) \ + ((pcie_ppd_t *)pcie_get_ppd(dip))->ppd_bdf +#define PCI_GET_SEC_BUS(dip) \ + ((pcie_ppd_t *)pcie_get_ppd(dip))->ppd_bdg_secbus +#define PCI_GET_PHFUN(dip) \ + ((pcie_ppd_t *)pcie_get_ppd(dip))->ppd_pcie_phfun + /* * The following flag is used for Broadcom 5714/5715 bridge prefetch issue. * This flag will be used both by px and px_pci nexus drivers. */ #define PX_DMAI_FLAGS_MAP_BUFZONE 0x40000 -/* - * PCI-Express Friendly Functions - */ +/* ppd_fm_flags field */ +#define PF_FM_READY (1 << 0) /* ppd_fm_lock initialized */ +#define PF_IS_NH (1 << 1) /* known as non-hardened */ + +/* PCIe fabric error handling return codes */ +#define PF_NO_ERROR (1 << 0) /* No error seen */ +#define PF_CE (1 << 1) /* Correctable Error */ +#define PF_NO_PANIC (1 << 2) /* Error should not panic sys */ +#define PF_MATCHED_DEVICE (1 << 3) /* Error Handled By Device */ +#define PF_MATCHED_RC (1 << 4) /* Error Handled By RC */ +#define PF_MATCHED_PARENT (1 << 5) /* Error Handled By Parent */ +#define PF_PANIC (1 << 6) /* Error should panic system */ + +/* PCIe fabric handle lookup return codes */ +#define PF_HDL_FOUND 0 +#define PF_HDL_NOTFOUND 1 + +/* PCIe fabric handle lookup address flags */ +#define PF_DMA_ADDR (1 << 0) +#define PF_PIO_ADDR (1 << 1) +#define PF_CFG_ADDR (1 << 2) + +#define PF_SEND_ERPT_YES 1 +#define PF_SEND_ERPT_UNKNOWN 0 +#define PF_SEND_ERPT_NO -1 + +#define PF_SUCCESS (1 << 0) +#define PF_FAILURE (1 << 1) +#define PF_DO_NOT_SCAN (1 << 2) + +/* PCIe helper functions */ +extern pcie_ppd_t *pcie_get_ppd(dev_info_t *dip); + +/* PCIe Friendly Functions */ extern int pcie_initchild(dev_info_t *dip); extern void pcie_uninitchild(dev_info_t *dip); -extern void 
pcie_clear_errors(dev_info_t *dip, - ddi_acc_handle_t config_handle); +extern void pcie_clear_errors(dev_info_t *dip, ddi_acc_handle_t cfg_hdl); extern int pcie_postattach_child(dev_info_t *dip); -extern void pcie_enable_errors(dev_info_t *dip, - ddi_acc_handle_t config_handle); -extern void pcie_disable_errors(dev_info_t *dip, - ddi_acc_handle_t config_handle); +extern void pcie_enable_errors(dev_info_t *dip, ddi_acc_handle_t cfg_hdl); +extern void pcie_disable_errors(dev_info_t *dip, ddi_acc_handle_t cfg_hdl); +extern int pcie_enable_ce(dev_info_t *dip, ddi_acc_handle_t cfg_hdl); extern dev_info_t *pcie_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip); extern uint32_t pcie_get_bdf_for_dma_xfer(dev_info_t *dip, dev_info_t *rdip); -extern int pcie_enable_ce(dev_info_t *dip, - ddi_acc_handle_t config_handle); + +extern pcie_ppd_t *pcie_init_ppd(dev_info_t *cdip); +extern void pcie_uninit_ppd(dev_info_t *cdip); +extern boolean_t pcie_is_child(dev_info_t *dip, dev_info_t *rdip); +extern int pcie_get_bdf_from_dip(dev_info_t *dip, pcie_req_id_t *bdf); + +/* PCIe error handling functions */ +extern int pf_en_dq(pf_data_t *pf_data_p, pf_data_t *dq_p, int *dq_tail_p, + pcie_req_id_t pbdf); +extern int pf_get_dq_size(void); +extern int pf_tlp_decode(dev_info_t *rpdip, pf_data_t *pf_data_p, + pcie_req_id_t *bdf, uint32_t *addr, uint32_t *trans_type); +extern int pf_tlp_hdl_lookup(dev_info_t *rpdip, ddi_fm_error_t *derr, + pf_data_t *pf_data_p); +extern int pf_hdl_lookup(dev_info_t *rpdip, uint64_t ena, + uint32_t flag, uint32_t addr, pcie_req_id_t bdf); +extern int pf_scan_fabric(dev_info_t *rpdip, ddi_fm_error_t *derr, + pf_data_t *dq_p, int *dq_tail_p); +extern void pf_init(dev_info_t *dip, ddi_iblock_cookie_t ibc); +extern void pf_fini(dev_info_t *dip); +extern boolean_t pf_ready(dev_info_t *dip); + #ifdef __cplusplus } diff --git a/usr/src/uts/sparc/Makefile.files b/usr/src/uts/sparc/Makefile.files index 27f15fe13d..96d0b63d68 100644 --- 
a/usr/src/uts/sparc/Makefile.files +++ b/usr/src/uts/sparc/Makefile.files @@ -66,7 +66,7 @@ FD_OBJS += fd_asm.o CPR_SPARC_OBJS += cpr_sparc.o PCI_PCI_OBJS += pci_pci.o pci_debug.o pci_pwr.o pcix.o -PX_PCI_OBJS += px_pci.o px_debug.o pcie_pwr.o +PX_PCI_OBJS += px_pci.o pcie_pwr.o FCODE_OBJS += fcode.o # diff --git a/usr/src/uts/sparc/pci_pci/Makefile b/usr/src/uts/sparc/pci_pci/Makefile index 52f686384f..71cd4a422d 100644 --- a/usr/src/uts/sparc/pci_pci/Makefile +++ b/usr/src/uts/sparc/pci_pci/Makefile @@ -18,7 +18,6 @@ # # CDDL HEADER END # -# # uts/sparc/pci_pci/Makefile # Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. @@ -82,6 +81,11 @@ CFLAGS += -dalign LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON # +# Dependency +# +LDFLAGS += -dy -Nmisc/pcie + +# # Default build targets. # .KEEP_STATE: diff --git a/usr/src/uts/sun4/io/px/pcie_pwr.c b/usr/src/uts/sun4/io/px/pcie_pwr.c index e5b394acf1..ccba95faac 100644 --- a/usr/src/uts/sun4/io/px/pcie_pwr.c +++ b/usr/src/uts/sun4/io/px/pcie_pwr.c @@ -36,11 +36,22 @@ #include <sys/sunndi.h> #include <sys/ddi_impldefs.h> #include <sys/ddi_implfuncs.h> -#include <sys/pci.h> +#include <sys/pcie.h> #include <sys/pcie_impl.h> +#include <sys/promif.h> /* prom_printf */ #include "pcie_pwr.h" -#include "px_pci.h" -#include "px_debug.h" + +#if defined(DEBUG) + +#define DBG pcie_pwr_dbg +static void pcie_pwr_dbg(dev_info_t *dip, char *fmt, ...); +static uint_t pcie_pwr_print = 0; + +#else /* DEBUG */ + +#define DBG 0 && + +#endif /* DEBUG */ /* * This file implements the power management functionality for @@ -122,10 +133,10 @@ pcie_power(dev_info_t *dip, int component, int level) (level == PM_LEVEL_D2 && (pmcaps & PCIE_SUPPORTS_D2))); mutex_enter(&pwr_p->pwr_lock); - DBG(DBG_PWR, dip, "pcie_power: change from %d to %d\n", + DBG(dip, "pcie_power: change from %d to %d\n", pwr_p->pwr_func_lvl, level); if (pwr_p->pwr_func_lvl == level) { - DBG(DBG_PWR, dip, "pcie_power: already at %d\n", level); 
+ DBG(dip, "pcie_power: already at %d\n", level); ret = DDI_SUCCESS; goto pcie_pwr_done; } @@ -136,7 +147,7 @@ pcie_power(dev_info_t *dip, int component, int level) * or there is a hold. */ if (pwr_p->pwr_flags & PCIE_PM_BUSY) { - DBG(DBG_PWR, dip, "pcie_power: rejecting change to %d " + DBG(dip, "pcie_power: rejecting change to %d " "as busy\n", level); goto pcie_pwr_done; } @@ -150,7 +161,7 @@ pcie_power(dev_info_t *dip, int component, int level) ASSERT(!counters[PCIE_D0_INDEX] && !counters[PCIE_UNKNOWN_INDEX]); if (level < pwr_level_allowed(pwr_p)) { - DBG(DBG_PWR, dip, "pcie_power: rejecting level %d as" + DBG(dip, "pcie_power: rejecting level %d as" " %d is the lowest possible\n", level, pwr_level_allowed(pwr_p)); goto pcie_pwr_done; @@ -158,12 +169,12 @@ pcie_power(dev_info_t *dip, int component, int level) } if (pcie_pwr_change(dip, pwr_p, level) != DDI_SUCCESS) { - DBG(DBG_PWR, dip, "pcie_power: attempt to change to %d " + DBG(dip, "pcie_power: attempt to change to %d " " failed \n", level); goto pcie_pwr_done; } pwr_p->pwr_func_lvl = level; - DBG(DBG_PWR, dip, "pcie_power: level changed to %d \n", level); + DBG(dip, "pcie_power: level changed to %d \n", level); ret = DDI_SUCCESS; pcie_pwr_done: @@ -210,9 +221,9 @@ pcie_pwr_change(dev_info_t *dip, pcie_pwr_t *pwr_p, int new) } /* Save config space, if going to D3 */ if (new == PM_LEVEL_D3) { - DBG(DBG_PWR, dip, "pwr_change: saving config space regs\n"); + DBG(dip, "pwr_change: saving config space regs\n"); if (pci_save_config_regs(dip) != DDI_SUCCESS) { - DBG(DBG_PWR, dip, "pcie_pwr_change: failed to save " + DBG(dip, "pcie_pwr_change: failed to save " "config space regs\n"); return (DDI_FAILURE); } @@ -233,9 +244,9 @@ pcie_pwr_change(dev_info_t *dip, pcie_pwr_t *pwr_p, int new) * Restore config space if coming out of D3 */ if (pwr_p->pwr_func_lvl == PM_LEVEL_D3) { - DBG(DBG_PWR, dip, "pcie_pwr_change: restoring config space\n"); + DBG(dip, "pcie_pwr_change: restoring config space\n"); if 
(pci_restore_config_regs(dip) != DDI_SUCCESS) { - DBG(DBG_PWR, dip, "pcie_pwr_change: failed to restore " + DBG(dip, "pcie_pwr_change: failed to restore " "config space regs\n"); return (DDI_FAILURE); } @@ -292,7 +303,7 @@ pcie_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op, mutex_enter(&pwr_p->pwr_lock); switch (op) { case BUS_POWER_PRE_NOTIFICATION: - DBG(DBG_PWR, dip, "bus_power: %s@%d op %s %d->%d\n", + DBG(dip, "pcie_bus_power: %s@%d op %s %d->%d\n", ddi_driver_name(cdip), ddi_get_instance(cdip), pcie_decode_pwr_op(op), old_level, new_level); /* @@ -305,12 +316,12 @@ pcie_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op, */ if (pwr_p->pwr_flags & PCIE_NO_CHILD_PM) { if (!PCIE_IS_COMPS_COUNTED(cdip)) { - DBG(DBG_PWR, dip, "bus_power: marking child " + DBG(dip, "pcie_bus_power: marking child " "busy to disable pm \n"); (void) pm_busy_component(cdip, 0); } if (new_level < PM_LEVEL_D0 && !comp) { - DBG(DBG_PWR, dip, "bus_power: rejecting " + DBG(dip, "pcie_bus_power: rejecting " "child's attempt to go to %d\n", new_level); rv = DDI_FAILURE; } @@ -322,7 +333,7 @@ pcie_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op, case BUS_POWER_HAS_CHANGED: case BUS_POWER_POST_NOTIFICATION: - DBG(DBG_PWR, dip, "bus_power: %s@%d op %s %d->%d\n", + DBG(dip, "pcie_bus_power: %s@%d op %s %d->%d\n", ddi_driver_name(cdip), ddi_get_instance(cdip), pcie_decode_pwr_op(op), old_level, new_level); /* @@ -337,7 +348,7 @@ pcie_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op, (void) pcie_pm_add_child(dip, cdip); if ((pwr_p->pwr_flags & PCIE_NO_CHILD_PM) && (op == BUS_POWER_HAS_CHANGED)) { - DBG(DBG_PWR, dip, "bus_power: marking child " + DBG(dip, "pcie_bus_power: marking child " "busy to disable pm \n"); (void) pm_busy_component(cdip, 0); /* @@ -366,7 +377,7 @@ pcie_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op, } if (*((int *)result) == DDI_FAILURE) { - DBG(DBG_PWR, dip, "bus_power: change for %s%d failed\n", + 
DBG(dip, "pcie_bus_power: change for %s%d failed\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); break; } @@ -389,7 +400,7 @@ pcie_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op, */ if (level_allowed >= pwr_p->pwr_func_lvl && !(pwr_p->pwr_flags & PCIE_PM_BUSY)) { - DBG(DBG_PWR, dip, "bus_power: marking busy\n"); + DBG(dip, "pcie_bus_power: marking busy\n"); (void) pm_busy_component(dip, 0); pwr_p->pwr_flags |= PCIE_PM_BUSY; break; @@ -406,7 +417,7 @@ pcie_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op, * For pci express, we should check here whether * the link is in L1 state or not. */ - DBG(DBG_PWR, dip, "bus_power: marking idle\n"); + DBG(dip, "pcie_bus_power: marking idle\n"); (void) pm_idle_component(dip, 0); pwr_p->pwr_flags &= ~PCIE_PM_BUSY; break; @@ -500,7 +511,7 @@ pcie_add_comps(dev_info_t *dip, dev_info_t *cdip, pcie_pwr_t *pwr_p) if (!comps) return; - DBG(DBG_PWR, dip, "pcie_add_comps: unknown level counter incremented " + DBG(dip, "pcie_add_comps: unknown level counter incremented " "from %d by %d because of %s@%d\n", (pwr_p->pwr_counters)[PCIE_UNKNOWN_INDEX], comps, ddi_driver_name(cdip), ddi_get_instance(cdip)); @@ -547,7 +558,7 @@ pcie_remove_comps(dev_info_t *dip, dev_info_t *cdip, pcie_pwr_t *pwr_p) } return; } - DBG(DBG_PWR, dip, "pcie_remove_comps:counters decremented because of " + DBG(dip, "pcie_remove_comps:counters decremented because of " "%s@%d\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); child_counters = PCIE_CHILD_COUNTERS(cdip); /* @@ -589,7 +600,7 @@ pwr_common_setup(dev_info_t *dip) if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, "pm-want-child-notification?", NULL, NULL) != DDI_PROP_SUCCESS) { - DBG(DBG_PWR, dip, "can't create pm-want-child-notification \n"); + DBG(dip, "can't create pm-want-child-notification \n"); goto pwr_common_err; } pcie_pm_p->pcie_pwr_p = pwr_p; @@ -657,11 +668,11 @@ pcie_pm_hold(dev_info_t *dip) */ mutex_enter(&pwr_p->pwr_lock); ASSERT(pwr_p->pwr_hold 
>= 0); - DBG(DBG_PWR, dip, "pm_hold: incrementing hold \n"); + DBG(dip, "pm_hold: incrementing hold \n"); pwr_p->pwr_hold++; /* Mark itself busy, if it is not done already */ if (!(pwr_p->pwr_flags & PCIE_PM_BUSY)) { - DBG(DBG_PWR, dip, "pm_hold: marking busy\n"); + DBG(dip, "pm_hold: marking busy\n"); pwr_p->pwr_flags |= PCIE_PM_BUSY; (void) pm_busy_component(dip, 0); } @@ -671,7 +682,7 @@ pcie_pm_hold(dev_info_t *dip) } mutex_exit(&pwr_p->pwr_lock); if (pm_raise_power(dip, 0, PM_LEVEL_D0) != DDI_SUCCESS) { - DBG(DBG_PWR, dip, "pm_hold: attempt to raise power " + DBG(dip, "pm_hold: attempt to raise power " "from %d to %d failed\n", pwr_p->pwr_func_lvl, PM_LEVEL_D0); pcie_pm_release(dip); @@ -705,13 +716,13 @@ pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p) ASSERT(MUTEX_HELD(&pwr_p->pwr_lock)); ASSERT(pwr_p->pwr_hold > 0); - DBG(DBG_PWR, dip, "pm_subrelease: decrementing hold \n"); + DBG(dip, "pm_subrelease: decrementing hold \n"); pwr_p->pwr_hold--; ASSERT(pwr_p->pwr_hold >= 0); ASSERT(pwr_p->pwr_flags & PCIE_PM_BUSY); level = pwr_level_allowed(pwr_p); if (pwr_p->pwr_hold == 0 && level < pwr_p->pwr_func_lvl) { - DBG(DBG_PWR, dip, "pm_subrelease: marking idle \n"); + DBG(dip, "pm_subrelease: marking idle \n"); (void) pm_idle_component(dip, 0); pwr_p->pwr_flags &= ~PCIE_PM_BUSY; } @@ -748,7 +759,7 @@ pcie_pm_add_child(dev_info_t *dip, dev_info_t *cdip) * and we stay at full power. 
*/ ASSERT(pwr_p->pwr_hold > 0); - DBG(DBG_PWR, dip, "pm_add_child: decrementing hold \n"); + DBG(dip, "pm_add_child: decrementing hold \n"); pwr_p->pwr_hold--; /* * We must have made sure that busy bit @@ -793,7 +804,7 @@ pcie_pm_remove_child(dev_info_t *dip, dev_info_t *cdip) if ((pwr_p->pwr_hold == 0) && (!total || (pwr_level_allowed(pwr_p) < pwr_p->pwr_func_lvl))) { if (pwr_p->pwr_flags & PCIE_PM_BUSY) { - DBG(DBG_PWR, dip, "bus_power: marking idle\n"); + DBG(dip, "pcie_bus_power: marking idle\n"); (void) pm_idle_component(dip, 0); pwr_p->pwr_flags &= ~PCIE_PM_BUSY; } @@ -856,7 +867,7 @@ pcie_pwr_resume(dev_info_t *dip) * init'ed. They will be set up by init_child(). */ if (i_ddi_node_state(cdip) < DS_INITIALIZED) { - DBG(DBG_PWR, dip, + DBG(dip, "DDI_RESUME: skipping %s%d not in CF1\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); continue; @@ -869,12 +880,12 @@ pcie_pwr_resume(dev_info_t *dip) "nexus-saved-config-regs") != 1) continue; - DBG(DBG_PWR, dip, + DBG(dip, "DDI_RESUME: nexus restoring %s%d config regs\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); if (pci_config_setup(cdip, &config_handle) != DDI_SUCCESS) { - DBG(DBG_PWR, dip, "DDI_RESUME: " + DBG(dip, "DDI_RESUME: " "pci_config_setup for %s%d failed\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); continue; @@ -887,15 +898,13 @@ pcie_pwr_resume(dev_info_t *dip) if (is_pcie = pcie_is_pcie(config_handle)) pcie_disable_errors(cdip, config_handle); (void) pci_restore_config_regs(cdip); - if (is_pcie) { + if (is_pcie) pcie_enable_errors(cdip, config_handle); - (void) pcie_enable_ce(cdip, config_handle); - } pci_config_teardown(&config_handle); if (ndi_prop_remove(DDI_DEV_T_NONE, cdip, "nexus-saved-config-regs") != DDI_PROP_SUCCESS) { - DBG(DBG_PWR, dip, "%s%d can't remove prop %s", + DBG(dip, "%s%d can't remove prop %s", ddi_driver_name(cdip), ddi_get_instance(cdip), "nexus-saved-config-regs"); } @@ -930,7 +939,7 @@ pcie_pwr_suspend(dev_info_t *dip) mutex_exit(&pwr_p->pwr_lock); if 
(pm_raise_power(dip, 0, PM_LEVEL_D0) != DDI_SUCCESS) { - DBG(DBG_PWR, dip, "pwr_suspend: attempt " + DBG(dip, "pwr_suspend: attempt " "to raise power from %d to %d " "failed\n", pwr_p->pwr_func_lvl, PM_LEVEL_D0); @@ -965,7 +974,7 @@ pcie_pwr_suspend(dev_info_t *dip) * init'ed. They will be set up in init_child(). */ if (i_ddi_node_state(cdip) < DS_INITIALIZED) { - DBG(DBG_PWR, dip, "DDI_SUSPEND: skipping " + DBG(dip, "DDI_SUSPEND: skipping " "%s%d not in CF1\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); continue; @@ -1001,16 +1010,16 @@ pcie_pwr_suspend(dev_info_t *dip) */ if (ndi_prop_create_boolean(DDI_DEV_T_NONE, cdip, "nexus-saved-config-regs") != DDI_PROP_SUCCESS) { - DBG(DBG_PWR, dip, "%s%d can't update prop %s", + DBG(dip, "%s%d can't update prop %s", ddi_driver_name(cdip), ddi_get_instance(cdip), "nexus-saved-config-regs"); } - DBG(DBG_PWR, dip, "DDI_SUSPEND: saving config space for" + DBG(dip, "DDI_SUSPEND: saving config space for" " %s%d\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); /* PCIe workaround: disable errors during 4K config save */ if (pci_config_setup(cdip, &config_handle) != DDI_SUCCESS) { - DBG(DBG_PWR, dip, "DDI_SUSPEND: pci_config_setup " + DBG(dip, "DDI_SUSPEND: pci_config_setup " "for %s%d failed\n", ddi_driver_name(cdip), ddi_get_instance(cdip)); continue; @@ -1019,10 +1028,8 @@ pcie_pwr_suspend(dev_info_t *dip) if (is_pcie = pcie_is_pcie(config_handle)) pcie_disable_errors(cdip, config_handle); (void) pci_save_config_regs(cdip); - if (is_pcie) { + if (is_pcie) pcie_enable_errors(cdip, config_handle); - (void) pcie_enable_ce(cdip, config_handle); - } pci_config_teardown(&config_handle); } return (DDI_SUCCESS); @@ -1062,4 +1069,24 @@ pcie_decode_pwr_op(pm_bus_power_op_t op) return ("UNKNOWN OP"); } +static void +pcie_pwr_dbg(dev_info_t *dip, char *fmt, ...) 
+{ + va_list ap; + if (!pcie_pwr_print) + return; + + if (dip) + prom_printf("%s(%d): pcie pwr: ", ddi_driver_name(dip), + ddi_get_instance(dip)); +body: + va_start(ap, fmt); + if (ap) + prom_vprintf(fmt, ap); + else + prom_printf(fmt); + + va_end(ap); +} + #endif diff --git a/usr/src/uts/sun4/io/px/pcie_pwr.h b/usr/src/uts/sun4/io/px/pcie_pwr.h index 01edea9119..ec08e1634b 100644 --- a/usr/src/uts/sun4/io/px/pcie_pwr.h +++ b/usr/src/uts/sun4/io/px/pcie_pwr.h @@ -32,8 +32,6 @@ extern "C" { #endif -#include "px_ioapi.h" /* for msiq */ - /* index of counters for each level */ #define PCIE_D3_INDEX PM_LEVEL_D3 #define PCIE_D2_INDEX PM_LEVEL_D2 diff --git a/usr/src/uts/sun4/io/px/px.c b/usr/src/uts/sun4/io/px/px.c index 8b978ee98f..d3a36c3eb7 100644 --- a/usr/src/uts/sun4/io/px/px.c +++ b/usr/src/uts/sun4/io/px/px.c @@ -256,6 +256,10 @@ px_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) (void) ddi_prop_update_string(DDI_DEV_T_NONE, dip, "device_type", "pciex"); + + /* Initialize px_dbg for high pil printing */ + px_dbg_attach(dip, &px_p->px_dbg_hdl); + /* * Get key properties of the pci bridge node and * determine it's type (psycho, schizo, etc ...). @@ -269,6 +273,12 @@ px_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) /* Initialize device handle */ px_p->px_dev_hdl = dev_hdl; + px_p->px_dq_p = (pf_data_t *) + kmem_zalloc(sizeof (pf_data_t) * pf_get_dq_size(), + KM_SLEEP); + + px_p->px_dq_tail = -1; + /* * Initialize interrupt block. Note that this * initialize error handling for the PEC as well. 
@@ -372,6 +382,7 @@ err_bad_ib: err_bad_dev_init: px_free_props(px_p); err_bad_px_prop: + px_dbg_detach(dip, &px_p->px_dbg_hdl); mutex_destroy(&px_p->px_mutex); ddi_soft_state_free(px_state_p, instance); err_bad_px_softstate: @@ -468,11 +479,15 @@ px_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) px_ib_detach(px_p); (void) px_lib_dev_fini(dip); + kmem_free(px_p->px_dq_p, sizeof (pf_data_t) * + pf_get_dq_size()); + /* * Free the px soft state structure and the rest of the * resources it's using. */ px_free_props(px_p); + px_dbg_detach(dip, &px_p->px_dbg_hdl); mutex_exit(&px_p->px_mutex); mutex_destroy(&px_p->px_mutex); @@ -1234,6 +1249,8 @@ px_ctlops(dev_info_t *dip, dev_info_t *rdip, if (as->cmd == DDI_ATTACH && as->result != DDI_SUCCESS) pcie_pm_release(dip); + pf_init(rdip, (void *)px_p->px_fm_ibc); + (void) pcie_postattach_child(rdip); return (DDI_SUCCESS); @@ -1254,6 +1271,9 @@ px_ctlops(dev_info_t *dip, dev_info_t *rdip, return (pcie_pm_remove_child(dip, rdip)); } return (DDI_SUCCESS); + case DDI_PRE: + pf_fini(rdip); + return (DDI_SUCCESS); default: break; } diff --git a/usr/src/uts/sun4/io/px/px_debug.c b/usr/src/uts/sun4/io/px/px_debug.c index 281dbfc2e4..ff981c9f30 100644 --- a/usr/src/uts/sun4/io/px/px_debug.c +++ b/usr/src/uts/sun4/io/px/px_debug.c @@ -32,7 +32,8 @@ #include <sys/sunddi.h> /* dev_info_t */ #include <sys/ddi_impldefs.h> #include <sys/disp.h> -#include "px_debug.h" +#include <sys/archsystm.h> /* getpil() */ +#include "px_obj.h" /*LINTLIBRARY*/ @@ -112,17 +113,35 @@ static char *px_debug_sym [] = { /* same sequence as px_debug_bit */ /* LAST */ "unknown" }; -void -px_dbg(px_debug_bit_t bit, dev_info_t *dip, char *fmt, ...) +/* Tunables */ +static int px_dbg_msg_size = 16; /* # of Qs. 
Must be ^2 */ + +/* Non-Tunables */ +static int px_dbg_qmask = 0xFFFF; /* Mask based on Q size */ +static px_dbg_msg_t *px_dbg_msgq = NULL; /* Debug Msg Queue */ +static uint8_t px_dbg_reference = 0; /* Reference Counter */ +static kmutex_t px_dbg_mutex; /* Mutex for dequeuing */ +static uint8_t px_dbg_qtail = 0; /* Pointer to q tail */ +static uint8_t px_dbg_qhead = 0; /* Pointer to q head */ +static uint_t px_dbg_qsize = 0; /* # of pending messages */ +static uint_t px_dbg_failed = 0; /* # of overflows */ + +/* Forward Declarations */ +static void px_dbg_print(px_debug_bit_t bit, dev_info_t *dip, char *fmt, + va_list args); +static void px_dbg_queue(px_debug_bit_t bit, dev_info_t *dip, char *fmt, + va_list args); +static uint_t px_dbg_drain(caddr_t arg1, caddr_t arg2); + +/* + * Print function called either directly by px_dbg or through soft interrupt. + * This function cannot be called directly in threads with PIL above clock. + */ +static void +px_dbg_print(px_debug_bit_t bit, dev_info_t *dip, char *fmt, va_list args) { int cont = bit >> DBG_BITS; - va_list ap; - bit &= DBG_MASK; - if (bit >= sizeof (px_debug_sym) / sizeof (char *)) - return; - if (!(1ull << bit & px_debug_flags)) - return; if (cont) goto body; @@ -132,8 +151,141 @@ px_dbg(px_debug_bit_t bit, dev_info_t *dip, char *fmt, ...) else prom_printf("px: %s: ", px_debug_sym[bit]); body: + if (args) + prom_vprintf(fmt, args); + else + prom_printf("%s", fmt); /* no args: fmt is pre-formatted text */ +} + +/* + * Queueing mechanism to log px_dbg messages if calling thread is running with a + * PIL above clock. It's Multithreaded safe. 
+ */ +static void +px_dbg_queue(px_debug_bit_t bit, dev_info_t *dip, char *fmt, va_list args) +{ + int instance = DIP_TO_INST(dip); + px_t *px_p = INST_TO_STATE(instance); + uint8_t q_no; + px_dbg_msg_t *msg_p; + + /* Check to make sure the queue hasn't overflowed */ + if (atomic_inc_uint_nv(&px_dbg_qsize) >= px_dbg_msg_size) { + px_dbg_failed++; + atomic_dec_uint(&px_dbg_qsize); + return; + } + + /* + * Grab the next available queue bucket. Incrementing the tail here + * doesn't need to be protected, as it is guaranteed to not overflow. + */ + q_no = ++px_dbg_qtail & px_dbg_qmask; + msg_p = &px_dbg_msgq[q_no]; + + ASSERT(msg_p->active == B_FALSE); + + /* Print the message in the buffer */ + vsnprintf(msg_p->msg, DBG_MSG_SIZE, fmt, args); + msg_p->bit = bit; + msg_p->dip = dip; + msg_p->active = B_TRUE; + + /* Trigger Soft Int */ + ddi_intr_trigger_softint(px_p->px_dbg_hdl, (caddr_t)NULL); +} + +/* + * Callback function for queuing px_dbg in high PIL by soft intr. This code + * assumes it will be called serially for every msg. + */ +static uint_t +px_dbg_drain(caddr_t arg1, caddr_t arg2) { + uint8_t q_no; + px_dbg_msg_t *msg_p; + uint_t ret = DDI_INTR_UNCLAIMED; + + mutex_enter(&px_dbg_mutex); + while (px_dbg_qsize) { + atomic_dec_uint(&px_dbg_qsize); + if (px_dbg_failed) { + cmn_err(CE_WARN, "%d msg(s) were lost", + px_dbg_failed); + px_dbg_failed = 0; + } + + q_no = ++px_dbg_qhead & px_dbg_qmask; + msg_p = &px_dbg_msgq[q_no]; + + if (msg_p->active) { + px_dbg_print(msg_p->bit, msg_p->dip, msg_p->msg, NULL); + msg_p->active = B_FALSE; + } + ret = DDI_INTR_CLAIMED; + } + + mutex_exit(&px_dbg_mutex); + return (ret); +} + +void +px_dbg(px_debug_bit_t bit, dev_info_t *dip, char *fmt, ...) 
+{ + va_list ap; + + bit &= DBG_MASK; + if (bit >= sizeof (px_debug_sym) / sizeof (char *)) + return; + if (!(1ull << bit & px_debug_flags)) + return; + va_start(ap, fmt); - prom_vprintf(fmt, ap); + if (getpil() > LOCK_LEVEL) + px_dbg_queue(bit, dip, fmt, ap); + else + px_dbg_print(bit, dip, fmt, ap); va_end(ap); } #endif /* DEBUG */ + +void +px_dbg_attach(dev_info_t *dip, ddi_softint_handle_t *dbg_hdl) +{ +#ifdef DEBUG + if (px_dbg_reference++ == 0) { + int size = px_dbg_msg_size; + + while (size & (size - 1)) /* round size down to ^2 */ + size &= size - 1; /* by clearing the lowest set bit */ + px_dbg_msg_size = size; + px_dbg_qmask = size - 1; + px_dbg_msgq = kmem_zalloc(sizeof (px_dbg_msg_t) * size, + KM_SLEEP); + + mutex_init(&px_dbg_mutex, NULL, MUTEX_DRIVER, NULL); + } + + if (ddi_intr_add_softint(dip, dbg_hdl, + DDI_INTR_SOFTPRI_MAX, px_dbg_drain, NULL) != DDI_SUCCESS) { + DBG(DBG_ATTACH, dip, + "Unable to allocate soft int for DBG printing.\n"); + *dbg_hdl = NULL; /* clear the caller's handle, not the local */ + } +#endif /* DEBUG */ +} + +/* ARGSUSED */ +void +px_dbg_detach(dev_info_t *dip, ddi_softint_handle_t *dbg_hdl) +{ +#ifdef DEBUG + if (dbg_hdl != NULL && *dbg_hdl != NULL) /* softint is per instance */ + (void) ddi_intr_remove_softint(*dbg_hdl); + if (--px_dbg_reference == 0) { /* last user frees shared queue */ + if (px_dbg_msgq != NULL) + kmem_free(px_dbg_msgq, + sizeof (px_dbg_msg_t) * px_dbg_msg_size); + mutex_destroy(&px_dbg_mutex); + } +#endif /* DEBUG */ +} diff --git a/usr/src/uts/sun4/io/px/px_debug.h b/usr/src/uts/sun4/io/px/px_debug.h index 5916a6928b..dc3fb2c6c7 100644 --- a/usr/src/uts/sun4/io/px/px_debug.h +++ b/usr/src/uts/sun4/io/px/px_debug.h @@ -110,6 +110,18 @@ typedef enum { /* same sequence as px_debug_sym[] */ #define DBG_BITS 6 #define DBG_CONT (1 << DBG_BITS) #define DBG_MASK (DBG_CONT - 1) +#define DBG_MSG_SIZE 320 + +/* Used only during High PIL printing */ +typedef struct px_dbg_msg { + boolean_t active; + px_debug_bit_t bit; + dev_info_t *dip; + char msg[DBG_MSG_SIZE]; +} px_dbg_msg_t; + +extern void px_dbg_attach(dev_info_t *dip, ddi_softint_handle_t *px_dbg_hdl); 
+extern void px_dbg_detach(dev_info_t *dip, ddi_softint_handle_t *px_dbg_hdl); #if defined(DEBUG) diff --git a/usr/src/uts/sun4/io/px/px_fm.c b/usr/src/uts/sun4/io/px/px_fm.c index 88ff144bdf..147c2e64cf 100644 --- a/usr/src/uts/sun4/io/px/px_fm.c +++ b/usr/src/uts/sun4/io/px/px_fm.c @@ -36,52 +36,27 @@ #include <sys/membar.h> #include "px_obj.h" -typedef struct px_fabric_cfgspace { - /* Error information */ - msgcode_t msg_code; - pcie_req_id_t rid; - - /* Config space header and device type */ - uint8_t hdr_type; - uint16_t dev_type; - - /* Register pointers */ - uint16_t cap_off; - uint16_t aer_off; - - /* PCI register values */ - uint32_t sts_reg; - uint32_t sts_sreg; - - /* PCIE register values */ - uint32_t dev_sts_reg; - uint32_t aer_ce_reg; - uint32_t aer_ue_reg; - uint32_t aer_sev_reg; - uint32_t aer_ue_sreg; - uint32_t aer_sev_sreg; - - /* PCIE Header Log Registers */ - uint32_t aer_h1; - uint32_t aer_h2; - uint32_t aer_h3; - uint32_t aer_h4; - uint32_t aer_sh1; - uint32_t aer_sh2; - uint32_t aer_sh3; - uint32_t aer_sh4; -} px_fabric_cfgspace_t; - -static uint16_t px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid); -static uint16_t px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid); -static int px_fabric_handle_psts(px_fabric_cfgspace_t *cs); -static int px_fabric_handle_ssts(px_fabric_cfgspace_t *cs); -static int px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs); -static int px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs); -static int px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs); -static void px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs); -static uint_t px_fabric_check(px_t *px_p, msgcode_t msg_code, - pcie_req_id_t rid, ddi_fm_error_t *derr); +#define PX_PCIE_PANIC_BITS \ + (PCIE_AER_UCE_DLP | PCIE_AER_UCE_FCP | PCIE_AER_UCE_TO | \ + PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP | PCIE_AER_UCE_ECRC | \ + PCIE_AER_UCE_UR) +#define PX_PCIE_NO_PANIC_BITS \ + (PCIE_AER_UCE_TRAINING | PCIE_AER_UCE_SD | PCIE_AER_UCE_CA | \ + 
PCIE_AER_UCE_UC) + +static void px_err_fill_pfd(dev_info_t *rpdip, px_err_pcie_t *regs); +static int px_pcie_ptlp(dev_info_t *dip, ddi_fm_error_t *derr, + px_err_pcie_t *regs); + +#if defined(DEBUG) +static void px_pcie_log(dev_info_t *dip, px_err_pcie_t *regs, int severity); +#else /* DEBUG */ +#define px_pcie_log 0 && +#endif /* DEBUG */ + +/* external functions */ +extern int pci_xcap_locate(ddi_acc_handle_t h, uint16_t id, uint16_t *base_p); +extern int pci_lcap_locate(ddi_acc_handle_t h, uint8_t id, uint16_t *base_p); /* * Initialize px FMA support @@ -197,17 +172,6 @@ px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip) } /* - * Function used by PCI error handlers to check if captured address is stored - * in the DMA or ACC handle caches. - */ -int -px_handle_lookup(dev_info_t *dip, int type, uint64_t fme_ena, void *afar) -{ - int ret = ndi_fmc_error(dip, NULL, type, fme_ena, afar); - return (ret == DDI_FM_UNKNOWN ? DDI_FM_FATAL : ret); -} - -/* * Function used to initialize FMA for our children nodes. Called * through pci busops when child node calls ddi_fm_init. */ @@ -262,435 +226,109 @@ px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle) /* * PCI error callback which is registered with our parent to call * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors - * and PCI BERR/TO/UE - * - * Dispatch on all known leaves of this fire device because we cannot tell - * which side the error came from. + * and PCI BERR/TO/UE on IO Loads. 
*/ /*ARGSUSED*/ int px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) { - px_t *px_p = (px_t *)impl_data; - int err = PX_OK; - int fatal = 0; - int nonfatal = 0; - int unknown = 0; - int ret = DDI_FM_OK; - - mutex_enter(&px_p->px_fm_mutex); - - err = px_err_handle(px_p, derr, PX_TRAP_CALL, B_TRUE); - - if (!px_lib_is_in_drain_state(px_p)) - ret = ndi_fm_handler_dispatch(px_p->px_dip, NULL, derr); - - mutex_exit(&px_p->px_fm_mutex); - - switch (ret) { - case DDI_FM_FATAL: - fatal++; - break; - case DDI_FM_NONFATAL: - nonfatal++; - break; - case DDI_FM_UNKNOWN: - unknown++; - break; - default: - break; - } - - ret = (fatal != 0) ? DDI_FM_FATAL : - ((nonfatal != 0) ? DDI_FM_NONFATAL : - (((unknown != 0) ? DDI_FM_UNKNOWN : DDI_FM_OK))); - - /* fire fatal error overrides device error */ - if (err & (PX_FATAL_GOS | PX_FATAL_SW)) - ret = DDI_FM_FATAL; - /* if fire encounts no error, then take whatever device error */ - else if ((err != PX_OK) && (ret != DDI_FM_FATAL)) - ret = DDI_FM_NONFATAL; - - return (ret); -} - -static uint16_t -px_fabric_get_aer(px_t *px_p, pcie_req_id_t rid) -{ - uint32_t hdr, hdr_next_ptr, hdr_cap_id; - uint16_t offset = PCIE_EXT_CAP; - int deadcount = 0; - - /* Find the Advanced Error Register */ - hdr = px_fab_get(px_p, rid, offset); - hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) & - PCIE_EXT_CAP_NEXT_PTR_MASK; - hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) & - PCIE_EXT_CAP_ID_MASK; - - while ((hdr_next_ptr != PCIE_EXT_CAP_NEXT_PTR_NULL) && - (hdr_cap_id != PCIE_EXT_CAP_ID_AER)) { - offset = hdr_next_ptr; - hdr = px_fab_get(px_p, rid, offset); - hdr_next_ptr = (hdr >> PCIE_EXT_CAP_NEXT_PTR_SHIFT) & - PCIE_EXT_CAP_NEXT_PTR_MASK; - hdr_cap_id = (hdr >> PCIE_EXT_CAP_ID_SHIFT) & - PCIE_EXT_CAP_ID_MASK; - - if (deadcount++ > 100) - break; - } - - if (hdr_cap_id == PCIE_EXT_CAP_ID_AER) - return (offset); - - return (0); -} + dev_info_t *pdip = ddi_get_parent(dip); + px_t *px_p = (px_t *)impl_data; + int i, acc_type = 0; 
+ int lookup, rc_err, fab_err = PF_NO_PANIC; + uint32_t addr, addr_high, addr_low; + pcie_req_id_t bdf; + px_ranges_t *ranges_p; + int range_len; -static uint16_t -px_fabric_get_pciecap(px_t *px_p, pcie_req_id_t rid) -{ - uint32_t hdr, hdr_next_ptr, hdr_cap_id; - uint16_t offset = PCI_CONF_STAT; - int deadcount = 0; - - hdr = px_fab_get(px_p, rid, PCI_CONF_COMM) >> 16; - if (!(hdr & PCI_STAT_CAP)) { - /* This is not a PCIE device */ - return (0); + /* + * Deadlock scenario: + * 1. A fabric or mondo 62 interrupt with respect to px0 - T1/cpu0; + * 2. While error handling thread T1 is running on cpu0, a trap + * occurs to cpu1 - T2/cpu1; + * 3. While doing error handling on T1, a precise trap occurs, + * overtaken T1 - T1+/cpu0; + * + * Why threads deadlock: + * T1 owns px_fm_mutex, T2 owns rootnex' fh_lock, but blocked on + * px_fm_mutex, T1+ blocked on rootnex' fh_lock which won't be + * released since T2 will never get px_fm_mutex since T1+ buried + * thread T1 who is responsible for releasing px_fm_mutex. + * + * Solution: + * px_fm_callback must release rootnex' fh_lock prior to acquire + * px_fm_mutex and reaquire the fh_lock after release px_fm_mutex; + * if px_fm_callback is unable to acquire px_fm_mutex, meaning the + * latest trap has either overtaken the error handling thread or an + * error handling thread on another cpu owns it, just quit with OK + * status. Note, in this case, the cpu sync error handler should + * respect nexus'return status and not to panic, otherwise system + * will hang. 
+ */ + i_ddi_fm_handler_exit(pdip); + if (!mutex_tryenter(&px_p->px_fm_mutex)) { + i_ddi_fm_handler_enter(pdip); + return (DDI_FM_OK); } - hdr = px_fab_get(px_p, rid, PCI_CONF_CAP_PTR); - hdr_next_ptr = hdr & 0xFF; - hdr_cap_id = 0; - - while ((hdr_next_ptr != PCI_CAP_NEXT_PTR_NULL) && - (hdr_cap_id != PCI_CAP_ID_PCI_E)) { - offset = hdr_next_ptr; + addr_high = (uint32_t)((uint64_t)derr->fme_bus_specific >> 32); + addr_low = (uint32_t)((uint64_t)derr->fme_bus_specific); - if (hdr_next_ptr < 0x40) { + /* + * Make sure this failed load came from this PCIe port. Check by + * matching the upper 32 bits of the address with the ranges property. + */ + range_len = px_p->px_ranges_length / sizeof (px_ranges_t); + i = 0; + for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) { + if (ranges_p->parent_high == addr_high) { + switch (ranges_p->child_high & PCI_ADDR_MASK) { + case PCI_ADDR_CONFIG: + acc_type = PF_CFG_ADDR; + addr = NULL; + bdf = (pcie_req_id_t)(addr_low >> 12); + break; + case PCI_ADDR_MEM32: + acc_type = PF_DMA_ADDR; + addr = addr_low; + bdf = NULL; + break; + } break; } - - hdr = px_fab_get(px_p, rid, hdr_next_ptr); - hdr_next_ptr = (hdr >> 8) & 0xFF; - hdr_cap_id = hdr & 0xFF; - - if (deadcount++ > 100) - break; } - if (hdr_cap_id == PCI_CAP_ID_PCI_E) - return (offset); - - return (0); -} + /* This address doesn't belong to this leaf, just return with OK */ + if (!acc_type) { + mutex_exit(&px_p->px_fm_mutex); + i_ddi_fm_handler_enter(pdip); + return (DDI_FM_OK); + } -/* - * This function checks the primary status registers. - * Take the PCI status register and translate it to PCIe equivalent. 
- */ -static int -px_fabric_handle_psts(px_fabric_cfgspace_t *cs) { - uint16_t sts_reg = cs->sts_reg >> 16; - uint16_t pci_status; - uint32_t pcie_status; - int ret = PX_NONFATAL; - - /* Parity Err == Send/Recv Poisoned TLP */ - pci_status = PCI_STAT_S_PERROR | PCI_STAT_PERROR; - pcie_status = PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC; - if (sts_reg & pci_status) - ret |= PX_FABRIC_ERR_SEV(pcie_status, - px_fabric_die_ue, px_fabric_die_ue_gos); - - /* Target Abort == Completer Abort */ - pci_status = PCI_STAT_S_TARG_AB | PCI_STAT_R_TARG_AB; - pcie_status = PCIE_AER_UCE_CA; - if (sts_reg & pci_status) - ret |= PX_FABRIC_ERR_SEV(pcie_status, - px_fabric_die_ue, px_fabric_die_ue_gos); - - /* Master Abort == Unsupport Request */ - pci_status = PCI_STAT_R_MAST_AB; - pcie_status = PCIE_AER_UCE_UR; - if (sts_reg & pci_status) - ret |= PX_FABRIC_ERR_SEV(pcie_status, - px_fabric_die_ue, px_fabric_die_ue_gos); - - /* System Error == Uncorrectable Error */ - pci_status = PCI_STAT_S_SYSERR; - pcie_status = (uint32_t)-1; - if (sts_reg & pci_status) - ret |= PX_FABRIC_ERR_SEV(pcie_status, - px_fabric_die_ue, px_fabric_die_ue_gos); - - return (ret); -} + rc_err = px_err_cmn_intr(px_p, derr, PX_TRAP_CALL, PX_FM_BLOCK_ALL); + lookup = pf_hdl_lookup(dip, derr->fme_ena, acc_type, addr, bdf); -/* - * This function checks the secondary status registers. - * Switches and Bridges have a different behavior. - */ -static int -px_fabric_handle_ssts(px_fabric_cfgspace_t *cs) { - uint16_t sts_reg = cs->sts_sreg >> 16; - int ret = PX_NONFATAL; - - if (cs->dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) { + if (!px_lib_is_in_drain_state(px_p)) { /* - * This is a PCIE-PCI bridge, but only check the severity - * if this device doesn't support AERs. + * This is to ensure that device corresponding to the addr of + * the failed PIO/CFG load gets scanned. 
*/ - if (!cs->aer_off) - ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_bdg_sts, - px_fabric_die_bdg_sts_gos); - } else { - /* This is most likely a PCIE switch */ - ret |= PX_FABRIC_ERR_SEV(sts_reg, px_fabric_die_sw_sts, - px_fabric_die_sw_sts_gos); - } - - return (ret); -} - -/* - * This function checks and clears the primary AER. - */ -static int -px_fabric_handle_paer(px_t *px_p, px_fabric_cfgspace_t *cs) { - uint32_t chk_reg, chk_reg_gos, off_reg, reg; - int ret = PX_NONFATAL; - - /* Determine severity and clear the AER */ - switch (cs->msg_code) { - case PCIE_MSG_CODE_ERR_COR: - off_reg = PCIE_AER_CE_STS; - chk_reg = px_fabric_die_ce; - chk_reg_gos = px_fabric_die_ce_gos; - reg = cs->aer_ce_reg; - break; - case PCIE_MSG_CODE_ERR_NONFATAL: - off_reg = PCIE_AER_UCE_STS; - chk_reg = px_fabric_die_ue; - chk_reg_gos = px_fabric_die_ue_gos; - reg = cs->aer_ue_reg & ~(cs->aer_sev_reg); - break; - case PCIE_MSG_CODE_ERR_FATAL: - off_reg = PCIE_AER_UCE_STS; - chk_reg = px_fabric_die_ue; - chk_reg_gos = px_fabric_die_ue_gos; - reg = cs->aer_ue_reg & cs->aer_sev_reg; - break; - default: - /* Major error force a panic */ - return (PX_FATAL_GOS); + px_rp_en_q(px_p, bdf, addr, + (PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB)); + fab_err = pf_scan_fabric(dip, derr, px_p->px_dq_p, + &px_p->px_dq_tail); } - px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg); - ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos); - return (ret); -} + mutex_exit(&px_p->px_fm_mutex); + i_ddi_fm_handler_enter(pdip); -/* - * This function checks and clears the secondary AER. 
- */ -static int -px_fabric_handle_saer(px_t *px_p, px_fabric_cfgspace_t *cs) { - uint32_t chk_reg, chk_reg_gos, off_reg, reg; - uint32_t sev; - int ret = PX_NONFATAL; - - /* Determine severity and clear the AER */ - switch (cs->msg_code) { - case PCIE_MSG_CODE_ERR_COR: - /* Ignore Correctable Errors */ - sev = 0; - break; - case PCIE_MSG_CODE_ERR_NONFATAL: - sev = ~(cs->aer_sev_sreg); - break; - case PCIE_MSG_CODE_ERR_FATAL: - sev = cs->aer_sev_sreg; - break; - default: - /* Major error force a panic */ + if ((rc_err & (PX_PANIC | PX_PROTECTED)) || (fab_err & PF_PANIC) || + (lookup == PF_HDL_NOTFOUND)) return (DDI_FM_FATAL); - } - off_reg = PCIE_AER_SUCE_STS; - chk_reg = px_fabric_die_sue; - chk_reg_gos = px_fabric_die_sue_gos; - reg = cs->aer_ue_sreg & sev; - px_fab_set(px_p, cs->rid, cs->aer_off + off_reg, reg); - ret |= PX_FABRIC_ERR_SEV(reg, chk_reg, chk_reg_gos); - - return (ret); -} - -static int -px_fabric_handle(px_t *px_p, px_fabric_cfgspace_t *cs) -{ - pcie_req_id_t rid = cs->rid; - uint16_t cap_off = cs->cap_off; - uint16_t aer_off = cs->aer_off; - uint8_t hdr_type = cs->hdr_type; - uint16_t dev_type = cs->dev_type; - int ret = PX_NONFATAL; - - if (hdr_type == PCI_HEADER_PPB) { - ret |= px_fabric_handle_ssts(cs); - } - - if (!aer_off) { - ret |= px_fabric_handle_psts(cs); - } - - if (aer_off) { - ret |= px_fabric_handle_paer(px_p, cs); - } - - if (aer_off && (dev_type == PCIE_PCIECAP_DEV_TYPE_PCIE2PCI)) { - ret |= px_fabric_handle_saer(px_p, cs); - } - - /* Clear the standard PCIe error registers */ - px_fab_set(px_p, rid, cap_off + PCIE_DEVCTL, cs->dev_sts_reg); - - /* Clear the legacy error registers */ - px_fab_set(px_p, rid, PCI_CONF_COMM, cs->sts_reg); - - /* Clear the legacy secondary error registers */ - if (hdr_type == PCI_HEADER_PPB) { - px_fab_set(px_p, rid, PCI_BCNF_IO_BASE_LOW, - cs->sts_sreg); - } - - return (ret); -} - -static void -px_fabric_fill_cs(px_t *px_p, px_fabric_cfgspace_t *cs) -{ - uint16_t cap_off, aer_off; - pcie_req_id_t rid 
= cs->rid; - - /* Gather Basic Device Information */ - cs->hdr_type = (px_fab_get(px_p, rid, PCI_CONF_CACHE_LINESZ) >> 16) & - PCI_HEADER_TYPE_M; - - cs->cap_off = px_fabric_get_pciecap(px_p, rid); - cap_off = cs->cap_off; - if (!cap_off) - return; + else if ((rc_err == PX_NO_ERROR) && (fab_err == PF_NO_ERROR)) + return (DDI_FM_OK); - cs->aer_off = px_fabric_get_aer(px_p, rid); - aer_off = cs->aer_off; - - cs->dev_type = px_fab_get(px_p, rid, cap_off) >> 16; - cs->dev_type &= PCIE_PCIECAP_DEV_TYPE_MASK; - - /* Get the Primary Sts Reg */ - cs->sts_reg = px_fab_get(px_p, rid, PCI_CONF_COMM); - - /* If it is a bridge/switch get the Secondary Sts Reg */ - if (cs->hdr_type == PCI_HEADER_PPB) - cs->sts_sreg = px_fab_get(px_p, rid, - PCI_BCNF_IO_BASE_LOW); - - /* Get the PCIe Dev Sts Reg */ - cs->dev_sts_reg = px_fab_get(px_p, rid, - cap_off + PCIE_DEVCTL); - - if (!aer_off) - return; - - /* Get the AER register information */ - cs->aer_ce_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_CE_STS); - cs->aer_ue_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_STS); - cs->aer_sev_reg = px_fab_get(px_p, rid, aer_off + PCIE_AER_UCE_SERV); - cs->aer_h1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x0); - cs->aer_h2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x4); - cs->aer_h3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0x8); - cs->aer_h4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_HDR_LOG + 0xC); - - if (cs->dev_type != PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) - return; - - /* If this is a bridge check secondary aer */ - cs->aer_ue_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_STS); - cs->aer_sev_sreg = px_fab_get(px_p, rid, aer_off + PCIE_AER_SUCE_SERV); - cs->aer_sh1 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x0); - cs->aer_sh2 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x4); - cs->aer_sh3 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0x8); - cs->aer_sh4 = px_fab_get(px_p, rid, aer_off + PCIE_AER_SHDR_LOG + 0xC); -} - -/* 
- * If a fabric intr occurs, query and clear the error registers on that device. - * Based on the error found return DDI_FM_OK or DDI_FM_FATAL. - */ -static uint_t -px_fabric_check(px_t *px_p, msgcode_t msg_code, - pcie_req_id_t rid, ddi_fm_error_t *derr) -{ - dev_info_t *dip = px_p->px_dip; - char buf[FM_MAX_CLASS]; - px_fabric_cfgspace_t cs; - int ret; - - /* clear cs */ - bzero(&cs, sizeof (px_fabric_cfgspace_t)); - - cs.msg_code = msg_code; - cs.rid = rid; - - px_fabric_fill_cs(px_p, &cs); - if (cs.cap_off) - ret = px_fabric_handle(px_p, &cs); - else - ret = PX_FATAL_GOS; - - (void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_FABRIC_CLASS); - ddi_fm_ereport_post(dip, buf, derr->fme_ena, - DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, - PX_FM_FABRIC_MSG_CODE, DATA_TYPE_UINT8, msg_code, - PX_FM_FABRIC_REQ_ID, DATA_TYPE_UINT16, rid, - "cap_off", DATA_TYPE_UINT16, cs.cap_off, - "aer_off", DATA_TYPE_UINT16, cs.aer_off, - "sts_reg", DATA_TYPE_UINT16, cs.sts_reg >> 16, - "sts_sreg", DATA_TYPE_UINT16, cs.sts_sreg >> 16, - "dev_sts_reg", DATA_TYPE_UINT16, cs.dev_sts_reg >> 16, - "aer_ce", DATA_TYPE_UINT32, cs.aer_ce_reg, - "aer_ue", DATA_TYPE_UINT32, cs.aer_ue_reg, - "aer_sev", DATA_TYPE_UINT32, cs.aer_sev_reg, - "aer_h1", DATA_TYPE_UINT32, cs.aer_h1, - "aer_h2", DATA_TYPE_UINT32, cs.aer_h2, - "aer_h3", DATA_TYPE_UINT32, cs.aer_h3, - "aer_h4", DATA_TYPE_UINT32, cs.aer_h4, - "saer_ue", DATA_TYPE_UINT32, cs.aer_ue_sreg, - "saer_sev", DATA_TYPE_UINT32, cs.aer_sev_sreg, - "saer_h1", DATA_TYPE_UINT32, cs.aer_sh1, - "saer_h2", DATA_TYPE_UINT32, cs.aer_sh2, - "saer_h3", DATA_TYPE_UINT32, cs.aer_sh3, - "saer_h4", DATA_TYPE_UINT32, cs.aer_sh4, - "severity", DATA_TYPE_UINT32, ret, - NULL); - - /* Check for protected access */ - switch (derr->fme_flag) { - case DDI_FM_ERR_EXPECTED: - case DDI_FM_ERR_PEEK: - case DDI_FM_ERR_POKE: - ret &= PX_FATAL_GOS; - break; - } - - - if (px_fabric_die && - (ret & (PX_FATAL_GOS | PX_FATAL_SW))) - ret = DDI_FM_FATAL; - return (ret); + return 
(DDI_FM_NONFATAL); } /* @@ -698,7 +336,7 @@ px_fabric_check(px_t *px_p, msgcode_t msg_code, * Interrupt handler for PCIE fabric block. * o lock * o create derr - * o px_err_handle(leaf, with jbc) + * o px_err_cmn_intr(leaf, with jbc) * o send ereport(fire fmri, derr, payload = BDF) * o dispatch (leaf) * o unlock @@ -706,11 +344,10 @@ px_fabric_check(px_t *px_p, msgcode_t msg_code, */ /* ARGSUSED */ uint_t -px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, - pcie_req_id_t rid) +px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, pcie_req_id_t rid) { dev_info_t *rpdip = px_p->px_dip; - int err = PX_OK, ret = DDI_FM_OK, fab_err = DDI_FM_OK; + int rc_err, fab_err = PF_NO_PANIC; ddi_fm_error_t derr; mutex_enter(&px_p->px_fm_mutex); @@ -721,26 +358,20 @@ px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1); derr.fme_flag = DDI_FM_ERR_UNEXPECTED; - /* send ereport/handle/clear fire registers */ - err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE); + /* Ensure that the rid of the fabric message will get scanned. */ + px_rp_en_q(px_p, rid, NULL, NULL); - /* Check and clear the fabric error */ - fab_err = px_fabric_check(px_p, msg_code, rid, &derr); + rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE); - /* Check all child devices for errors */ - ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); + /* call rootport dispatch */ + if (!px_lib_is_in_drain_state(px_p)) { + fab_err = pf_scan_fabric(rpdip, &derr, px_p->px_dq_p, + &px_p->px_dq_tail); + } mutex_exit(&px_p->px_fm_mutex); - /* - * PX_FATAL_HW indicates a condition recovered from Fatal-Reset, - * therefore it does not cause panic. 
- */ - if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || - (ret == DDI_FM_FATAL) || (fab_err == DDI_FM_FATAL)) - PX_FM_PANIC("%s#%d: Fatal PCIe Fabric Error has occurred" - "(%x,%x,%x)\n", ddi_driver_name(rpdip), - ddi_get_instance(rpdip), err, fab_err, ret); + px_err_panic(rc_err, PX_RC, fab_err); return (DDI_INTR_CLAIMED); } @@ -807,3 +438,255 @@ px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr) break; } } + +/* + * Suggest panic if any EQ (except the CE q) has overflowed or its state + * cannot be read. + */ +int +px_err_check_eq(dev_info_t *dip) +{ + px_t *px_p = DIP_TO_STATE(dip); + px_msiq_state_t *msiq_state_p = &px_p->px_ib_p->ib_msiq_state; + px_pec_t *pec_p = px_p->px_pec_p; + msiqid_t eq_no = msiq_state_p->msiq_1st_msiq_id; + pci_msiq_state_t msiq_state; + int i; + + for (i = 0; i < msiq_state_p->msiq_cnt; i++) { + if (i + eq_no == pec_p->pec_corr_msg_msiq_id) /* skip CE q */ + continue; + if ((px_lib_msiq_getstate(dip, i + eq_no, &msiq_state) != + DDI_SUCCESS) || msiq_state == PCI_MSIQ_STATE_ERROR) + return (PX_PANIC); + } + return (PX_NO_PANIC); +} + +static void +px_err_fill_pfd(dev_info_t *rpdip, px_err_pcie_t *regs) +{ + px_t *px_p = DIP_TO_STATE(rpdip); + pf_data_t pf_data = {0}; + pcie_req_id_t fault_bdf = 0; + uint32_t fault_addr = 0; + uint16_t s_status = 0; + + /* + * Set RC s_status in PCI terms to coordinate with downstream fabric + * error analysis. 
+ */ + if (regs->primary_ue & PCIE_AER_UCE_UR) + s_status = PCI_STAT_R_MAST_AB; + if (regs->primary_ue & PCIE_AER_UCE_CA) + s_status = PCI_STAT_R_TARG_AB; + if (regs->primary_ue & (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC)) + s_status = PCI_STAT_PERROR; + + if (regs->primary_ue & (PCIE_AER_UCE_UR | PCIE_AER_UCE_CA)) { + pf_data.aer_h0 = regs->rx_hdr1; + pf_data.aer_h1 = regs->rx_hdr2; + pf_data.aer_h2 = regs->rx_hdr3; + pf_data.aer_h3 = regs->rx_hdr4; + + pf_tlp_decode(rpdip, &pf_data, &fault_bdf, NULL, NULL); + } else if (regs->primary_ue & PCIE_AER_UCE_PTLP) { + pcie_tlp_hdr_t *tlp_p; + + pf_data.aer_h0 = regs->rx_hdr1; + pf_data.aer_h1 = regs->rx_hdr2; + pf_data.aer_h2 = regs->rx_hdr3; + pf_data.aer_h3 = regs->rx_hdr4; + + tlp_p = (pcie_tlp_hdr_t *)&pf_data.aer_h0; + if (tlp_p->type == PCIE_TLP_TYPE_CPL) + pf_tlp_decode(rpdip, &pf_data, &fault_bdf, NULL, NULL); + + pf_data.aer_h0 = regs->tx_hdr1; + pf_data.aer_h1 = regs->tx_hdr2; + pf_data.aer_h2 = regs->tx_hdr3; + pf_data.aer_h3 = regs->tx_hdr4; + + pf_tlp_decode(rpdip, &pf_data, NULL, &fault_addr, NULL); + } + + px_rp_en_q(px_p, fault_bdf, fault_addr, s_status); +} + +int +px_err_check_pcie(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs) +{ + uint32_t ce_reg, ue_reg; + int err = PX_NO_ERROR; + + ce_reg = regs->ce_reg; + if (ce_reg) + err |= (ce_reg & px_fabric_die_rc_ce) ? PX_PANIC : PX_NO_ERROR; + + ue_reg = regs->ue_reg; + if (!ue_reg) + goto done; + + if (ue_reg & PCIE_AER_UCE_PTLP) + err |= px_pcie_ptlp(dip, derr, regs); + + if (ue_reg & PX_PCIE_PANIC_BITS) + err |= PX_PANIC; + + if (ue_reg & PX_PCIE_NO_PANIC_BITS) + err |= PX_NO_PANIC; + + /* Scan the fabric to clean up error bits, for the following errors. 
*/ + if (ue_reg & (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_CA | PCIE_AER_UCE_UR)) + px_err_fill_pfd(dip, regs); +done: + px_pcie_log(dip, regs, err); + return (err); +} + +#if defined(DEBUG) +static void +px_pcie_log(dev_info_t *dip, px_err_pcie_t *regs, int severity) +{ + DBG(DBG_ERR_INTR, dip, + "A PCIe RC error has occured with a severity of \"%s\"\n" + "\tCE: 0x%x UE: 0x%x Primary UE: 0x%x\n" + "\tTX Hdr: 0x%x 0x%x 0x%x 0x%x\n\tRX Hdr: 0x%x 0x%x 0x%x 0x%x\n", + (severity & PX_PANIC) ? "PANIC" : "NO PANIC", regs->ce_reg, + regs->ue_reg, regs->primary_ue, regs->tx_hdr1, regs->tx_hdr2, + regs->tx_hdr3, regs->tx_hdr4, regs->rx_hdr1, regs->rx_hdr2, + regs->rx_hdr3, regs->rx_hdr4); +} +#endif /* DEBUG */ + +/* + * look through poisoned TLP cases and suggest panic/no panic depend on + * handle lookup. + */ +static int +px_pcie_ptlp(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs) +{ + pf_data_t pf_data; + pcie_req_id_t bdf; + uint32_t addr, trans_type; + int tlp_sts, tlp_cmd; + int sts = PF_HDL_NOTFOUND; + + if (regs->primary_ue != PCIE_AER_UCE_PTLP) + return (PX_PANIC); + + if (!regs->rx_hdr1) + goto done; + + pf_data.aer_h0 = regs->rx_hdr1; + pf_data.aer_h1 = regs->rx_hdr2; + pf_data.aer_h2 = regs->rx_hdr3; + pf_data.aer_h3 = regs->rx_hdr4; + + tlp_sts = pf_tlp_decode(dip, &pf_data, &bdf, &addr, &trans_type); + tlp_cmd = ((pcie_tlp_hdr_t *)(&pf_data.aer_h0))->type; + + if (tlp_sts == DDI_FAILURE) + goto done; + + switch (tlp_cmd) { + case PCIE_TLP_TYPE_CPL: + case PCIE_TLP_TYPE_CPLLK: + /* + * Usually a PTLP is a CPL with data. Grab the completer BDF + * from the RX TLP, and the original address from the TX TLP. 
+ */ + if (regs->tx_hdr1) { + pf_data.aer_h0 = regs->tx_hdr1; + pf_data.aer_h1 = regs->tx_hdr2; + pf_data.aer_h2 = regs->tx_hdr3; + pf_data.aer_h3 = regs->tx_hdr4; + + sts = pf_tlp_decode(dip, &pf_data, NULL, &addr, + &trans_type); + } /* FALLTHRU */ + case PCIE_TLP_TYPE_IO: + case PCIE_TLP_TYPE_MEM: + case PCIE_TLP_TYPE_MEMLK: + sts = pf_hdl_lookup(dip, derr->fme_ena, trans_type, addr, bdf); + break; + default: + sts = PF_HDL_NOTFOUND; + } +done: + return (sts == PF_HDL_NOTFOUND ? PX_PANIC : PX_NO_PANIC); +} + +/* + * This function appends a pf_data structure to the error q which is used later + * during PCIe fabric scan. It signifies: + * o errs rcvd in RC, that may have been propagated to/from the fabric + * o the fabric scan code should scan the device path of fault bdf/addr + * + * fault_bdf: The bdf that caused the fault, which may have error bits set. + * fault_addr: The PIO addr that caused the fault, such as failed PIO, but not + * failed DMAs. + * s_status: Secondary Status equivalent to why the fault occurred. + * (i.e. S-TA/MA, R-TA) + * Either the fault bdf or addr may be NULL, but not both; if both are zero + * nothing is queued. + * + * NOTE(review): a non-zero px_foo overrides fault_bdf for one call and is then + * cleared; this looks like a debug/error-injection hook -- confirm it is meant + * to ship. + */ +int px_foo = 0; +void +px_rp_en_q(px_t *px_p, pcie_req_id_t fault_bdf, uint32_t fault_addr, + uint16_t s_status) +{ + pf_data_t pf_data = {0}; + + if (!fault_bdf && !fault_addr) + return; + + pf_data.dev_type = PCIE_PCIECAP_DEV_TYPE_ROOT; + if (px_foo) { + pf_data.fault_bdf = px_foo; + px_foo = 0; + } else + pf_data.fault_bdf = fault_bdf; + + pf_data.fault_addr = fault_addr; + pf_data.s_status = s_status; + pf_data.send_erpt = PF_SEND_ERPT_NO; + + (void) pf_en_dq(&pf_data, px_p->px_dq_p, &px_p->px_dq_tail, -1); +} + +/* + * Panic if the err tunable is set and we are not already in the middle + * of panic'ing. 
+ */ +#define MSZ (sizeof (fm_msg) -strlen(fm_msg) - 1) +void +px_err_panic(int err, int msg, int fab_err) +{ + char fm_msg[96] = ""; + int ferr = PX_NO_ERROR; + + if (panicstr) + return; + + if (!(err & px_die)) + goto fabric; + if (msg & PX_RC) + (void) strncat(fm_msg, px_panic_rc_msg, MSZ); + if (msg & PX_RP) + (void) strncat(fm_msg, px_panic_rp_msg, MSZ); + if (msg & PX_HB) + (void) strncat(fm_msg, px_panic_hb_msg, MSZ); + +fabric: + if (fab_err & PF_PANIC) + ferr = PX_PANIC; + if (fab_err & ~(PF_PANIC | PF_NO_ERROR)) + ferr = PX_NO_PANIC; + if (ferr & px_die) { + if (strlen(fm_msg)) + (void) strncat(fm_msg, " and", MSZ); + (void) strncat(fm_msg, px_panic_fab_msg, MSZ); + } + + if (strlen(fm_msg)) + fm_panic("Fatal error has occured in:%s.", fm_msg); +} diff --git a/usr/src/uts/sun4/io/px/px_fm.h b/usr/src/uts/sun4/io/px/px_fm.h index c265d4456c..df98ee552a 100644 --- a/usr/src/uts/sun4/io/px/px_fm.h +++ b/usr/src/uts/sun4/io/px/px_fm.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -46,27 +45,54 @@ extern "C" { /* * Definition of Fire internal error severity - - * PX_FATAL_HW: errors that automatically cause Fire HW reset, - * PX_FATAL_GOS: errors that causes OS cease to function immediately, - * PX_STUCK_FATAL: errors that is likely to spam, causing hang, - * PX_FATAL_SW: errors that cause partial OS lose function, - * PX_NONFATAL: errors that can be recovered or ignored. + * HW Reset Errors that cause hardware to automatically reset. Software is + * reset along with it; sticky status bits need to be cleaned up upon + * system initialization. + * Panic Errors that definitely result in panic'ing the system. + * Expected Expected error, do not panic, plus do not send ereport. + * Protected Errors for which SW determines whether to panic; forgivable for + * safe access. Set when SW determines this error is forgivable + * during safe access. + * No-panic Errors that don't directly result in panic'ing the system. + * No-Error An interrupt occurred and no errors were seen. */ -#define PX_FATAL_HW 0x10 -#define PX_FATAL_GOS 0x8 -#define PX_STUCK_FATAL 0x4 -#define PX_FATAL_SW 0x2 -#define PX_NONFATAL 0x1 -#define PX_OK DDI_FM_OK -#define PX_ERR_UNKNOWN 0x80 +#define PX_HW_RESET (0x1 << 5) +#define PX_PANIC (0x1 << 4) +#define PX_EXPECTED (0x1 << 3) +#define PX_PROTECTED (0x1 << 2) +#define PX_NO_PANIC (0x1 << 1) +#define PX_NO_ERROR (0x1 << 0) -#define PX_FM_FABRIC_CLASS PCIEX_FIRE ".fabric" -#define PX_FM_FABRIC_MSG_CODE "msg_code" -#define PX_FM_FABRIC_REQ_ID "req_id" +#define PX_HB (0x1 << 2) +#define PX_RP (0x1 << 1) +#define PX_RC (0x1 << 0) -#define PX_FABRIC_ERR_SEV(reg, chk, chk_gos) \ - ((reg & chk) ? ((reg & chk_gos) ? 
\ - PX_FATAL_GOS : PX_FATAL_SW) : PX_NONFATAL) +/* + * Generic PCIe Root Port Error Handling + * This struct must align with px_pec_err_t in sun4v/io/px/px_err.h + */ +typedef struct px_err_pcie { + uint32_t tx_hdr1; /* sysino */ + uint32_t tx_hdr2; /* sysino */ + uint32_t tx_hdr3; /* ehdl */ + uint32_t tx_hdr4; /* ehdl */ + uint32_t primary_ue; /* stick */ + uint32_t rsvd0; /* stick */ + uint32_t rsvd1; /* pec_desc */ + uint16_t pci_err_status; + uint16_t pcie_err_status; + uint32_t ce_reg; + uint32_t ue_reg; + uint32_t rx_hdr1; /* hdr[0] */ + uint32_t rx_hdr2; /* hdr[0] */ + uint32_t rx_hdr3; /* hdr[1] */ + uint32_t rx_hdr4; /* hdr[1] */ + uint32_t rsvd3; /* err_src_reg */ + uint32_t rsvd4; /* root err status */ +} px_err_pcie_t; + +#define PX_FM_BLOCK_HOST (0x1 << 0) +#define PX_FM_BLOCK_PCIE (0x1 << 1) +#define PX_FM_BLOCK_ALL (PX_FM_BLOCK_HOST | PX_FM_BLOCK_PCIE) /* * Error handling FMA hook @@ -78,10 +104,8 @@ extern void px_fm_detach(px_t *px_p); extern int px_fm_init_child(dev_info_t *, dev_info_t *, int, ddi_iblock_cookie_t *); extern void px_fm_acc_setup(ddi_map_req_t *, dev_info_t *); -extern int px_handle_lookup(dev_info_t *, int, uint64_t, void *); extern int px_fm_callback(dev_info_t *, ddi_fm_error_t *, const void *); -extern int px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller, - boolean_t checkjbc); +extern int px_err_cmn_intr(px_t *, ddi_fm_error_t *, int, int); /* * Fire interrupt handlers @@ -95,8 +119,12 @@ extern uint_t px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, * Common error handling functions */ extern void px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr); -#define PX_FM_PANIC \ - if (!panicstr) fm_panic +extern int px_err_check_eq(dev_info_t *dip); +extern int px_err_check_pcie(dev_info_t *dip, ddi_fm_error_t *derr, + px_err_pcie_t *regs); +extern void px_err_panic(int err, int msg, int fab_err); +extern void px_rp_en_q(px_t *px_p, pcie_req_id_t fault_bdf, + uint32_t fault_addr, uint16_t s_status); #ifdef 
__cplusplus } diff --git a/usr/src/uts/sun4/io/px/px_pci.c b/usr/src/uts/sun4/io/px/px_pci.c index b156e7116a..264140b891 100644 --- a/usr/src/uts/sun4/io/px/px_pci.c +++ b/usr/src/uts/sun4/io/px/px_pci.c @@ -55,9 +55,30 @@ #include <sys/open.h> #include <sys/stat.h> #include <sys/file.h> +#include <sys/promif.h> /* prom_printf */ #include "pcie_pwr.h" #include "px_pci.h" -#include "px_debug.h" + +#if defined(DEBUG) +#define DBG pxb_dbg +static void pxb_dbg(uint_t bit, dev_info_t *dip, char *fmt, ...); +static uint_t pxb_dbg_print = 0; + +#else /* DEBUG */ + +#define DBG 0 && + +#endif /* DEBUG */ + +typedef enum { /* same sequence as pxb_debug_sym[] */ + /* 0 */ DBG_ATTACH, + /* 1 */ DBG_PWR +} pxb_debug_bit_t; + +static char *pxb_debug_sym [] = { /* same sequence as pxb_debug_bit_t */ + /* 0 */ "attach", + /* 1 */ "pwr" +}; /* Tunables. Beware: Some are for debug purpose only. */ /* @@ -199,7 +220,7 @@ static struct dev_ops pxb_ops = { static struct modldrv modldrv = { &mod_driverops, /* Type of module */ - "PCIe/PCI nexus driver 1.29", + "PCIe/PCI nexus driver %I%", &pxb_ops, /* driver ops */ }; @@ -584,6 +605,10 @@ pxb_ctlops(dev_info_t *dip, dev_info_t *rdip, int totreg; struct detachspec *ds; struct attachspec *as; + pxb_devstate_t *pxb_p; + + pxb_p = (pxb_devstate_t *)ddi_get_soft_state(pxb_state, + ddi_get_instance(dip)); switch (ctlop) { case DDI_CTLOPS_REPORTDEV: @@ -612,6 +637,9 @@ pxb_ctlops(dev_info_t *dip, dev_info_t *rdip, break; case DDI_CTLOPS_ATTACH: + if (!pcie_is_child(dip, rdip)) + return (DDI_SUCCESS); + as = (struct attachspec *)arg; switch (as->when) { case DDI_PRE: @@ -643,6 +671,8 @@ pxb_ctlops(dev_info_t *dip, dev_info_t *rdip, if (as->cmd == DDI_ATTACH && as->result != DDI_SUCCESS) pcie_pm_release(dip); + pf_init(rdip, (void *)pxb_p->pxb_fm_ibc); + /* * For hotplug-capable slots, we should explicitly * disable the errors, so that we won't panic upon @@ -667,8 +697,15 @@ pxb_ctlops(dev_info_t *dip, dev_info_t *rdip, break; case 
DDI_CTLOPS_DETACH: + if (!pcie_is_child(dip, rdip)) + return (DDI_SUCCESS); + ds = (struct detachspec *)arg; switch (ds->when) { + case DDI_PRE: + pf_fini(rdip); + return (DDI_SUCCESS); + case DDI_POST: if (ds->cmd == DDI_DETACH && ds->result == DDI_SUCCESS) { @@ -1733,8 +1770,7 @@ static int pxb_fm_err_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) { - pci_ereport_post(dip, derr, NULL); - return (derr->fme_status); + return (DDI_FM_OK); } /* @@ -2014,3 +2050,26 @@ pxb_dma_mctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle, cache_flags)); } #endif /* BCM_SW_WORKAROUNDS */ + +#ifdef DEBUG +static void +pxb_dbg(uint_t bit, dev_info_t *dip, char *fmt, ...) +{ + va_list ap; + + if (!(bit & pxb_dbg_print)) + return; + + if (dip) + prom_printf("%s(%d): %s", ddi_driver_name(dip), + ddi_get_instance(dip), pxb_debug_sym[bit]); +body: + va_start(ap, fmt); + if (ap) + prom_vprintf(fmt, ap); + else + prom_printf(fmt); + + va_end(ap); +} +#endif diff --git a/usr/src/uts/sun4/io/px/px_space.c b/usr/src/uts/sun4/io/px/px_space.c index 0cb5777d66..8fc809d4d5 100644 --- a/usr/src/uts/sun4/io/px/px_space.c +++ b/usr/src/uts/sun4/io/px/px_space.c @@ -30,10 +30,12 @@ */ #include <sys/types.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> #include <sys/cmn_err.h> #include <sys/time.h> #include <sys/pcie.h> -#include "px_space.h" +#include "px_obj.h" /*LINTLIBRARY*/ @@ -78,6 +80,11 @@ uint64_t px_perr_fatal = -1ull; uint64_t px_serr_fatal = -1ull; uint64_t px_errtrig_pa = 0x0; +char px_panic_hb_msg[] = " System bus"; +char px_panic_rc_msg[] = " PCIe root complex"; +char px_panic_rp_msg[] = " PCIe root port"; +char px_panic_fab_msg[] = " PCIe fabric"; + /* * The following flag controls behavior of the ino handler routine * when multiple interrupts are attached to a single ino. Typically @@ -157,24 +164,24 @@ uint32_t px_pwr_pil = PX_PWR_PIL; uint32_t px_max_l1_tries = PX_MAX_L1_TRIES; +/* Print and Log tunables. 
The following variables are bit masks of PX_* severity flags */ +#ifdef DEBUG +uint32_t px_log = PX_PANIC | PX_NO_PANIC | PX_PROTECTED | PX_HW_RESET; +#else +uint32_t px_log = PX_PANIC; +#endif +uint32_t px_die = PX_PANIC | PX_PROTECTED | PX_HW_RESET; + /* Fire PCIe Error that should cause panics */ -uint32_t px_fabric_die = 1; +boolean_t px_fabric_die = B_TRUE; +/* Root Complex PCIe Error bit flags that should cause panics */ uint32_t px_fabric_die_rc_ce = 0; -uint32_t px_fabric_die_rc_ue = PCIE_AER_UCE_UR | - PCIE_AER_UCE_TO | - PCIE_AER_UCE_RO | - PCIE_AER_UCE_FCP | - PCIE_AER_UCE_DLP | - PCIE_AER_UCE_ECRC | - PCIE_AER_UCE_PTLP | - PCIE_AER_UCE_MTLP; +uint32_t px_fabric_die_rc_ue = 0; -/* Fire PCIe Error that should cause panics even under protected access */ -uint32_t px_fabric_die_rc_ce_gos = 0; -uint32_t px_fabric_die_rc_ue_gos = PCIE_AER_UCE_RO | - PCIE_AER_UCE_FCP | - PCIE_AER_UCE_DLP; +/* Root Complex PCIe Error bit flags that should be forgiven */ +uint32_t px_fabric_forgive_rc_ce = 0; +uint32_t px_fabric_forgive_rc_ue = 0; /* Fabric Error that should cause panics */ uint32_t px_fabric_die_ce = 0; @@ -184,9 +191,7 @@ uint32_t px_fabric_die_ue = PCIE_AER_UCE_UR | PCIE_AER_UCE_RO | PCIE_AER_UCE_FCP | PCIE_AER_UCE_DLP | - PCIE_AER_UCE_TRAINING | - PCIE_AER_UCE_PTLP | - PCIE_AER_UCE_MTLP; + PCIE_AER_UCE_TRAINING; /* Fabric Error that should cause panics even under protected access */ uint32_t px_fabric_die_ce_gos = 0; diff --git a/usr/src/uts/sun4/io/px/px_space.h b/usr/src/uts/sun4/io/px/px_space.h index 7ca21d0641..5b6b51afa0 100644 --- a/usr/src/uts/sun4/io/px/px_space.h +++ b/usr/src/uts/sun4/io/px/px_space.h @@ -34,6 +34,12 @@ extern "C" { #define PX_SPURINTR_MSG_DEFAULT -1ull +extern char px_panic_hb_msg[]; +extern char px_panic_rc_msg[]; +extern char px_panic_rp_msg[]; +extern char px_panic_fab_msg[]; + +extern uint_t px_max_errorq_size; extern ushort_t px_command_default; extern uint_t px_set_latency_timer_register; extern uint64_t px_perr_fatal; @@ -105,12 +111,16 @@ 
extern uint64_t px_lup_poll_interval; extern uint32_t px_pwr_pil; extern uint32_t px_max_l1_tries; +/* Print and Log tunables */ +extern uint32_t px_log; +extern uint32_t px_die; + /* Fabric Error that should cause panics */ -extern uint32_t px_fabric_die; +extern boolean_t px_fabric_die; extern uint32_t px_fabric_die_rc_ce; extern uint32_t px_fabric_die_rc_ue; -extern uint32_t px_fabric_die_rc_ce_gos; -extern uint32_t px_fabric_die_rc_ue_gos; +extern uint32_t px_fabric_forgive_rc_ce; +extern uint32_t px_fabric_forgive_rc_ue; extern uint32_t px_fabric_die_ce; extern uint32_t px_fabric_die_ue; extern uint32_t px_fabric_die_ce_gos; diff --git a/usr/src/uts/sun4/io/px/px_var.h b/usr/src/uts/sun4/io/px/px_var.h index 692e108d46..47b1e0f6f2 100644 --- a/usr/src/uts/sun4/io/px/px_var.h +++ b/usr/src/uts/sun4/io/px/px_var.h @@ -146,6 +146,13 @@ struct px { /* CPR callback id */ callb_id_t px_cprcb_id; uint32_t px_dma_sync_opt; /* DMA syncing req. of hw */ + + /* Handle for soft intr */ + ddi_softint_handle_t px_dbg_hdl; /* HDL for dbg printing */ + + /* array to keep track of register snapshots during error handling */ + int px_dq_tail; /* last valid index in cs array */ + pf_data_t *px_dq_p; }; /* px soft state flag */ diff --git a/usr/src/uts/sun4u/io/pci/pci_pci.c b/usr/src/uts/sun4u/io/pci/pci_pci.c index 625e9e86cc..108b9f5140 100644 --- a/usr/src/uts/sun4u/io/pci/pci_pci.c +++ b/usr/src/uts/sun4u/io/pci/pci_pci.c @@ -36,8 +36,8 @@ #include <sys/autoconf.h> #include <sys/ddi_impldefs.h> #include <sys/ddi_subrdefs.h> -#include <sys/pci.h> -#include <sys/pci_impl.h> +#include <sys/pcie.h> +#include <sys/pcie_impl.h> #include <sys/pci_cap.h> #include <sys/pci/pci_nexus.h> #include <sys/pci/pci_regs.h> @@ -140,7 +140,6 @@ static int ppb_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp); static int ppb_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags, char *name, caddr_t valuep, int *lengthp); -static int 
ppb_get_bdf_from_dip(dev_info_t *dip, uint32_t *bdf); static struct cb_ops ppb_cb_ops = { ppb_open, /* open */ @@ -244,6 +243,8 @@ typedef struct { #define PPB_SOFT_STATE_OPEN_EXCL 0x02 int fm_cap; ddi_iblock_cookie_t fm_ibc; + + uint8_t parent_bus; } ppb_devstate_t; /* @@ -291,6 +292,7 @@ static void ppb_fm_fini(ppb_devstate_t *ppb_p); static void ppb_removechild(dev_info_t *); static int ppb_initchild(dev_info_t *child); +static void ppb_uninitchild(dev_info_t *child); static dev_info_t *get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip); static void ppb_pwr_setup(ppb_devstate_t *ppb, dev_info_t *dip); static void ppb_pwr_teardown(ppb_devstate_t *ppb, dev_info_t *dip); @@ -545,8 +547,13 @@ ppb_ctlops(dev_info_t *dip, dev_info_t *rdip, pci_regspec_t *drv_regp; int reglen; int rn; - + struct attachspec *as; + struct detachspec *ds; int totreg; + ppb_devstate_t *ppb_p; + + ppb_p = (ppb_devstate_t *)ddi_get_soft_state(ppb_state, + ddi_get_instance(dip)); switch (ctlop) { case DDI_CTLOPS_REPORTDEV: @@ -562,7 +569,29 @@ ppb_ctlops(dev_info_t *dip, dev_info_t *rdip, return (ppb_initchild((dev_info_t *)arg)); case DDI_CTLOPS_UNINITCHILD: - ppb_removechild((dev_info_t *)arg); + ppb_uninitchild((dev_info_t *)arg); + return (DDI_SUCCESS); + + case DDI_CTLOPS_ATTACH: + if (!pcie_is_child(dip, rdip)) + return (DDI_SUCCESS); + + as = (struct attachspec *)arg; + if ((ppb_p->parent_bus == PCIE_PCIECAP_DEV_TYPE_PCIE_DEV) && + (as->when == DDI_POST)) + pf_init(rdip, ppb_p->fm_ibc); + + return (DDI_SUCCESS); + + case DDI_CTLOPS_DETACH: + if (!pcie_is_child(dip, rdip)) + return (DDI_SUCCESS); + + ds = (struct detachspec *)arg; + if ((ppb_p->parent_bus == PCIE_PCIECAP_DEV_TYPE_PCIE_DEV) && + (ds->when == DDI_PRE)) + pf_fini(rdip); + return (DDI_SUCCESS); case DDI_CTLOPS_SIDDEV: @@ -750,7 +779,6 @@ ppb_initchild(dev_info_t *child) uchar_t header_type; uchar_t min_gnt, latency_timer; ppb_devstate_t *ppb; - pci_parent_data_t *pd_p; /* * Name the child @@ -798,11 +826,11 @@ 
ppb_initchild(dev_info_t *child) return (DDI_NOT_WELL_FORMED); } - ddi_set_parent_data(child, NULL); - ppb = (ppb_devstate_t *)ddi_get_soft_state(ppb_state, ddi_get_instance(ddi_get_parent(child))); + ddi_set_parent_data(child, NULL); + /* * If hardware is PM capable, set up the power info structure. * This also ensures the the bus will not be off (0MHz) otherwise @@ -866,7 +894,7 @@ ppb_initchild(dev_info_t *child) * If the device has a bus control register then program it * based on the settings in the command register. */ - if ((header_type & PCI_HEADER_TYPE_M) == PCI_HEADER_ONE) { + if ((header_type & PCI_HEADER_TYPE_M) == PCI_HEADER_ONE) { bcr = pci_config_get8(config_handle, PCI_BCNF_BCNTRL); if (ppb_command_default & PCI_COMM_PARITY_DETECT) bcr |= PCI_BCNF_BCNTRL_PARITY_ENABLE; @@ -917,6 +945,18 @@ ppb_initchild(dev_info_t *child) } /* + * SPARC PCIe FMA specific + * + * Note: parent_data for parent is created only if this is sparc PCI-E + * platform, for which, SG take a different route to handle device + * errors. + */ + if (ppb->parent_bus == PCIE_PCIECAP_DEV_TYPE_PCIE_DEV) { + if (pcie_init_ppd(child) == NULL) + return (DDI_FAILURE); + } + + /* * Check to see if the XMITS/PCI-X workaround applies. */ n = ddi_getprop(DDI_DEV_T_ANY, child, DDI_PROP_NOTPROM, @@ -929,38 +969,31 @@ ppb_initchild(dev_info_t *child) pcix_set_cmd_reg(child, n); } - /* Allocate memory for pci parent data */ - pd_p = kmem_zalloc(sizeof (pci_parent_data_t), KM_SLEEP); + /* since cached, teardown config handle in ppb_uninitchild() */ + return (DDI_SUCCESS); +} - /* - * Retrieve and save BDF and PCIE2PCI bridge's secondary bus - * information in the parent private data structure. 
- */ - if (ppb_get_bdf_from_dip(child, &pd_p->pci_bdf) != DDI_SUCCESS) { - kmem_free(pd_p, sizeof (pci_parent_data_t)); - pci_config_teardown(&config_handle); - return (DDI_FAILURE); - } +static void +ppb_uninitchild(dev_info_t *child) +{ + ppb_devstate_t *ppb; - pd_p->pci_sec_bus = ddi_prop_get_int(DDI_DEV_T_ANY, child, 0, - "pcie2pci-sec-bus", 0); + ppb = (ppb_devstate_t *)ddi_get_soft_state(ppb_state, + ddi_get_instance(ddi_get_parent(child))); - ddi_set_parent_data(child, (void *)pd_p); - pci_config_teardown(&config_handle); + /* + * SG OPL FMA specific + */ + if (ppb->parent_bus == PCIE_PCIECAP_DEV_TYPE_PCIE_DEV) + pcie_uninit_ppd(child); - return (DDI_SUCCESS); + ppb_removechild(child); } static void ppb_removechild(dev_info_t *dip) { ppb_devstate_t *ppb; - pci_parent_data_t *pd_p; - - if (pd_p = ddi_get_parent_data(dip)) { - ddi_set_parent_data(dip, NULL); - kmem_free(pd_p, sizeof (pci_parent_data_t)); - } ppb = (ppb_devstate_t *)ddi_get_soft_state(ppb_state, ddi_get_instance(ddi_get_parent(dip))); @@ -1648,34 +1681,16 @@ static int ppb_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, return (ddi_prop_op(dev, dip, prop_op, flags, name, valuep, lengthp)); } -static int -ppb_get_bdf_from_dip(dev_info_t *dip, uint32_t *bdf) -{ - pci_regspec_t *regspec; - int reglen; - - if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, - "reg", (int **)®spec, (uint_t *)®len) != DDI_SUCCESS) - return (DDI_FAILURE); - - if (reglen < (sizeof (pci_regspec_t) / sizeof (int))) { - ddi_prop_free(regspec); - return (DDI_FAILURE); - } - - /* Get phys_hi from first element. All have same bdf. 
*/ - *bdf = (regspec->pci_phys_hi & (PCI_REG_BDFR_M ^ PCI_REG_REG_M)) >> 8; - - ddi_prop_free(regspec); - return (DDI_SUCCESS); -} - /* * Initialize our FMA resources */ static void ppb_fm_init(ppb_devstate_t *ppb_p) { + dev_info_t *root = ddi_root_node(); + dev_info_t *pdip; + char *bus; + ppb_p->fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE | DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE; @@ -1693,6 +1708,21 @@ ppb_fm_init(ppb_devstate_t *ppb_p) * Register error callback with our parent. */ ddi_fm_handler_register(ppb_p->dip, ppb_err_callback, NULL); + + ppb_p->parent_bus = PCIE_PCIECAP_DEV_TYPE_PCI_DEV; + for (pdip = ddi_get_parent(ppb_p->dip); pdip && (pdip != root) && + (ppb_p->parent_bus != PCIE_PCIECAP_DEV_TYPE_PCIE_DEV); + pdip = ddi_get_parent(pdip)) { + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, + DDI_PROP_DONTPASS, "device_type", &bus) != + DDI_PROP_SUCCESS) + break; + + if (strcmp(bus, "pciex") == 0) + ppb_p->parent_bus = PCIE_PCIECAP_DEV_TYPE_PCIE_DEV; + + ddi_prop_free(bus); + } } /* @@ -1730,6 +1760,18 @@ ppb_fm_init_child(dev_info_t *dip, dev_info_t *tdip, int cap, static int ppb_err_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) { + ppb_devstate_t *ppb_p = (ppb_devstate_t *)ddi_get_soft_state(ppb_state, + ddi_get_instance(dip)); + + /* + * errors handled by SPARC PCI-E framework for PCIe platforms + */ + if (ppb_p->parent_bus == PCIE_PCIECAP_DEV_TYPE_PCIE_DEV) + return (DDI_FM_OK); + + /* + * do the following for SPARC PCI platforms + */ ASSERT(impl_data == NULL); pci_ereport_post(dip, derr, NULL); return (derr->fme_status); diff --git a/usr/src/uts/sun4u/io/px/px_err.c b/usr/src/uts/sun4u/io/px/px_err.c index 0cdf53df01..8a11df9e16 100644 --- a/usr/src/uts/sun4u/io/px/px_err.c +++ b/usr/src/uts/sun4u/io/px/px_err.c @@ -133,37 +133,37 @@ uint64_t px_lpug_count_mask = PX_ERR_EN_ALL; PX_ERPT_SEND(erpt), \ PX_ERR_JBC_CLASS(bit) px_err_bit_desc_t px_err_jbc_tbl[] = { - /* JBC FATAL - see io erpt doc, section 1.1 */ 
- { JBC_BIT_DESC(MB_PEA, fatal_hw, jbc_fatal) }, - { JBC_BIT_DESC(CPE, fatal_hw, jbc_fatal) }, - { JBC_BIT_DESC(APE, fatal_hw, jbc_fatal) }, - { JBC_BIT_DESC(PIO_CPE, fatal_hw, jbc_fatal) }, - { JBC_BIT_DESC(JTCEEW, fatal_hw, jbc_fatal) }, - { JBC_BIT_DESC(JTCEEI, fatal_hw, jbc_fatal) }, - { JBC_BIT_DESC(JTCEER, fatal_hw, jbc_fatal) }, - - /* JBC MERGE - see io erpt doc, section 1.2 */ + /* JBC FATAL */ + { JBC_BIT_DESC(MB_PEA, hw_reset, jbc_fatal) }, + { JBC_BIT_DESC(CPE, hw_reset, jbc_fatal) }, + { JBC_BIT_DESC(APE, hw_reset, jbc_fatal) }, + { JBC_BIT_DESC(PIO_CPE, hw_reset, jbc_fatal) }, + { JBC_BIT_DESC(JTCEEW, hw_reset, jbc_fatal) }, + { JBC_BIT_DESC(JTCEEI, hw_reset, jbc_fatal) }, + { JBC_BIT_DESC(JTCEER, hw_reset, jbc_fatal) }, + + /* JBC MERGE */ { JBC_BIT_DESC(MB_PER, jbc_merge, jbc_merge) }, { JBC_BIT_DESC(MB_PEW, jbc_merge, jbc_merge) }, - /* JBC Jbusint IN - see io erpt doc, section 1.3 */ - { JBC_BIT_DESC(UE_ASYN, fatal_gos, jbc_in) }, - { JBC_BIT_DESC(CE_ASYN, non_fatal, jbc_in) }, - { JBC_BIT_DESC(JTE, fatal_gos, jbc_in) }, - { JBC_BIT_DESC(JBE, jbc_jbusint_in, jbc_in) }, - { JBC_BIT_DESC(JUE, jbc_jbusint_in, jbc_in) }, - { JBC_BIT_DESC(ICISE, fatal_gos, jbc_in) }, + /* JBC Jbusint IN */ + { JBC_BIT_DESC(UE_ASYN, panic, jbc_in) }, + { JBC_BIT_DESC(CE_ASYN, no_error, jbc_in) }, + { JBC_BIT_DESC(JTE, panic, jbc_in) }, + { JBC_BIT_DESC(JBE, panic, jbc_in) }, + { JBC_BIT_DESC(JUE, panic, jbc_in) }, + { JBC_BIT_DESC(ICISE, panic, jbc_in) }, { JBC_BIT_DESC(WR_DPE, jbc_jbusint_in, jbc_in) }, { JBC_BIT_DESC(RD_DPE, jbc_jbusint_in, jbc_in) }, - { JBC_BIT_DESC(ILL_BMW, jbc_jbusint_in, jbc_in) }, - { JBC_BIT_DESC(ILL_BMR, jbc_jbusint_in, jbc_in) }, - { JBC_BIT_DESC(BJC, jbc_jbusint_in, jbc_in) }, + { JBC_BIT_DESC(ILL_BMW, panic, jbc_in) }, + { JBC_BIT_DESC(ILL_BMR, panic, jbc_in) }, + { JBC_BIT_DESC(BJC, panic, jbc_in) }, - /* JBC Jbusint Out - see io erpt doc, section 1.4 */ - { JBC_BIT_DESC(IJP, fatal_gos, jbc_out) }, + /* JBC Jbusint Out */ + { 
JBC_BIT_DESC(IJP, panic, jbc_out) }, /* - * JBC Dmcint ODCD - see io erpt doc, section 1.5 + * JBC Dmcint ODCD * * Error bits which can be set via a bad PCItool access go through * jbc_safe_acc instead. @@ -172,15 +172,15 @@ px_err_bit_desc_t px_err_jbc_tbl[] = { { JBC_BIT_DESC(ILL_ACC_RD, jbc_safe_acc, jbc_odcd) }, { JBC_BIT_DESC(PIO_UNMAP, jbc_safe_acc, jbc_odcd) }, { JBC_BIT_DESC(PIO_DPE, jbc_dmcint_odcd, jbc_odcd) }, - { JBC_BIT_DESC(PIO_CPE, non_fatal, jbc_odcd) }, + { JBC_BIT_DESC(PIO_CPE, hw_reset, jbc_odcd) }, { JBC_BIT_DESC(ILL_ACC, jbc_safe_acc, jbc_odcd) }, - /* JBC Dmcint IDC - see io erpt doc, section 1.6 */ - { JBC_BIT_DESC(UNSOL_RD, non_fatal, jbc_idc) }, - { JBC_BIT_DESC(UNSOL_INTR, non_fatal, jbc_idc) }, + /* JBC Dmcint IDC */ + { JBC_BIT_DESC(UNSOL_RD, no_panic, jbc_idc) }, + { JBC_BIT_DESC(UNSOL_INTR, no_panic, jbc_idc) }, - /* JBC CSR - see io erpt doc, section 1.7 */ - { JBC_BIT_DESC(EBUS_TO, jbc_csr, jbc_csr) } + /* JBC CSR */ + { JBC_BIT_DESC(EBUS_TO, panic, jbc_csr) } }; #define px_err_jbc_keys \ @@ -202,17 +202,17 @@ px_err_bit_desc_t px_err_jbc_tbl[] = { PX_ERR_UBC_CLASS(bit) px_err_bit_desc_t px_err_ubc_tbl[] = { /* UBC FATAL */ - { UBC_BIT_DESC(DMARDUEA, non_fatal, ubc_fatal) }, - { UBC_BIT_DESC(DMAWTUEA, fatal_sw, ubc_fatal) }, - { UBC_BIT_DESC(MEMRDAXA, fatal_sw, ubc_fatal) }, - { UBC_BIT_DESC(MEMWTAXA, fatal_sw, ubc_fatal) }, - { UBC_BIT_DESC(DMARDUEB, non_fatal, ubc_fatal) }, - { UBC_BIT_DESC(DMAWTUEB, fatal_sw, ubc_fatal) }, - { UBC_BIT_DESC(MEMRDAXB, fatal_sw, ubc_fatal) }, - { UBC_BIT_DESC(MEMWTAXB, fatal_sw, ubc_fatal) }, - { UBC_BIT_DESC(PIOWTUE, fatal_sw, ubc_fatal) }, - { UBC_BIT_DESC(PIOWBEUE, fatal_sw, ubc_fatal) }, - { UBC_BIT_DESC(PIORBEUE, fatal_sw, ubc_fatal) } + { UBC_BIT_DESC(DMARDUEA, no_panic, ubc_fatal) }, + { UBC_BIT_DESC(DMAWTUEA, panic, ubc_fatal) }, + { UBC_BIT_DESC(MEMRDAXA, panic, ubc_fatal) }, + { UBC_BIT_DESC(MEMWTAXA, panic, ubc_fatal) }, + { UBC_BIT_DESC(DMARDUEB, no_panic, ubc_fatal) }, + { 
UBC_BIT_DESC(DMAWTUEB, panic, ubc_fatal) }, + { UBC_BIT_DESC(MEMRDAXB, panic, ubc_fatal) }, + { UBC_BIT_DESC(MEMWTAXB, panic, ubc_fatal) }, + { UBC_BIT_DESC(PIOWTUE, panic, ubc_fatal) }, + { UBC_BIT_DESC(PIOWBEUE, panic, ubc_fatal) }, + { UBC_BIT_DESC(PIORBEUE, panic, ubc_fatal) } }; #define px_err_ubc_keys \ @@ -242,20 +242,20 @@ char *ubc_class_eid_qualifier[] = { PX_ERPT_SEND(erpt), \ PX_ERR_DMC_CLASS(bit) px_err_bit_desc_t px_err_imu_tbl[] = { - /* DMC IMU RDS - see io erpt doc, section 2.1 */ - { IMU_BIT_DESC(MSI_MAL_ERR, non_fatal, imu_rds) }, - { IMU_BIT_DESC(MSI_PAR_ERR, fatal_stuck, imu_rds) }, - { IMU_BIT_DESC(PMEACK_MES_NOT_EN, imu_rbne, imu_rds) }, - { IMU_BIT_DESC(PMPME_MES_NOT_EN, imu_pme, imu_rds) }, - { IMU_BIT_DESC(FATAL_MES_NOT_EN, imu_rbne, imu_rds) }, - { IMU_BIT_DESC(NONFATAL_MES_NOT_EN, imu_rbne, imu_rds) }, - { IMU_BIT_DESC(COR_MES_NOT_EN, imu_rbne, imu_rds) }, - { IMU_BIT_DESC(MSI_NOT_EN, imu_rbne, imu_rds) }, - - /* DMC IMU SCS - see io erpt doc, section 2.2 */ - { IMU_BIT_DESC(EQ_NOT_EN, imu_rbne, imu_rds) }, - - /* DMC IMU - see io erpt doc, section 2.3 */ + /* DMC IMU RDS */ + { IMU_BIT_DESC(MSI_MAL_ERR, panic, imu_rds) }, + { IMU_BIT_DESC(MSI_PAR_ERR, panic, imu_rds) }, + { IMU_BIT_DESC(PMEACK_MES_NOT_EN, panic, imu_rds) }, + { IMU_BIT_DESC(PMPME_MES_NOT_EN, panic, imu_rds) }, + { IMU_BIT_DESC(FATAL_MES_NOT_EN, panic, imu_rds) }, + { IMU_BIT_DESC(NONFATAL_MES_NOT_EN, panic, imu_rds) }, + { IMU_BIT_DESC(COR_MES_NOT_EN, panic, imu_rds) }, + { IMU_BIT_DESC(MSI_NOT_EN, panic, imu_rds) }, + + /* DMC IMU SCS */ + { IMU_BIT_DESC(EQ_NOT_EN, panic, imu_rds) }, + + /* DMC IMU */ { IMU_BIT_DESC(EQ_OVER, imu_eq_ovfl, imu) } }; @@ -274,21 +274,21 @@ px_err_bit_desc_t px_err_imu_tbl[] = { PX_ERPT_SEND(erpt), \ PX_ERR_DMC_CLASS(bit) px_err_bit_desc_t px_err_mmu_tbl[] = { - /* DMC MMU TFAR/TFSR - see io erpt doc, section 2.4 */ + /* DMC MMU TFAR/TFSR */ { MMU_BIT_DESC(BYP_ERR, mmu_rbne, mmu_tfar_tfsr) }, { MMU_BIT_DESC(BYP_OOR, mmu_tfa, mmu_tfar_tfsr) 
}, - { MMU_BIT_DESC(TRN_ERR, mmu_rbne, mmu_tfar_tfsr) }, + { MMU_BIT_DESC(TRN_ERR, panic, mmu_tfar_tfsr) }, { MMU_BIT_DESC(TRN_OOR, mmu_tfa, mmu_tfar_tfsr) }, { MMU_BIT_DESC(TTE_INV, mmu_tfa, mmu_tfar_tfsr) }, { MMU_BIT_DESC(TTE_PRT, mmu_tfa, mmu_tfar_tfsr) }, - { MMU_BIT_DESC(TTC_DPE, mmu_tfa, mmu_tfar_tfsr) }, - { MMU_BIT_DESC(TBW_DME, mmu_tblwlk, mmu_tfar_tfsr) }, - { MMU_BIT_DESC(TBW_UDE, mmu_tblwlk, mmu_tfar_tfsr) }, - { MMU_BIT_DESC(TBW_ERR, mmu_tblwlk, mmu_tfar_tfsr) }, - { MMU_BIT_DESC(TBW_DPE, mmu_tblwlk, mmu_tfar_tfsr) }, - - /* DMC MMU - see io erpt doc, section 2.5 */ - { MMU_BIT_DESC(TTC_CAE, non_fatal, mmu) } + { MMU_BIT_DESC(TTC_DPE, mmu_parity, mmu_tfar_tfsr) }, + { MMU_BIT_DESC(TBW_DME, panic, mmu_tfar_tfsr) }, + { MMU_BIT_DESC(TBW_UDE, panic, mmu_tfar_tfsr) }, + { MMU_BIT_DESC(TBW_ERR, panic, mmu_tfar_tfsr) }, + { MMU_BIT_DESC(TBW_DPE, mmu_parity, mmu_tfar_tfsr) }, + + /* DMC MMU */ + { MMU_BIT_DESC(TTC_CAE, panic, mmu) } }; #define px_err_mmu_keys (sizeof (px_err_mmu_tbl)) / (sizeof (px_err_bit_desc_t)) @@ -308,8 +308,8 @@ px_err_bit_desc_t px_err_mmu_tbl[] = { PX_ERPT_SEND(erpt), \ PX_ERR_PEC_CLASS(bit) px_err_bit_desc_t px_err_ilu_tbl[] = { - /* PEC ILU none - see io erpt doc, section 3.1 */ - { ILU_BIT_DESC(IHB_PE, fatal_gos, pec_ilu) } + /* PEC ILU none */ + { ILU_BIT_DESC(IHB_PE, panic, pec_ilu) } }; #define px_err_ilu_keys \ (sizeof (px_err_ilu_tbl)) / (sizeof (px_err_bit_desc_t)) @@ -342,20 +342,20 @@ px_err_bit_desc_t px_err_ilu_tbl[] = { PX_ERPT_SEND(erpt), \ PX_ERR_PEC_CLASS(bit) px_err_bit_desc_t px_err_tlu_ue_tbl[] = { - /* PCI-E Receive Uncorrectable Errors - see io erpt doc, section 3.2 */ + /* PCI-E Receive Uncorrectable Errors */ { TLU_UC_BIT_DESC(UR, pciex_ue, pciex_rx_ue) }, { TLU_UC_BIT_DESC(UC, pciex_ue, pciex_rx_ue) }, - /* PCI-E Transmit Uncorrectable Errors - see io erpt doc, section 3.3 */ + /* PCI-E Transmit Uncorrectable Errors */ { TLU_UC_OB_BIT_DESC(ECRC, pciex_ue, pciex_rx_ue) }, { TLU_UC_BIT_DESC(CTO, pciex_ue, 
pciex_tx_ue) }, { TLU_UC_BIT_DESC(ROF, pciex_ue, pciex_tx_ue) }, - /* PCI-E Rx/Tx Uncorrectable Errors - see io erpt doc, section 3.4 */ + /* PCI-E Rx/Tx Uncorrectable Errors */ { TLU_UC_BIT_DESC(MFP, pciex_ue, pciex_rx_tx_ue) }, { TLU_UC_BIT_DESC(PP, pciex_ue, pciex_rx_tx_ue) }, - /* Other PCI-E Uncorrectable Errors - see io erpt doc, section 3.5 */ + /* Other PCI-E Uncorrectable Errors */ { TLU_UC_BIT_DESC(FCP, pciex_ue, pciex_ue) }, { TLU_UC_BIT_DESC(DLP, pciex_ue, pciex_ue) }, { TLU_UC_BIT_DESC(TE, pciex_ue, pciex_ue) }, @@ -384,7 +384,7 @@ px_err_bit_desc_t px_err_tlu_ue_tbl[] = { PX_ERPT_SEND(erpt), \ PX_ERR_PEC_CLASS(bit) px_err_bit_desc_t px_err_tlu_ce_tbl[] = { - /* PCI-E Correctable Errors - see io erpt doc, section 3.6 */ + /* PCI-E Correctable Errors */ { TLU_CE_BIT_DESC(RTO, pciex_ce, pciex_ce) }, { TLU_CE_BIT_DESC(RNR, pciex_ce, pciex_ce) }, { TLU_CE_BIT_DESC(BDP, pciex_ce, pciex_ce) }, @@ -419,31 +419,29 @@ px_err_bit_desc_t px_err_tlu_ce_tbl[] = { PX_ERPT_SEND(erpt), \ PX_ERR_PEC_OB_CLASS(bit) px_err_bit_desc_t px_err_tlu_oe_tbl[] = { - /* - * TLU Other Event Status (receive only) - see io erpt doc, section 3.7 - */ - { TLU_OE_BIT_DESC(MRC, fatal_hw, pciex_rx_oe) }, - - /* TLU Other Event Status (rx + tx) - see io erpt doc, section 3.8 */ - { TLU_OE_BIT_DESC(WUC, non_fatal, pciex_rx_tx_oe) }, - { TLU_OE_BIT_DESC(RUC, non_fatal, pciex_rx_tx_oe) }, - { TLU_OE_BIT_DESC(CRS, non_fatal, pciex_rx_tx_oe) }, - - /* TLU Other Event - see io erpt doc, section 3.9 */ - { TLU_OE_BIT_DESC(IIP, fatal_gos, pciex_oe) }, - { TLU_OE_BIT_DESC(EDP, fatal_gos, pciex_oe) }, - { TLU_OE_BIT_DESC(EHP, fatal_gos, pciex_oe) }, - { TLU_OE_OB_BIT_DESC(TLUEITMO, fatal_gos, pciex_oe) }, - { TLU_OE_BIT_DESC(LIN, non_fatal, pciex_oe) }, - { TLU_OE_BIT_DESC(LRS, non_fatal, pciex_oe) }, + /* TLU Other Event Status (receive only) */ + { TLU_OE_BIT_DESC(MRC, hw_reset, pciex_rx_oe) }, + + /* TLU Other Event Status (rx + tx) */ + { TLU_OE_BIT_DESC(WUC, wuc_ruc, pciex_rx_tx_oe) }, + { 
TLU_OE_BIT_DESC(RUC, wuc_ruc, pciex_rx_tx_oe) }, + { TLU_OE_BIT_DESC(CRS, no_panic, pciex_rx_tx_oe) }, + + /* TLU Other Event */ + { TLU_OE_BIT_DESC(IIP, panic, pciex_oe) }, + { TLU_OE_BIT_DESC(EDP, panic, pciex_oe) }, + { TLU_OE_BIT_DESC(EHP, panic, pciex_oe) }, + { TLU_OE_OB_BIT_DESC(TLUEITMO, panic, pciex_oe) }, + { TLU_OE_BIT_DESC(LIN, no_panic, pciex_oe) }, + { TLU_OE_BIT_DESC(LRS, no_panic, pciex_oe) }, { TLU_OE_BIT_DESC(LDN, tlu_ldn, pciex_oe) }, { TLU_OE_BIT_DESC(LUP, tlu_lup, pciex_oe) }, - { TLU_OE_BIT_DESC(ERU, fatal_gos, pciex_oe) }, - { TLU_OE_BIT_DESC(ERO, fatal_gos, pciex_oe) }, - { TLU_OE_BIT_DESC(EMP, fatal_gos, pciex_oe) }, - { TLU_OE_BIT_DESC(EPE, fatal_gos, pciex_oe) }, - { TLU_OE_BIT_DESC(ERP, fatal_gos, pciex_oe) }, - { TLU_OE_BIT_DESC(EIP, fatal_gos, pciex_oe) } + { TLU_OE_BIT_DESC(ERU, panic, pciex_oe) }, + { TLU_OE_BIT_DESC(ERO, panic, pciex_oe) }, + { TLU_OE_BIT_DESC(EMP, panic, pciex_oe) }, + { TLU_OE_BIT_DESC(EPE, panic, pciex_oe) }, + { TLU_OE_BIT_DESC(ERP, panic, pciex_oe) }, + { TLU_OE_BIT_DESC(EIP, panic, pciex_oe) } }; #define px_err_tlu_oe_keys \ @@ -633,7 +631,7 @@ typedef struct px_err_ss { uint64_t err_status[PX_ERR_REG_KEYS]; } px_err_ss_t; -static void px_err_snapshot(px_t *px_p, px_err_ss_t *ss, boolean_t chk_cb); +static void px_err_snapshot(px_t *px_p, px_err_ss_t *ss, int block); static int px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr, px_err_ss_t *ss); static int px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, @@ -644,10 +642,7 @@ static int px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, * Interrupt handler for the JBC/UBC block. * o lock * o create derr - * o px_err_handle(leaf1, with cb) - * o px_err_handle(leaf2, without cb) - * o dispatch (leaf1) - * o dispatch (leaf2) + * o px_err_cmn_intr * o unlock * o handle error: fatal? 
fm_panic() : return INTR_CLAIMED) */ @@ -657,9 +652,7 @@ px_err_cb_intr(caddr_t arg) px_fault_t *px_fault_p = (px_fault_t *)arg; dev_info_t *rpdip = px_fault_p->px_fh_dip; px_t *px_p = DIP_TO_STATE(rpdip); - int err = PX_OK; - int ret = DDI_FM_OK; - int fatal = 0; + int err; ddi_fm_error_t derr; /* Create the derr */ @@ -670,44 +663,24 @@ px_err_cb_intr(caddr_t arg) mutex_enter(&px_p->px_fm_mutex); - err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE); - - ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); - switch (ret) { - case DDI_FM_FATAL: - fatal++; - break; - case DDI_FM_NONFATAL: - case DDI_FM_UNKNOWN: - default: - break; - } - - /* Set the intr state to idle for the leaf that received the mondo */ - + err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_HOST); (void) px_lib_intr_setstate(rpdip, px_fault_p->px_fh_sysino, INTR_IDLE_STATE); mutex_exit(&px_p->px_fm_mutex); - /* - * PX_FATAL_HW error is diagnosed after system recovered from - * HW initiated reset, therefore no furthur handling is required. - */ - if (fatal || err & (PX_FATAL_GOS | PX_FATAL_SW)) - PX_FM_PANIC("Fatal System Bus Error has occurred\n"); + px_err_panic(err, PX_HB, PX_NO_ERROR); return (DDI_INTR_CLAIMED); } - /* * px_err_dmc_pec_intr: * Interrupt handler for the DMC/PEC block. * o lock * o create derr - * o px_err_handle(leaf, with cb) - * o dispatch (leaf) + * o px_err_cmn_intr(leaf, with out cb) + * o pcie_scan_fabric (leaf) * o unlock * o handle error: fatal? 
fm_panic() : return INTR_CLAIMED) */ @@ -717,8 +690,7 @@ px_err_dmc_pec_intr(caddr_t arg) px_fault_t *px_fault_p = (px_fault_t *)arg; dev_info_t *rpdip = px_fault_p->px_fh_dip; px_t *px_p = DIP_TO_STATE(rpdip); - int err = PX_OK; - int ret = DDI_FM_OK; + int rc_err, fab_err = PF_NO_PANIC; ddi_fm_error_t derr; /* Create the derr */ @@ -730,11 +702,12 @@ px_err_dmc_pec_intr(caddr_t arg) mutex_enter(&px_p->px_fm_mutex); /* send ereport/handle/clear fire registers */ - err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE); + rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE); /* Check all child devices for errors */ if (!px_lib_is_in_drain_state(px_p)) { - ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); + fab_err = pf_scan_fabric(rpdip, &derr, px_p->px_dq_p, + &px_p->px_dq_tail); } /* Set the interrupt state to idle */ @@ -743,12 +716,7 @@ px_err_dmc_pec_intr(caddr_t arg) mutex_exit(&px_p->px_fm_mutex); - /* - * PX_FATAL_HW indicates a condition recovered from Fatal-Reset, - * therefore it does not cause panic. - */ - if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL)) - PX_FM_PANIC("Fatal System Port Error has occurred\n"); + px_err_panic(rc_err, PX_RC, fab_err); return (DDI_INTR_CLAIMED); } @@ -831,31 +799,31 @@ px_err_reg_setup_pcie(uint8_t chip_mask, caddr_t csr_base, boolean_t enable) } /* - * px_err_handle: + * px_err_cmn_intr: * Common function called by trap, mondo and fabric intr. * o Snap shot current fire registers * o check for safe access * o send ereport and clear snap shot registers + * o create and queue RC info for later use in fabric scan. 
+ * o RUC/WUC, PTLP, MMU Errors(CA), UR * o check severity of snap shot registers * * @param px_p leaf in which to check access * @param derr fm err data structure to be updated * @param caller PX_TRAP_CALL | PX_INTR_CALL - * @param chk_cb whether to handle cb registers - * @return err PX_OK | PX_NONFATAL | - * PX_FATAL_GOS | PX_FATAL_HW | PX_STUCK_FATAL + * @param block PX_FM_BLOCK_HOST | PX_FM_BLOCK_PCIE | PX_FM_BLOCK_ALL + * @return err PX_NO_PANIC | PX_PANIC | PX_HW_RESET | PX_PROTECTED */ int -px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller, - boolean_t chk_cb) +px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block) { px_err_ss_t ss = {0}; - int err = PX_OK; + int err; ASSERT(MUTEX_HELD(&px_p->px_fm_mutex)); /* snap shot the current fire registers */ - px_err_snapshot(px_p, &ss, chk_cb); + px_err_snapshot(px_p, &ss, block); /* check for safe access */ px_err_safeacc_check(px_p, derr); @@ -867,7 +835,7 @@ px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller, err = px_err_check_severity(px_p, derr, err, caller); /* Mark the On Trap Handle if an error occured */ - if (err != PX_OK) { + if (err != PX_NO_ERROR) { px_pec_t *pec_p = px_p->px_pec_p; on_trap_data_t *otd = pec_p->pec_ontrap_data; @@ -885,18 +853,19 @@ px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller, /* * px_err_snapshot: * Take a current snap shot of all the fire error registers. This includes - * JBC/UBC, DMC, and PEC, unless chk_cb == false; + * JBC/UBC, DMC, and PEC depending on the block flag * * @param px_p leaf in which to take the snap shot. * @param ss pre-allocated memory to store the snap shot. * @param chk_cb boolean on whether to store jbc/ubc register. 
*/ static void -px_err_snapshot(px_t *px_p, px_err_ss_t *ss_p, boolean_t chk_cb) +px_err_snapshot(px_t *px_p, px_err_ss_t *ss_p, int block) { pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; caddr_t xbc_csr_base = (caddr_t)pxu_p->px_address[PX_REG_XBC]; caddr_t pec_csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR]; + caddr_t csr_base; uint8_t chip_mask = 1 << PX_CHIP_TYPE(pxu_p); const px_err_reg_desc_t *reg_desc_p = px_err_reg_tbl; px_err_id_t reg_id; @@ -904,11 +873,20 @@ px_err_snapshot(px_t *px_p, px_err_ss_t *ss_p, boolean_t chk_cb) for (reg_id = 0; reg_id < PX_ERR_REG_KEYS; reg_id++, reg_desc_p++) { if (!(reg_desc_p->chip_mask & chip_mask)) continue; - ss_p->err_status[reg_id] = - (reg_desc_p->reg_bank == PX_REG_CSR) ? - CSR_XR(pec_csr_base, reg_desc_p->status_addr) : - (chk_cb ? - CSR_XR(xbc_csr_base, reg_desc_p->status_addr) : 0); + + if ((block & PX_FM_BLOCK_HOST) && + (reg_desc_p->reg_bank == PX_REG_XBC)) + csr_base = xbc_csr_base; + else if ((block & PX_FM_BLOCK_PCIE) && + (reg_desc_p->reg_bank == PX_REG_CSR)) + csr_base = pec_csr_base; + else { + ss_p->err_status[reg_id] = 0; + continue; + } + + ss_p->err_status[reg_id] = CSR_XR(csr_base, + reg_desc_p->status_addr); } } @@ -934,15 +912,15 @@ px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr, px_err_ss_t *ss_p) px_err_bit_desc_t *err_bit_tbl; px_err_bit_desc_t *err_bit_desc; - uint64_t *log_mask, *count_mask; - uint64_t status_addr, clear_addr; + uint64_t *count_mask; + uint64_t clear_addr; uint64_t ss_reg; int (*err_handler)(); int (*erpt_handler)(); - px_err_id_t reg_id, key; - int err = PX_OK; - int biterr; + int reg_id, key; + int err = PX_NO_ERROR; + int biterr = 0; ASSERT(MUTEX_HELD(&px_p->px_fm_mutex)); @@ -958,60 +936,55 @@ px_err_erpt_and_clr(px_t *px_p, ddi_fm_error_t *derr, px_err_ss_t *ss_p) /* Get the correct CSR BASE */ csr_base = (caddr_t)pxu_p->px_address[err_reg_tbl->reg_bank]; + /* If there are no errors in this register, continue */ + ss_reg = ss_p->err_status[reg_id]; + if (!ss_reg) + 
continue; + /* Get pointers to masks and register addresses */ - log_mask = err_reg_tbl->log_mask_p; count_mask = err_reg_tbl->count_mask_p; - status_addr = err_reg_tbl->status_addr; clear_addr = err_reg_tbl->clear_addr; - ss_reg = ss_p->err_status[reg_id]; /* Get the register BIT description table */ err_bit_tbl = err_reg_tbl->err_bit_tbl; /* For each known bit in the register send erpt and handle */ for (key = 0; key < err_reg_tbl->err_bit_keys; key++) { - /* Get the bit description table for this register */ - err_bit_desc = &err_bit_tbl[key]; - /* * If the ss_reg is set for this bit, * send ereport and handle */ - if (BIT_TST(ss_reg, err_bit_desc->bit)) { - /* Increment the counter if necessary */ - if (BIT_TST(*count_mask, err_bit_desc->bit)) { - err_bit_desc->counter++; - } - - /* Error Handle for this bit */ - err_handler = err_bit_desc->err_handler; - if (err_handler) { - biterr = err_handler(rpdip, - csr_base, - derr, - err_reg_tbl, - err_bit_desc); - err |= biterr; - } - - /* Send the ereport if it's an UNEXPECTED err */ - erpt_handler = err_bit_desc->erpt_handler; - if ((derr->fme_flag == DDI_FM_ERR_UNEXPECTED) && - (biterr != PX_OK)) { - if (erpt_handler) - (void) erpt_handler(rpdip, - csr_base, - ss_reg, - derr, - err_bit_desc->bit, - err_bit_desc->class_name); - } + err_bit_desc = &err_bit_tbl[key]; + if (!BIT_TST(ss_reg, err_bit_desc->bit)) + continue; + + /* Increment the counter if necessary */ + if (BIT_TST(*count_mask, err_bit_desc->bit)) { + err_bit_desc->counter++; } + + /* Error Handle for this bit */ + err_handler = err_bit_desc->err_handler; + if (err_handler) { + biterr = err_handler(rpdip, csr_base, derr, + err_reg_tbl, err_bit_desc); + err |= biterr; + } + + /* + * Send the ereport if it's an UNEXPECTED err. + * This is the only place where PX_EXPECTED is utilized. 
+ */ + erpt_handler = err_bit_desc->erpt_handler; + if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) || + (biterr == PX_EXPECTED)) + continue; + + if (erpt_handler) + (void) erpt_handler(rpdip, csr_base, ss_reg, + derr, err_bit_desc->bit, + err_bit_desc->class_name); } - /* Print register status */ - if (ss_reg & *log_mask) - DBG(DBG_ERR_INTR, rpdip, "<%x>=%16llx %s\n", - status_addr, ss_reg, err_reg_tbl->msg); /* Clear the register and error */ CSR_XS(csr_base, clear_addr, ss_reg); @@ -1035,8 +1008,12 @@ px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, int err, int caller) px_pec_t *pec_p = px_p->px_pec_p; boolean_t is_safeacc = B_FALSE; - /* nothing to do if called with no error */ - if (err == PX_OK) + /* + * Nothing to do if called with no error. + * The err could have already been set to PX_NO_PANIC, which means the + * system doesn't need to panic, but PEEK/POKE still failed. + */ + if (err == PX_NO_ERROR) return (err); /* Cautious access error handling */ @@ -1071,84 +1048,103 @@ px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, int err, int caller) is_safeacc = B_FALSE; } - /* - * The third argument "err" is passed in as error status from checking - * Fire register, re-adjust error status from safe access. 
- */ - if (is_safeacc && !(err & PX_FATAL_GOS)) - return (PX_NONFATAL); + /* re-adjust error status from safe access, forgive all errors */ + if (is_safeacc) + return (PX_NO_PANIC); return (err); } /* predefined convenience functions */ /* ARGSUSED */ -int -px_err_fatal_hw_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr) +void +px_err_log_handle(dev_info_t *rpdip, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr, char *msg) { - return (PX_FATAL_HW); + DBG(DBG_ERR_INTR, rpdip, + "Bit %d, %s, at %s(0x%x) has occured %d times with a severity " + "of \"%s\"\n", + err_bit_descr->bit, err_bit_descr->class_name, + err_reg_descr->msg, err_reg_descr->status_addr, + err_bit_descr->counter, msg); } /* ARGSUSED */ int -px_err_fatal_gos_handle(dev_info_t *rpdip, caddr_t csr_base, +px_err_hw_reset_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - return (PX_FATAL_GOS); + if (px_log & PX_HW_RESET) { + px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, + "HW RESET"); + } + + return (PX_HW_RESET); } /* ARGSUSED */ int -px_err_fatal_stuck_handle(dev_info_t *rpdip, caddr_t csr_base, +px_err_panic_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - return (PX_STUCK_FATAL); + if (px_log & PX_PANIC) { + px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, "PANIC"); + } + + return (PX_PANIC); } /* ARGSUSED */ int -px_err_fatal_sw_handle(dev_info_t *rpdip, caddr_t csr_base, +px_err_protected_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - return (PX_FATAL_SW); + if (px_log & PX_PROTECTED) { + px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, + "PROTECTED"); + } + + return (PX_PROTECTED); } /* ARGSUSED */ int 
-px_err_non_fatal_handle(dev_info_t *rpdip, caddr_t csr_base, +px_err_no_panic_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - return (PX_NONFATAL); -} + if (px_log & PX_NO_PANIC) { + px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, + "NO PANIC"); + } -/* ARGSUSED */ -int -px_err_ok_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, - px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) -{ - return (PX_OK); + return (PX_NO_PANIC); } /* ARGSUSED */ int -px_err_unknown_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, - px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) +px_err_no_error_handle(dev_info_t *rpdip, caddr_t csr_base, + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr) { - return (PX_ERR_UNKNOWN); + if (px_log & PX_NO_ERROR) { + px_err_log_handle(rpdip, err_reg_descr, err_bit_descr, + "NO ERROR"); + } + + return (PX_NO_ERROR); } /* ARGSUSED */ PX_ERPT_SEND_DEC(do_not) { - return (PX_OK); + return (PX_NO_ERROR); } + /* UBC FATAL - see io erpt doc, section 1.1 */ /* ARGSUSED */ PX_ERPT_SEND_DEC(ubc_fatal) @@ -1246,10 +1242,10 @@ PX_ERPT_SEND_DEC(ubc_fatal) NULL); } - return (PX_OK); + return (PX_NO_PANIC); } -/* JBC FATAL - see io erpt doc, section 1.1 */ +/* JBC FATAL */ PX_ERPT_SEND_DEC(jbc_fatal) { char buf[FM_MAX_CLASS]; @@ -1273,10 +1269,10 @@ PX_ERPT_SEND_DEC(jbc_fatal) CSR_XR(csr_base, FATAL_ERROR_LOG_2), NULL); - return (PX_OK); + return (PX_NO_PANIC); } -/* JBC MERGE - see io erpt doc, section 1.2 */ +/* JBC MERGE */ PX_ERPT_SEND_DEC(jbc_merge) { char buf[FM_MAX_CLASS]; @@ -1298,37 +1294,38 @@ PX_ERPT_SEND_DEC(jbc_merge) CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG), NULL); - return (PX_OK); + return (PX_NO_PANIC); } /* - * JBC Merge buffer nonfatal errors: - * Merge buffer parity error (rd_buf): dma:read:M:nonfatal - * Merge buffer parity error 
(wr_buf): dma:write:M:nonfatal + * JBC Merge buffer retryable errors: + * Merge buffer parity error (rd_buf): PIO or DMA + * Merge buffer parity error (wr_buf): PIO or DMA */ /* ARGSUSED */ int px_err_jbc_merge_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr) + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr) { - boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); - uint64_t paddr; - int ret; - - if (!pri) - return (PX_FATAL_GOS); - - paddr = CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG); - paddr &= MERGE_TRANSACTION_ERROR_LOG_ADDRESS_MASK; - - ret = px_handle_lookup( - rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr); - - return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); + /* + * Holder function to attempt error recovery. When the features + * are in place, look up the address of the transaction in: + * + * paddr = CSR_XR(csr_base, MERGE_TRANSACTION_ERROR_LOG); + * paddr &= MERGE_TRANSACTION_ERROR_LOG_ADDRESS_MASK; + * + * If the error is a secondary error, there is no log information + * just panic as it is unknown which address has been affected. + * + * Remember the address is pretranslation and might be hard to look + * up the appropriate driver based on the PA. + */ + return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr, + err_bit_descr)); } -/* JBC Jbusint IN - see io erpt doc, section 1.3 */ +/* JBC Jbusint IN */ PX_ERPT_SEND_DEC(jbc_in) { char buf[FM_MAX_CLASS]; @@ -1352,45 +1349,39 @@ PX_ERPT_SEND_DEC(jbc_in) CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG_2), NULL); - return (PX_OK); + return (PX_NO_PANIC); } /* - * JBC Jbusint IN nonfatal errors: PA logged in Jbusint In Transaction Error + * JBC Jbusint IN retryable errors * Log Reg[42:0]. 
- * CE async fault error: nonfatal - * Jbus bus error: dma::nonfatal - * Jbus unmapped error: pio|dma:rdwr:M:nonfatal - * Write data parity error: pio/write:M:nonfatal - * Read data parity error: pio/read:M:nonfatal - * Illegal NCWR bytemask: pio:write:M:nonfatal - * Illegal NCRD bytemask: pio:write:M:nonfatal - * Invalid jbus transaction: nonfatal + * Write Data Parity Error: PIO Writes + * Read Data Parity Error: DMA Reads */ -/* ARGSUSED */ int px_err_jbc_jbusint_in_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); - uint64_t paddr; - int ret; - - if (!pri) - return (PX_FATAL_GOS); - - paddr = CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG); - paddr &= JBCINT_IN_TRANSACTION_ERROR_LOG_ADDRESS_MASK; - - ret = px_handle_lookup( - rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr); - - return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); + /* + * Holder function to attempt error recovery. When the features + * are in place, look up the address of the transaction in: + * + * paddr = CSR_XR(csr_base, JBCINT_IN_TRANSACTION_ERROR_LOG); + * paddr &= JBCINT_IN_TRANSACTION_ERROR_LOG_ADDRESS_MASK; + * + * If the error is a secondary error, there is no log information + * just panic as it is unknown which address has been affected. + * + * Remember the address is pretranslation and might be hard to look + * up the appropriate driver based on the PA. 
+ */ + return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr, + err_bit_descr)); } -/* JBC Jbusint Out - see io erpt doc, section 1.4 */ +/* JBC Jbusint Out */ PX_ERPT_SEND_DEC(jbc_out) { char buf[FM_MAX_CLASS]; @@ -1414,10 +1405,10 @@ PX_ERPT_SEND_DEC(jbc_out) CSR_XR(csr_base, JBCINT_OUT_TRANSACTION_ERROR_LOG_2), NULL); - return (PX_OK); + return (PX_NO_PANIC); } -/* JBC Dmcint ODCD - see io erpt doc, section 1.5 */ +/* JBC Dmcint ODCD */ PX_ERPT_SEND_DEC(jbc_odcd) { char buf[FM_MAX_CLASS]; @@ -1439,12 +1430,12 @@ PX_ERPT_SEND_DEC(jbc_odcd) CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG), NULL); - return (PX_OK); + return (PX_NO_PANIC); } /* * JBC Dmcint ODCO nonfatal errer handling - - * PIO data parity error: pio:write:M:nonfatal + * PIO data parity error: PIO */ /* ARGSUSED */ int @@ -1452,20 +1443,21 @@ px_err_jbc_dmcint_odcd_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); - uint64_t paddr; - int ret; - - if (!pri) - return (PX_FATAL_GOS); - - paddr = CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG); - paddr &= DMCINT_ODCD_ERROR_LOG_ADDRESS_MASK; - - ret = px_handle_lookup( - rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr); - - return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); + /* + * Holder function to attempt error recovery. When the features + * are in place, look up the address of the transaction in: + * + * paddr = CSR_XR(csr_base, DMCINT_ODCD_ERROR_LOG); + * paddr &= DMCINT_ODCD_ERROR_LOG_ADDRESS_MASK; + * + * If the error is a secondary error, there is no log information + * just panic as it is unknown which address has been affected. + * + * Remember the address is pretranslation and might be hard to look + * up the appropriate driver based on the PA. 
+ */ + return (px_err_panic_handle(rpdip, csr_base, derr, err_reg_descr, + err_bit_descr)); } /* Does address in DMCINT error log register match address of pcitool access? */ @@ -1499,7 +1491,8 @@ px_err_jbc_safe_acc_handle(dev_info_t *rpdip, caddr_t csr_base, boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); if (!pri) - return (PX_FATAL_GOS); + return (px_err_panic_handle(rpdip, csr_base, derr, + err_reg_descr, err_bit_descr)); /* * Got an error which is forgivable during a PCItool access. * @@ -1512,13 +1505,14 @@ px_err_jbc_safe_acc_handle(dev_info_t *rpdip, caddr_t csr_base, */ if ((derr->fme_flag != DDI_FM_ERR_UNEXPECTED) && (px_jbc_pcitool_addr_match(rpdip, csr_base))) - return (PX_FATAL_SW); + return (px_err_protected_handle(rpdip, csr_base, derr, + err_reg_descr, err_bit_descr)); return (px_err_jbc_dmcint_odcd_handle(rpdip, csr_base, derr, err_reg_descr, err_bit_descr)); } -/* JBC Dmcint IDC - see io erpt doc, section 1.6 */ +/* JBC Dmcint IDC */ PX_ERPT_SEND_DEC(jbc_idc) { char buf[FM_MAX_CLASS]; @@ -1540,10 +1534,10 @@ PX_ERPT_SEND_DEC(jbc_idc) CSR_XR(csr_base, DMCINT_IDC_ERROR_LOG), NULL); - return (PX_OK); + return (PX_NO_PANIC); } -/* JBC CSR - see io erpt doc, section 1.7 */ +/* JBC CSR */ PX_ERPT_SEND_DEC(jbc_csr) { char buf[FM_MAX_CLASS]; @@ -1565,38 +1559,10 @@ PX_ERPT_SEND_DEC(jbc_csr) CSR_XR(csr_base, CSR_ERROR_LOG), NULL); - return (PX_OK); + return (PX_NO_PANIC); } -/* - * JBC CSR errer handling - - * Ebus ready timeout error: pio:rdwr:M:nonfatal - */ -/* ARGSUSED */ -int -px_err_jbc_csr_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr) -{ - boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); - uint64_t paddr; - int ret; - - if (!pri) - return (PX_FATAL_GOS); - - paddr = CSR_XR(csr_base, CSR_ERROR_LOG); - paddr &= CSR_ERROR_LOG_ADDRESS_MASK; - - ret = px_handle_lookup( - rpdip, DMA_HANDLE, derr->fme_ena, (void *)paddr); - - return ((ret == DDI_FM_FATAL) ? 
PX_FATAL_GOS : PX_NONFATAL); -} - -/* JBC Dmcint IDC - see io erpt doc, section 1.6 */ - -/* DMC IMU RDS - see io erpt doc, section 2.1 */ +/* DMC IMU RDS */ PX_ERPT_SEND_DEC(imu_rds) { char buf[FM_MAX_CLASS]; @@ -1618,53 +1584,7 @@ PX_ERPT_SEND_DEC(imu_rds) CSR_XR(csr_base, IMU_RDS_ERROR_LOG), NULL); - return (PX_OK); -} - -/* imu function to handle all Received but Not Enabled errors */ -/* ARGSUSED */ -int -px_err_imu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr) -{ - uint64_t imu_log_enable, imu_intr_enable; - int mask = BITMASK(err_bit_descr->bit); - int err = PX_NONFATAL; - - imu_log_enable = CSR_XR(csr_base, err_reg_descr->log_addr); - imu_intr_enable = CSR_XR(csr_base, err_reg_descr->enable_addr); - - /* - * If matching bit is not set, meaning corresponding rbne not - * enabled, then receiving it indicates some sort of malfunction - * possibly in hardware. - * - * Other wise, software may have intentionally disabled certain - * errors for a period of time within which the occuring of the - * disabled errors become rbne, that is non fatal. - */ - - if (!(imu_log_enable & imu_intr_enable & mask)) - err = PX_FATAL_GOS; - - return (err); -} - -/* - * No platforms uses PME. Any PME received is simply logged - * for analysis. 
- */ -/* ARGSUSED */ -int -px_err_imu_pme_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr) -{ - px_t *px_p = DIP_TO_STATE(rpdip); - - px_p->px_pme_ignored++; - return (PX_NONFATAL); + return (PX_NO_PANIC); } /* handle EQ overflow */ @@ -1674,27 +1594,20 @@ px_err_imu_eq_ovfl_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - px_t *px_p = DIP_TO_STATE(rpdip); - px_msiq_state_t *msiq_state_p = &px_p->px_ib_p->ib_msiq_state; - msiqid_t eqno; - pci_msiq_state_t msiq_state; - int err = PX_NONFATAL; - int i; - - eqno = msiq_state_p->msiq_1st_msiq_id; - for (i = 0; i < msiq_state_p->msiq_cnt; i++) { - if (px_lib_msiq_getstate(rpdip, eqno, &msiq_state) == - DDI_SUCCESS) { - if (msiq_state == PCI_MSIQ_STATE_ERROR) { - err = PX_FATAL_GOS; - } - } - } + px_t *px_p = DIP_TO_STATE(rpdip); + pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; + int err = px_err_check_eq(rpdip); - return (err); + if ((err == PX_PANIC) && (pxu_p->cpr_flag == PX_NOT_CPR)) { + return (px_err_panic_handle(rpdip, csr_base, derr, + err_reg_descr, err_bit_descr)); + } else { + return (px_err_no_panic_handle(rpdip, csr_base, derr, + err_reg_descr, err_bit_descr)); + } } -/* DMC IMU SCS - see io erpt doc, section 2.2 */ +/* DMC IMU SCS */ PX_ERPT_SEND_DEC(imu_scs) { char buf[FM_MAX_CLASS]; @@ -1716,10 +1629,10 @@ PX_ERPT_SEND_DEC(imu_scs) CSR_XR(csr_base, IMU_SCS_ERROR_LOG), NULL); - return (PX_OK); + return (PX_NO_PANIC); } -/* DMC IMU - see io erpt doc, section 2.3 */ +/* DMC IMU */ PX_ERPT_SEND_DEC(imu) { char buf[FM_MAX_CLASS]; @@ -1739,14 +1652,27 @@ PX_ERPT_SEND_DEC(imu) CSR_XR(csr_base, IMU_ERROR_STATUS_SET), NULL); - return (PX_OK); + return (PX_NO_PANIC); } -/* DMC MMU TFAR/TFSR - see io erpt doc, section 2.4 */ +/* DMC MMU TFAR/TFSR */ PX_ERPT_SEND_DEC(mmu_tfar_tfsr) { char buf[FM_MAX_CLASS]; boolean_t pri = PX_ERR_IS_PRI(bit); + 
px_t *px_p = DIP_TO_STATE(rpdip); + pcie_req_id_t fault_bdf = 0; + uint16_t s_status = 0; + + if (pri) { + fault_bdf = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS) + & (MMU_TRANSLATION_FAULT_STATUS_ID_MASK << + MMU_TRANSLATION_FAULT_STATUS_ID); + s_status = PCI_STAT_S_TARG_AB; + + /* Only PIO Fault Addresses are valid, this is DMA */ + (void) px_rp_en_q(px_p, fault_bdf, NULL, s_status); + } (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); @@ -1767,10 +1693,10 @@ PX_ERPT_SEND_DEC(mmu_tfar_tfsr) CSR_XR(csr_base, MMU_TRANSLATION_FAULT_STATUS), NULL); - return (PX_OK); + return (PX_NO_PANIC); } -/* DMC MMU - see io erpt doc, section 2.5 */ +/* DMC MMU */ PX_ERPT_SEND_DEC(mmu) { char buf[FM_MAX_CLASS]; @@ -1790,112 +1716,153 @@ PX_ERPT_SEND_DEC(mmu) CSR_XR(csr_base, MMU_ERROR_STATUS_SET), NULL); - return (PX_OK); + return (PX_NO_PANIC); } -/* imu function to handle all Received but Not Enabled errors */ +/* + * IMU function to handle all Received but Not Enabled errors. + * + * These errors are due to transactions modes in which the PX driver was not + * setup to be able to do. If possible, inform the driver that their DMA has + * failed by marking their DMA handle as failed, but do not panic the system. + * Most likely the address is not valid, as Fire wasn't setup to handle them in + * the first place. + * + * These errors are not retryable, unless the PX mode has changed, otherwise the + * same error will occur again. 
+ */ int px_err_mmu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); - uint64_t mmu_log_enable, mmu_intr_enable; - uint64_t mask = BITMASK(err_bit_descr->bit); - uint64_t mmu_tfa, mmu_ctrl; - uint64_t mmu_enable_bit = 0; - int err = PX_NONFATAL; - int ret; + pcie_req_id_t bdf; - mmu_log_enable = CSR_XR(csr_base, err_reg_descr->log_addr); - mmu_intr_enable = CSR_XR(csr_base, err_reg_descr->enable_addr); - - mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS); - mmu_ctrl = CSR_XR(csr_base, MMU_CONTROL_AND_STATUS); + if (!PX_ERR_IS_PRI(err_bit_descr->bit)) + goto done; - switch (err_bit_descr->bit) { - case MMU_INTERRUPT_STATUS_BYP_ERR_P: - mmu_enable_bit = BITMASK(MMU_CONTROL_AND_STATUS_BE); - break; - case MMU_INTERRUPT_STATUS_TRN_ERR_P: - mmu_enable_bit = BITMASK(MMU_CONTROL_AND_STATUS_TE); - break; - default: - mmu_enable_bit = 0; - break; - } - - /* - * If the interrupts are enabled and Translation/Bypass Enable bit - * was set, then panic. This error should not have occured. - */ - if (mmu_log_enable & mmu_intr_enable & - (mmu_ctrl & mmu_enable_bit)) { - err = PX_FATAL_GOS; - } else { - if (!pri) - return (PX_FATAL_GOS); + bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); + (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_DMA_ADDR, NULL, + bdf); - ret = px_handle_lookup( - rpdip, DMA_HANDLE, derr->fme_ena, (void *)mmu_tfa); - err = (ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL; +done: + return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr, + err_bit_descr)); +} - /* - * S/W bug - this error should always be enabled - */ +/* + * IMU function to handle all invalid address errors. + * + * These errors are due to transactions in which the address is not recognized. + * If possible, inform the driver that all DMAs have failed by marking their DMA + * handles. 
Fire should not panic the system, it'll be up to the driver to + * panic. The address logged is invalid. + * + * These errors are not retryable since retrying the same transaction with the + * same invalid address will result in the same error. + */ +/* ARGSUSED */ +int +px_err_mmu_tfa_handle(dev_info_t *rpdip, caddr_t csr_base, + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr) +{ + pcie_req_id_t bdf; - /* enable error & intr reporting for this bit */ - CSR_XS(csr_base, MMU_ERROR_LOG_ENABLE, mmu_log_enable | mask); - CSR_XS(csr_base, MMU_INTERRUPT_ENABLE, mmu_intr_enable | mask); + if (!PX_ERR_IS_PRI(err_bit_descr->bit)) + goto done; - /* enable translation access/bypass enable */ - CSR_XS(csr_base, MMU_CONTROL_AND_STATUS, - mmu_ctrl | mmu_enable_bit); - } + bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); + (void) pf_hdl_lookup(rpdip, derr->fme_ena, PF_DMA_ADDR, NULL, + bdf); - return (err); +done: + return (px_err_no_panic_handle(rpdip, csr_base, derr, err_reg_descr, + err_bit_descr)); } -/* Generic error handling functions that involve MMU Translation Fault Addr */ +/* + * IMU function to handle normal transactions that encounter a parity error. + * + * These errors are due to transactions that enouter a parity error. If + * possible, inform the driver that their DMA have failed and that they should + * retry. If Fire is unable to contact the leaf driver, panic the system. + * Otherwise, it'll be up to the device to determine is this is a panicable + * error. 
+ */ /* ARGSUSED */ int -px_err_mmu_tfa_handle(dev_info_t *rpdip, caddr_t csr_base, +px_err_mmu_parity_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); - uint64_t mmu_tfa; - uint_t ret; + uint64_t mmu_tfa; + pcie_req_id_t bdf; + int status = DDI_FM_UNKNOWN; - if (!pri) - return (PX_FATAL_GOS); + if (!PX_ERR_IS_PRI(err_bit_descr->bit)) + goto done; mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS); - ret = px_handle_lookup( - rpdip, DMA_HANDLE, derr->fme_ena, (void *)mmu_tfa); - - return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); + bdf = (pcie_req_id_t)CSR_FR(csr_base, MMU_TRANSLATION_FAULT_STATUS, ID); + status = pf_hdl_lookup(rpdip, derr->fme_ena, PF_DMA_ADDR, + (uint32_t)mmu_tfa, bdf); + +done: + if (status == DDI_FM_UNKNOWN) + return (px_err_panic_handle(rpdip, csr_base, derr, + err_reg_descr, err_bit_descr)); + else + return (px_err_no_panic_handle(rpdip, csr_base, derr, + err_reg_descr, err_bit_descr)); } -/* MMU Table walk errors */ +/* + * wuc/ruc event - Mark the handle of the failed PIO access. 
Return "no_panic" + */ /* ARGSUSED */ int -px_err_mmu_tblwlk_handle(dev_info_t *rpdip, caddr_t csr_base, +px_err_wuc_ruc_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - boolean_t pri = PX_ERR_IS_PRI(err_bit_descr->bit); - uint64_t mmu_tfa; - uint_t ret; - - if (!pri) - return (PX_FATAL_GOS); + px_t *px_p = DIP_TO_STATE(rpdip); + pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p; + uint64_t data; + uint32_t addr, hdr; + pcie_tlp_hdr_t *tlp; + int sts = PF_HDL_NOTFOUND; + + if (!PX_ERR_IS_PRI(err_bit_descr->bit)) + goto done; + + data = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG); + hdr = (uint32_t)(data >> 32); + tlp = (pcie_tlp_hdr_t *)&hdr; + data = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG); + addr = (uint32_t)(data >> 32); + + switch (tlp->type) { + case PCIE_TLP_TYPE_IO: + case PCIE_TLP_TYPE_MEM: + case PCIE_TLP_TYPE_MEMLK: + sts = pf_hdl_lookup(rpdip, derr->fme_ena, PF_PIO_ADDR, + addr, NULL); + break; + case PCIE_TLP_TYPE_CFG0: + case PCIE_TLP_TYPE_CFG1: + sts = pf_hdl_lookup(rpdip, derr->fme_ena, PF_CFG_ADDR, + addr, (addr >> 16)); + break; + } - mmu_tfa = CSR_XR(csr_base, MMU_TRANSLATION_FAULT_ADDRESS); - ret = px_handle_lookup( - rpdip, DMA_HANDLE, derr->fme_ena, (void *)mmu_tfa); +done: + if ((sts == PF_HDL_NOTFOUND) && (pxu_p->cpr_flag == PX_NOT_CPR)) + return (px_err_protected_handle(rpdip, csr_base, derr, + err_reg_descr, err_bit_descr)); - return ((ret == DDI_FM_FATAL) ? PX_FATAL_GOS : PX_NONFATAL); + return (px_err_no_panic_handle(rpdip, csr_base, derr, + err_reg_descr, err_bit_descr)); } /* @@ -1917,7 +1884,7 @@ px_err_tlu_lup_handle(dev_info_t *rpdip, caddr_t csr_base, * error condition. */ return ((atomic_cas_32(&px_p->px_lup_pending, 1, 0) == 0) ? 
- PX_NONFATAL : PX_OK); + PX_NO_PANIC : PX_EXPECTED); } /* @@ -1931,7 +1898,8 @@ px_err_tlu_ldn_handle(dev_info_t *rpdip, caddr_t csr_base, px_err_bit_desc_t *err_bit_descr) { px_t *px_p = DIP_TO_STATE(rpdip); - return ((px_p->px_pm_flags & PX_LDN_EXPECTED) ? PX_OK : PX_NONFATAL); + return ((px_p->px_pm_flags & PX_LDN_EXPECTED) ? PX_EXPECTED : + PX_NO_PANIC); } /* PEC ILU none - see io erpt doc, section 3.1 */ @@ -1954,7 +1922,7 @@ PX_ERPT_SEND_DEC(pec_ilu) CSR_XR(csr_base, ILU_ERROR_STATUS_SET), NULL); - return (PX_OK); + return (PX_NO_PANIC); } /* PCIEX UE Errors */ @@ -1964,14 +1932,60 @@ px_err_pciex_ue_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - uint32_t mask = (uint32_t)BITMASK(err_bit_descr->bit); + px_err_pcie_t regs = {0}; + uint32_t err_bit; + int err; + uint64_t log; + + if (err_bit_descr->bit < 32) { + err_bit = (uint32_t)BITMASK(err_bit_descr->bit); + regs.ue_reg = err_bit; + regs.primary_ue = err_bit; + + /* + * Log the Received Log for PTLP and UR. The PTLP most likely + * is a poisoned completion. The original transaction will be + * logged inthe Transmit Log. 
+ */ + if (err_bit & (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_UR)) { + log = CSR_XR(csr_base, + TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER1_LOG); + regs.rx_hdr1 = (uint32_t)(log >> 32); + regs.rx_hdr2 = (uint32_t)(log && 0xFFFFFFFF); + + log = CSR_XR(csr_base, + TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG); + regs.rx_hdr3 = (uint32_t)(log >> 32); + regs.rx_hdr4 = (uint32_t)(log && 0xFFFFFFFF); + } + + if (err_bit & (PCIE_AER_UCE_PTLP)) { + log = CSR_XR(csr_base, + TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER1_LOG); + regs.tx_hdr1 = (uint32_t)(log >> 32); + regs.tx_hdr2 = (uint32_t)(log && 0xFFFFFFFF); + + log = CSR_XR(csr_base, + TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG); + regs.tx_hdr3 = (uint32_t)(log >> 32); + regs.tx_hdr4 = (uint32_t)(log && 0xFFFFFFFF); + } + } else { + regs.ue_reg = (uint32_t)BITMASK(err_bit_descr->bit - 32); + } - return ((err_bit_descr->bit >= 32 && px_fabric_die_rc_ue_gos) ? - PX_FATAL_GOS : PX_FABRIC_ERR_SEV(mask, px_fabric_die_rc_ue, - px_fabric_die_rc_ue_gos)); + err = px_err_check_pcie(rpdip, derr, &regs); + + if (err == PX_PANIC) { + return (px_err_panic_handle(rpdip, csr_base, derr, + err_reg_descr, err_bit_descr)); + } else { + return (px_err_no_panic_handle(rpdip, csr_base, derr, + err_reg_descr, err_bit_descr)); + } } -/* PCI-E Uncorrectable Errors - see io erpt doc, section 3.2 */ +/* PCI-E Uncorrectable Errors */ PX_ERPT_SEND_DEC(pciex_rx_ue) { char buf[FM_MAX_CLASS]; @@ -1995,10 +2009,10 @@ PX_ERPT_SEND_DEC(pciex_rx_ue) CSR_XR(csr_base, TLU_RECEIVE_UNCORRECTABLE_ERROR_HEADER2_LOG), NULL); - return (PX_OK); + return (PX_NO_PANIC); } -/* PCI-E Uncorrectable Errors - see io erpt doc, section 3.3 */ +/* PCI-E Uncorrectable Errors */ PX_ERPT_SEND_DEC(pciex_tx_ue) { char buf[FM_MAX_CLASS]; @@ -2022,10 +2036,10 @@ PX_ERPT_SEND_DEC(pciex_tx_ue) CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG), NULL); - return (PX_OK); + return (PX_NO_PANIC); } -/* PCI-E Uncorrectable Errors - see io erpt doc, section 3.4 */ +/* PCI-E Uncorrectable 
Errors */ PX_ERPT_SEND_DEC(pciex_rx_tx_ue) { char buf[FM_MAX_CLASS]; @@ -2053,10 +2067,10 @@ PX_ERPT_SEND_DEC(pciex_rx_tx_ue) CSR_XR(csr_base, TLU_TRANSMIT_UNCORRECTABLE_ERROR_HEADER2_LOG), NULL); - return (PX_OK); + return (PX_NO_PANIC); } -/* PCI-E Uncorrectable Errors - see io erpt doc, section 3.5 */ +/* PCI-E Uncorrectable Errors */ PX_ERPT_SEND_DEC(pciex_ue) { char buf[FM_MAX_CLASS]; @@ -2076,7 +2090,7 @@ PX_ERPT_SEND_DEC(pciex_ue) CSR_XR(csr_base, TLU_UNCORRECTABLE_ERROR_STATUS_SET), NULL); - return (PX_OK); + return (PX_NO_PANIC); } /* PCIEX UE Errors */ @@ -2086,11 +2100,23 @@ px_err_pciex_ce_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr) { - uint32_t mask = (uint32_t)BITMASK(err_bit_descr->bit); + px_err_pcie_t regs = {0}; + int err; + + if (err_bit_descr->bit < 32) + regs.ce_reg = (uint32_t)BITMASK(err_bit_descr->bit); + else + regs.ce_reg = (uint32_t)BITMASK(err_bit_descr->bit - 32); + + err = px_err_check_pcie(rpdip, derr, &regs); - return ((err_bit_descr->bit >= 32 && px_fabric_die_rc_ce_gos) ? 
- PX_FATAL_GOS : PX_FABRIC_ERR_SEV(mask, px_fabric_die_rc_ce, - px_fabric_die_rc_ce_gos)); + if (err == PX_PANIC) { + return (px_err_panic_handle(rpdip, csr_base, derr, + err_reg_descr, err_bit_descr)); + } else { + return (px_err_no_panic_handle(rpdip, csr_base, derr, + err_reg_descr, err_bit_descr)); + } } /* PCI-E Correctable Errors - see io erpt doc, section 3.6 */ @@ -2113,7 +2139,7 @@ PX_ERPT_SEND_DEC(pciex_ce) CSR_XR(csr_base, TLU_CORRECTABLE_ERROR_STATUS_SET), NULL); - return (PX_OK); + return (PX_NO_PANIC); } /* TLU Other Event Status (receive only) - see io erpt doc, section 3.7 */ @@ -2140,7 +2166,7 @@ PX_ERPT_SEND_DEC(pciex_rx_oe) CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG), NULL); - return (PX_OK); + return (PX_NO_PANIC); } /* TLU Other Event Status (rx + tx) - see io erpt doc, section 3.8 */ @@ -2148,6 +2174,51 @@ PX_ERPT_SEND_DEC(pciex_rx_tx_oe) { char buf[FM_MAX_CLASS]; boolean_t pri = PX_ERR_IS_PRI(bit); + px_t *px_p = DIP_TO_STATE(rpdip); + uint32_t trans_type, fault_addr = 0; + uint64_t rx_h1, rx_h2, tx_h1, tx_h2; + uint16_t s_status; + int sts; + pcie_req_id_t fault_bdf = 0; + pcie_cpl_t *cpl; + pf_data_t pf_data = {0}; + + rx_h1 = CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG); + rx_h2 = CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG); + tx_h1 = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG); + tx_h2 = CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG); + + if ((bit == TLU_OTHER_EVENT_STATUS_SET_RUC_P) || + (bit == TLU_OTHER_EVENT_STATUS_SET_WUC_P)) { + pf_data.aer_h0 = (uint32_t)(rx_h1 >> 32); + pf_data.aer_h1 = (uint32_t)rx_h1; + pf_data.aer_h2 = (uint32_t)(rx_h2 >> 32); + pf_data.aer_h3 = (uint32_t)rx_h2; + + /* get completer bdf (fault bdf) from rx logs */ + cpl = (pcie_cpl_t *)&pf_data.aer_h1; + fault_bdf = cpl->cid; + + /* Figure out if UR/CA from rx logs */ + if (cpl->status == PCIE_CPL_STS_UR) + s_status = PCI_STAT_R_MAST_AB; + else if (cpl->status == PCIE_CPL_STS_CA) + s_status = PCI_STAT_R_TARG_AB; + 
+ + pf_data.aer_h0 = (uint32_t)(tx_h1 >> 32); + pf_data.aer_h1 = (uint32_t)tx_h1; + pf_data.aer_h2 = (uint32_t)(tx_h2 >> 32); + pf_data.aer_h3 = (uint32_t)tx_h2; + + /* get fault addr from tx logs */ + sts = pf_tlp_decode(rpdip, &pf_data, 0, &fault_addr, + &trans_type); + + if (sts == DDI_SUCCESS) + (void) px_rp_en_q(px_p, fault_bdf, fault_addr, + s_status); + } (void) snprintf(buf, FM_MAX_CLASS, "%s", class_name); ddi_fm_ereport_post(rpdip, buf, derr->fme_ena, @@ -2161,17 +2232,13 @@ PX_ERPT_SEND_DEC(pciex_rx_tx_oe) ss_reg, FIRE_TLU_OEESS, DATA_TYPE_UINT64, CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), - FIRE_TLU_ROEEH1L, DATA_TYPE_UINT64, - CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER1_LOG), - FIRE_TLU_ROEEH2L, DATA_TYPE_UINT64, - CSR_XR(csr_base, TLU_RECEIVE_OTHER_EVENT_HEADER2_LOG), - FIRE_TLU_TOEEH1L, DATA_TYPE_UINT64, - CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER1_LOG), - FIRE_TLU_TOEEH2L, DATA_TYPE_UINT64, - CSR_XR(csr_base, TLU_TRANSMIT_OTHER_EVENT_HEADER2_LOG), + FIRE_TLU_ROEEH1L, DATA_TYPE_UINT64, rx_h1, + FIRE_TLU_ROEEH2L, DATA_TYPE_UINT64, rx_h2, + FIRE_TLU_TOEEH1L, DATA_TYPE_UINT64, tx_h1, + FIRE_TLU_TOEEH2L, DATA_TYPE_UINT64, tx_h2, NULL); - return (PX_OK); + return (PX_NO_PANIC); } /* TLU Other Event - see io erpt doc, section 3.9 */ @@ -2194,5 +2261,5 @@ PX_ERPT_SEND_DEC(pciex_oe) CSR_XR(csr_base, TLU_OTHER_EVENT_STATUS_SET), NULL); - return (PX_OK); + return (PX_NO_PANIC); } diff --git a/usr/src/uts/sun4u/io/px/px_err_impl.h b/usr/src/uts/sun4u/io/px/px_err_impl.h index 8f781aafdf..9d8c961599 100644 --- a/usr/src/uts/sun4u/io/px/px_err_impl.h +++ b/usr/src/uts/sun4u/io/px/px_err_impl.h @@ -90,7 +90,7 @@ typedef struct px_err_reg_desc { * Macro to create the error handling forward declaration * * The error handlers examines error, determine the nature of the error - * and return error status in terms of PX_FATAL_HW | PX_FATAL_GOS | ... + * and return error status in terms of PX_HW_RESET | PX_PANIC | ... * terminology. 
*/ #define PX_ERR_BIT_HANDLE_DEC(n) int px_err_ ## n ## _handle\ @@ -114,25 +114,21 @@ typedef struct px_err_reg_desc { /* * Predefined error handling functions. */ -int px_err_fatal_hw_handle(dev_info_t *rpdip, caddr_t csr_base, +void px_err_log_handle(dev_info_t *rpdip, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr, char *msg); +int px_err_hw_reset_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); -int px_err_fatal_gos_handle(dev_info_t *rpdip, caddr_t csr_base, +int px_err_panic_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); -int px_err_fatal_stuck_handle(dev_info_t *rpdip, caddr_t csr_base, +int px_err_protected_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); -int px_err_fatal_sw_handle(dev_info_t *rpdip, caddr_t csr_base, +int px_err_no_panic_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); -int px_err_non_fatal_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr); -int px_err_ok_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr); -int px_err_unknown_handle(dev_info_t *rpdip, caddr_t csr_base, +int px_err_no_error_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); @@ -151,8 +147,8 @@ PX_ERPT_SEND_DEC(do_not); /* * Fire JBC error Handling Forward Declarations - * the must-panic type errors such as PX_FATAL_GOS or - * post-reset-diagnosed type error such as PX_FATAL_HW + * the must-panic type errors such as PX_PANIC or + * post-reset-diagnosed type 
error such as PX_HW_RESET * are not furthur diagnosed here because there is no * justification to find out more as immediate error * handling. FMA DE will do the post analysis. @@ -169,9 +165,6 @@ int px_err_jbc_dmcint_odcd_handle(dev_info_t *rpdip, caddr_t csr_base, int px_err_jbc_safe_acc_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); -int px_err_jbc_csr_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr); /* Fire JBC error ereport Forward Declarations */ PX_ERPT_SEND_DEC(jbc_fatal); @@ -193,12 +186,6 @@ PX_ERPT_SEND_DEC(ubc_fatal); #define PX_ERR_DMC_CLASS(n) PCIEX_FIRE "." FIRE_DMC_ ## n /* Fire Bit Error Handling Forward Declarations */ -int px_err_imu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr); -int px_err_imu_pme_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr); int px_err_imu_eq_ovfl_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); @@ -208,10 +195,7 @@ int px_err_mmu_rbne_handle(dev_info_t *rpdip, caddr_t csr_base, int px_err_mmu_tfa_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); -int px_err_mmu_tte_cae_handle(dev_info_t *rpdip, caddr_t csr_base, - ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, - px_err_bit_desc_t *err_bit_descr); -int px_err_mmu_tblwlk_handle(dev_info_t *rpdip, caddr_t csr_base, +int px_err_mmu_parity_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); @@ -229,6 +213,9 @@ PX_ERPT_SEND_DEC(mmu); #define PX_ERR_PEC_CLASS(n) 
PCIEX_FIRE "." FIRE_PEC_ ## n #define PX_ERR_PEC_OB_CLASS(n) PCIEX_OBERON "." FIRE_PEC_ ## n +int px_err_wuc_ruc_handle(dev_info_t *rpdip, caddr_t csr_base, + ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, + px_err_bit_desc_t *err_bit_descr); int px_err_tlu_lup_handle(dev_info_t *rpdip, caddr_t csr_base, ddi_fm_error_t *derr, px_err_reg_desc_t *err_reg_descr, px_err_bit_desc_t *err_bit_descr); diff --git a/usr/src/uts/sun4u/io/px/px_lib4u.c b/usr/src/uts/sun4u/io/px/px_lib4u.c index dd288b3c2d..d8f547ec13 100644 --- a/usr/src/uts/sun4u/io/px/px_lib4u.c +++ b/usr/src/uts/sun4u/io/px/px_lib4u.c @@ -1260,6 +1260,7 @@ px_lib_suspend(dev_info_t *dip) if (ret != H_EOK) cb_p->attachcnt++; } + pxu_p->cpr_flag = PX_ENTERED_CPR; fail: return ((ret != H_EOK) ? DDI_FAILURE: DDI_SUCCESS); @@ -1424,14 +1425,19 @@ px_lib_map_attr_check(ddi_map_req_t *mp) hp->ah_acc.devacc_attr_dataorder = DDI_STRICTORDER_ACC; } +/* This function is called only by poke, caut put and pxtool poke. */ void -px_lib_clr_errs(px_t *px_p) +px_lib_clr_errs(px_t *px_p, dev_info_t *rdip, uint64_t addr) { px_pec_t *pec_p = px_p->px_pec_p; dev_info_t *rpdip = px_p->px_dip; - int err = PX_OK, ret; + int rc_err, fab_err, i; int acctype = pec_p->pec_safeacc_type; ddi_fm_error_t derr; + px_ranges_t *ranges_p; + int range_len; + uint32_t addr_high, addr_low; + pcie_req_id_t bdf = 0; /* Create the derr */ bzero(&derr, sizeof (ddi_fm_error_t)); @@ -1447,19 +1453,44 @@ px_lib_clr_errs(px_t *px_p) mutex_enter(&px_p->px_fm_mutex); /* send ereport/handle/clear fire registers */ - err = px_err_handle(px_p, &derr, PX_LIB_CALL, B_TRUE); - - /* Check all child devices for errors */ - ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); + rc_err = px_err_cmn_intr(px_p, &derr, PX_LIB_CALL, PX_FM_BLOCK_ALL); + + /* Figure out if this is a cfg or mem32 access */ + addr_high = (uint32_t)(addr >> 32); + addr_low = (uint32_t)addr; + range_len = px_p->px_ranges_length / sizeof (px_ranges_t); + i = 0; + for (ranges_p = 
px_p->px_ranges_p; i < range_len; i++, ranges_p++) { + if (ranges_p->parent_high == addr_high) { + switch (ranges_p->child_high & PCI_ADDR_MASK) { + case PCI_ADDR_CONFIG: + bdf = (pcie_req_id_t)(addr_low >> 12); + addr_low = 0; + break; + case PCI_ADDR_MEM32: + if (rdip) + (void) pcie_get_bdf_from_dip(rdip, + &bdf); + else + bdf = NULL; + break; + } + break; + } + } - mutex_exit(&px_p->px_fm_mutex); + px_rp_en_q(px_p, bdf, addr_low, NULL); /* - * PX_FATAL_HW indicates a condition recovered from Fatal-Reset, - * therefore it does not cause panic. + * XXX - Current code scans the fabric for all px_tool accesses. + * In future, do not scan fabric for px_tool access to IO Root Nexus */ - if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL)) - PX_FM_PANIC("Fatal System Port Error has occurred\n"); + fab_err = pf_scan_fabric(rpdip, &derr, px_p->px_dq_p, + &px_p->px_dq_tail); + + mutex_exit(&px_p->px_fm_mutex); + + px_err_panic(rc_err, PX_RC, fab_err); } #ifdef DEBUG @@ -1492,7 +1523,7 @@ px_lib_do_poke(dev_info_t *dip, dev_info_t *rdip, } else err = DDI_FAILURE; - px_lib_clr_errs(px_p); + px_lib_clr_errs(px_p, rdip, in_args->dev_addr); if (otd.ot_trap & OT_DATA_ACCESS) err = DDI_FAILURE; @@ -1567,7 +1598,7 @@ px_lib_do_caut_put(dev_info_t *dip, dev_info_t *rdip, if (flags == DDI_DEV_AUTOINCR) dev_addr += size; - px_lib_clr_errs(px_p); + px_lib_clr_errs(px_p, rdip, dev_addr); if (pec_p->pec_ontrap_data->ot_trap & OT_DATA_ACCESS) { err = DDI_FAILURE; @@ -2305,6 +2336,7 @@ px_cpr_callb(void *arg, int code) break; case CB_CODE_CPR_RESUME: + pxu_p->cpr_flag = PX_NOT_CPR; mutex_enter(&ib_p->ib_ino_lst_mutex); ce_ino_p = px_ib_locate_ino(ib_p, ce_ino); diff --git a/usr/src/uts/sun4u/io/px/px_lib4u.h b/usr/src/uts/sun4u/io/px/px_lib4u.h index cb4567302d..36de19f739 100644 --- a/usr/src/uts/sun4u/io/px/px_lib4u.h +++ b/usr/src/uts/sun4u/io/px/px_lib4u.h @@ -107,6 +107,7 @@ typedef struct pxu { uint64_t *ib_config_state; uint64_t *xcb_config_state; uint64_t 
*msiq_config_state; + uint_t cpr_flag; /* sun4u specific vars */ caddr_t px_address[4]; @@ -118,6 +119,10 @@ typedef struct pxu { #define PX2CB(px_p) (((pxu_t *)px_p->px_plat_p)->px_cb_p) +/* cpr_flag */ +#define PX_NOT_CPR 0 +#define PX_ENTERED_CPR 1 + /* * Event Queue data structure. */ @@ -383,7 +388,7 @@ extern int px_link_wait4l1idle(caddr_t csr_base); extern int px_link_retrain(caddr_t csr_base); extern void px_enable_detect_quiet(caddr_t csr_base); -extern void px_lib_clr_errs(px_t *px_p); +extern void px_lib_clr_errs(px_t *px_p, dev_info_t *rdip, uint64_t addr); /* * Hotplug functions: diff --git a/usr/src/uts/sun4u/io/px/px_tools_4u.c b/usr/src/uts/sun4u/io/px/px_tools_4u.c index 7588509ce3..4c30c353d4 100644 --- a/usr/src/uts/sun4u/io/px/px_tools_4u.c +++ b/usr/src/uts/sun4u/io/px/px_tools_4u.c @@ -207,7 +207,7 @@ pxtool_safe_phys_poke(px_t *px_p, boolean_t type, size_t size, uint64_t paddr, } else err = DDI_FAILURE; - px_lib_clr_errs(px_p); + px_lib_clr_errs(px_p, 0, paddr); if (otd.ot_trap & OT_DATA_ACCESS) err = DDI_FAILURE; diff --git a/usr/src/uts/sun4v/io/px/px_err.c b/usr/src/uts/sun4v/io/px/px_err.c index 33b354c5c5..1f3f37f569 100644 --- a/usr/src/uts/sun4v/io/px/px_err.c +++ b/usr/src/uts/sun4v/io/px/px_err.c @@ -38,16 +38,37 @@ #include "px_obj.h" #include "px_err.h" -static uint_t px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt); -static int px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, +static void px_err_fill_pfd(dev_info_t *dip, px_t *px_p, px_rc_err_t *epkt); +static uint_t px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt); +static int px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt, int caller); -static int px_cb_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, - px_rc_err_t *epkt, int caller); -static int px_mmu_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, - px_rc_err_t *epkt, int caller); -static int px_pcie_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, - px_rc_err_t 
*epkt, int caller); +static void px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt, + boolean_t is_block_pci, char *msg); +static int px_cb_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, + px_rc_err_t *epkt); +static int px_mmu_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, + px_rc_err_t *epkt); +static int px_intr_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, + px_rc_err_t *epkt); +static int px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, + px_rc_err_t *epkt); +static int px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, + px_rc_err_t *epkt); +static void px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr, + px_rc_err_t *epkt); +static int px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, + px_rc_err_t *epkt); + +/* Include the code generated sun4v epkt checking code */ +#include "px_err_gen.c" + +/* + * This variable indicates if we have a hypervisor that could potentially send + * incorrect epkts. We always set this to TRUE for now until we find a way to + * tell if this HV bug has been fixed. + */ +boolean_t px_legacy_epkt = B_TRUE; /* * px_err_cb_intr: @@ -60,7 +81,7 @@ px_err_cb_intr(caddr_t arg) px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; if (epkt != NULL) { - return (px_err_common_intr(fault_p, epkt)); + return (px_err_intr(fault_p, epkt)); } return (DDI_INTR_UNCLAIMED); @@ -77,55 +98,108 @@ px_err_dmc_pec_intr(caddr_t arg) px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; if (epkt != NULL) { - return (px_err_common_intr(fault_p, epkt)); + return (px_err_intr(fault_p, epkt)); } return (DDI_INTR_UNCLAIMED); } /* - * px_err_handle: + * px_err_cmn_intr: * Common function called by trap, mondo and fabric intr. * This function is more meaningful in sun4u implementation. Kept * to mirror sun4u call stack. * o check for safe access + * o create and queue RC info for later use in fabric scan. 
+ * o RUC/WUC, PTLP, MMU Errors(CA), UR * * @param px_p leaf in which to check access * @param derr fm err data structure to be updated * @param caller PX_TRAP_CALL | PX_INTR_CALL * @param chkjbc whether to handle hostbus registers (ignored) - * @return err PX_OK | PX_NONFATAL | - * PX_FATAL_GOS | PX_FATAL_HW | PX_STUCK_FATAL + * @return err PX_NO_PANIC | PX_PROTECTED | + * PX_PANIC | PX_HW_RESET | PX_EXPECTED */ /* ARGSUSED */ int -px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller, - boolean_t chkxbc) +px_err_cmn_intr(px_t *px_p, ddi_fm_error_t *derr, int caller, int block) { - /* check for safe access */ px_err_safeacc_check(px_p, derr); - return (DDI_FM_OK); } /* - * px_err_common_intr: + * fills RC specific fault data + */ +static void +px_err_fill_pfd(dev_info_t *dip, px_t *px_p, px_rc_err_t *epkt) { + pf_data_t pf_data = {0}; + int sts = DDI_SUCCESS; + pcie_req_id_t fault_bdf = 0; + uint32_t fault_addr = 0; + uint16_t s_status = 0; + + /* Add an PCIE PF_DATA Entry */ + if (epkt->rc_descr.block == BLOCK_MMU) { + /* Only PIO Fault Addresses are valid, this is DMA */ + s_status = PCI_STAT_S_TARG_AB; + fault_addr = 0; + + if (epkt->rc_descr.H) + fault_bdf = (pcie_req_id_t)(epkt->hdr[0] >> 16); + else + sts = DDI_FAILURE; + } else { + px_pec_err_t *pec_p = (px_pec_err_t *)epkt; + uint32_t trans_type; + uint32_t dir = pec_p->pec_descr.dir; + + pf_data.aer_h0 = (uint32_t)(pec_p->hdr[0]); + pf_data.aer_h1 = (uint32_t)(pec_p->hdr[0] >> 32); + pf_data.aer_h2 = (uint32_t)(pec_p->hdr[1]); + pf_data.aer_h3 = (uint32_t)(pec_p->hdr[1] >> 32); + + /* translate RC UR/CA to legacy secondary errors */ + if ((dir == DIR_READ || dir == DIR_WRITE) && + pec_p->pec_descr.U) { + if (pec_p->ue_reg_status & PCIE_AER_UCE_UR) + s_status |= PCI_STAT_R_MAST_AB; + if (pec_p->ue_reg_status & PCIE_AER_UCE_CA) + s_status |= PCI_STAT_R_TARG_AB; + } + + if (pec_p->ue_reg_status & PCIE_AER_UCE_PTLP) + s_status |= PCI_STAT_PERROR; + + if (pec_p->ue_reg_status & PCIE_AER_UCE_CA) + 
s_status |= PCI_STAT_S_TARG_AB; + + sts = pf_tlp_decode(dip, &pf_data, &fault_bdf, &fault_addr, + &trans_type); + } + + if (sts == DDI_SUCCESS) + px_rp_en_q(px_p, fault_bdf, fault_addr, s_status); +} + +/* + * px_err_intr: * Interrupt handler for the JBC/DMC/PEC block. * o lock * o create derr * o check safe access - * o px_err_check_severiy(epkt) - * o dispatch + * o px_err_check_severity(epkt) + * o pcie_scan_fabric * o Idle intr state * o unlock * o handle error: fatal? fm_panic() : return INTR_CLAIMED) */ static uint_t -px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt) +px_err_intr(px_fault_t *fault_p, px_rc_err_t *epkt) { px_t *px_p = DIP_TO_STATE(fault_p->px_fh_dip); dev_info_t *rpdip = px_p->px_dip; - int err, ret; + int rc_err, fab_err = PF_NO_PANIC, msg; ddi_fm_error_t derr; mutex_enter(&px_p->px_fm_mutex); @@ -137,13 +211,15 @@ px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt) derr.fme_flag = DDI_FM_ERR_UNEXPECTED; /* Basically check for safe access */ - (void) px_err_handle(px_p, &derr, PX_INTR_CALL, B_FALSE); + (void) px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_ALL); /* Check the severity of this error */ - err = px_err_check_severity(px_p, &derr, epkt, PX_INTR_CALL); + rc_err = px_err_epkt_severity(px_p, &derr, epkt, PX_INTR_CALL); - /* check for error severity */ - ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); + /* Scan the fabric if the root port is not in drain state. 
*/ + if (!px_lib_is_in_drain_state(px_p)) + fab_err = pf_scan_fabric(rpdip, &derr, px_p->px_dq_p, + &px_p->px_dq_tail); /* Set the intr state to idle for the leaf that received the mondo */ if (px_lib_intr_setstate(rpdip, fault_p->px_fh_sysino, @@ -154,14 +230,27 @@ px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt) mutex_exit(&px_p->px_fm_mutex); - if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL)) - PX_FM_PANIC("Fatal System Bus Error has occurred\n"); + switch (epkt->rc_descr.block) { + case BLOCK_MMU: /* FALLTHROUGH */ + case BLOCK_INTR: + msg = PX_RC; + break; + case BLOCK_PCIE: + msg = PX_RP; + break; + case BLOCK_HOSTBUS: /* FALLTHROUGH */ + default: + msg = PX_HB; + break; + } + + px_err_panic(rc_err, msg, fab_err); return (DDI_INTR_CLAIMED); } /* - * px_err_check_severity: + * px_err_epkt_severity: * Check the severity of the fire error based the epkt received * * @param px_p leaf in which to take the snap shot. @@ -169,15 +258,19 @@ px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt) * @param epkt epkt recevied from HV */ static int -px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt, +px_err_epkt_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt, int caller) { px_pec_t *pec_p = px_p->px_pec_p; dev_info_t *dip = px_p->px_dip; + boolean_t is_safeacc = B_FALSE; + boolean_t is_block_pci = B_FALSE; + char buf[FM_MAX_CLASS], descr_buf[1024]; int err = 0; /* Cautious access error handling */ - if (derr->fme_flag == DDI_FM_ERR_EXPECTED) { + switch (derr->fme_flag) { + case DDI_FM_ERR_EXPECTED: if (caller == PX_TRAP_CALL) { /* * for ddi_caut_get treat all events as nonfatal @@ -185,6 +278,7 @@ px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt, * err_status = NONFATAL. */ derr->fme_status = DDI_FM_NONFATAL; + is_safeacc = B_TRUE; } else { /* * For ddi_caut_put treat all events as nonfatal. 
Here @@ -192,175 +286,358 @@ px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt, */ derr->fme_status = DDI_FM_NONFATAL; ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr); + is_safeacc = B_TRUE; } + break; + case DDI_FM_ERR_PEEK: + case DDI_FM_ERR_POKE: + /* + * For ddi_peek/poke treat all events as nonfatal. + */ + is_safeacc = B_TRUE; + break; + default: + is_safeacc = B_FALSE; } + /* + * Older hypervisors in some cases send epkts with incorrect fields. + * We have to handle these "special" epkts correctly. + */ + if (px_legacy_epkt) + px_fix_legacy_epkt(dip, derr, epkt); + switch (epkt->rc_descr.block) { case BLOCK_HOSTBUS: - err = px_cb_check_errors(dip, derr, epkt, caller); + err = px_cb_epkt_severity(dip, derr, epkt); break; case BLOCK_MMU: - err = px_mmu_check_errors(dip, derr, epkt, caller); + err = px_mmu_epkt_severity(dip, derr, epkt); + px_err_fill_pfd(dip, px_p, epkt); break; case BLOCK_INTR: - err = PX_NONFATAL; + err = px_intr_epkt_severity(dip, derr, epkt); break; case BLOCK_PCIE: - err = px_pcie_check_errors(dip, derr, epkt, caller); + is_block_pci = B_TRUE; + err = px_pcie_epkt_severity(dip, derr, epkt); + px_err_fill_pfd(dip, px_p, epkt); break; default: - err = PX_ERR_UNKNOWN; + err = 0; } + if ((err & PX_HW_RESET) || (err & PX_PANIC)) { + if (px_log & PX_PANIC) + px_err_log_handle(dip, epkt, is_block_pci, "PANIC"); + } else if (err & PX_PROTECTED) { + if (px_log & PX_PROTECTED) + px_err_log_handle(dip, epkt, is_block_pci, "PROTECTED"); + } else if (err & PX_NO_PANIC) { + if (px_log & PX_NO_PANIC) + px_err_log_handle(dip, epkt, is_block_pci, "NO PANIC"); + } else if (err & PX_NO_ERROR) { + if (px_log & PX_NO_ERROR) + px_err_log_handle(dip, epkt, is_block_pci, "NO ERROR"); + } else if (err == 0) { + px_err_log_handle(dip, epkt, is_block_pci, "UNRECOGNIZED"); + + /* Unrecognized epkt. 
send ereport */ + (void) snprintf(buf, FM_MAX_CLASS, "%s", PX_FM_RC_UNRECOG); + + if (is_block_pci) { + px_pec_err_t *pec = (px_pec_err_t *)epkt; + + (void) snprintf(descr_buf, sizeof (descr_buf), + "Epkt contents:\n" + "Block: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d\n" + "I=%d, H=%d, C=%d, U=%d, E=%d, P=%d\n" + "PCI Err Status: 0x%x, PCIe Err Status: 0x%x\n" + "CE Status Reg: 0x%x, UE Status Reg: 0x%x\n" + "HDR1: 0x%lx, HDR2: 0x%lx\n" + "Err Src Reg: 0x%x, Root Err Status: 0x%x\n", + pec->pec_descr.block, pec->pec_descr.dir, + pec->pec_descr.Z, pec->pec_descr.S, + pec->pec_descr.R, pec->pec_descr.I, + pec->pec_descr.H, pec->pec_descr.C, + pec->pec_descr.U, pec->pec_descr.E, + pec->pec_descr.P, pec->pci_err_status, + pec->pcie_err_status, pec->ce_reg_status, + pec->ue_reg_status, pec->hdr[0], + pec->hdr[1], pec->err_src_reg, + pec->root_err_status); + + ddi_fm_ereport_post(dip, buf, derr->fme_ena, + DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, + EPKT_SYSINO, DATA_TYPE_UINT64, pec->sysino, + EPKT_EHDL, DATA_TYPE_UINT64, pec->ehdl, + EPKT_STICK, DATA_TYPE_UINT64, pec->stick, + EPKT_PEC_DESCR, DATA_TYPE_STRING, descr_buf, NULL); + } else { + (void) snprintf(descr_buf, sizeof (descr_buf), + "Epkt contents:\n" + "Block: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n" + "Dir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d\n" + "M=%d, S=%d, Size: 0x%x, Addr: 0x%lx\n" + "Hdr1: 0x%lx, Hdr2: 0x%lx, Res: 0x%lx\n", + epkt->rc_descr.block, epkt->rc_descr.op, + epkt->rc_descr.phase, epkt->rc_descr.cond, + epkt->rc_descr.dir, epkt->rc_descr.STOP, + epkt->rc_descr.H, epkt->rc_descr.R, + epkt->rc_descr.D, epkt->rc_descr.M, + epkt->rc_descr.S, epkt->size, epkt->addr, + epkt->hdr[0], epkt->hdr[1], epkt->reserved); + + ddi_fm_ereport_post(dip, buf, derr->fme_ena, + DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, + EPKT_SYSINO, DATA_TYPE_UINT64, epkt->sysino, + EPKT_EHDL, DATA_TYPE_UINT64, epkt->ehdl, + EPKT_STICK, DATA_TYPE_UINT64, epkt->stick, + EPKT_RC_DESCR, DATA_TYPE_STRING, descr_buf, NULL); + } + + err = 
PX_PANIC; + } + + /* Readjust the severity as a result of safe access */ + if (is_safeacc && !(err & PX_PANIC) && !(px_die & PX_PROTECTED)) + err = PX_NO_PANIC; + return (err); } +static void +px_err_log_handle(dev_info_t *dip, px_rc_err_t *epkt, boolean_t is_block_pci, + char *msg) +{ + if (is_block_pci) { + px_pec_err_t *pec = (px_pec_err_t *)epkt; + DBG(DBG_ERR_INTR, dip, + "A PCIe root port error has occured with a severity" + " \"%s\"\n" + "\tBlock: 0x%x, Dir: 0x%x, Flags: Z=%d, S=%d, R=%d, I=%d\n" + "\tH=%d, C=%d, U=%d, E=%d, P=%d\n" + "\tpci_err: 0x%x, pcie_err=0x%x, ce_reg: 0x%x\n" + "\tue_reg: 0x%x, Hdr1: 0x%p, Hdr2: 0x%p\n" + "\terr_src: 0x%x, root_err: 0x%x\n", + msg, pec->pec_descr.block, pec->pec_descr.dir, + pec->pec_descr.Z, pec->pec_descr.S, pec->pec_descr.R, + pec->pec_descr.I, pec->pec_descr.H, pec->pec_descr.C, + pec->pec_descr.U, pec->pec_descr.E, pec->pec_descr.P, + pec->pci_err_status, pec->pcie_err_status, + pec->ce_reg_status, pec->ue_reg_status, pec->hdr[0], + pec->hdr[1], pec->err_src_reg, pec->root_err_status); + } else { + DBG(DBG_ERR_INTR, dip, + "A PCIe root complex error has occured with a severity" + " \"%s\"\n" + "\tBlock: 0x%x, Op: 0x%x, Phase: 0x%x, Cond: 0x%x\n" + "\tDir: 0x%x, Flags: STOP=%d, H=%d, R=%d, D=%d, M=%d\n" + "\tS=%d, Size: 0x%x, Addr: 0x%p\n" + "\tHdr1: 0x%p, Hdr2: 0x%p, Res: 0x%p\n", + msg, epkt->rc_descr.block, epkt->rc_descr.op, + epkt->rc_descr.phase, epkt->rc_descr.cond, + epkt->rc_descr.dir, epkt->rc_descr.STOP, epkt->rc_descr.H, + epkt->rc_descr.R, epkt->rc_descr.D, epkt->rc_descr.M, + epkt->rc_descr.S, epkt->size, epkt->addr, epkt->hdr[0], + epkt->hdr[1], epkt->reserved); + } +} + /* ARGSUSED */ -static int -px_cb_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, - px_rc_err_t *epkt, int caller) +static void +px_fix_legacy_epkt(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) { - int fme_flag = derr->fme_flag; - boolean_t is_safeacc; - int ret, err = 0; - - is_safeacc = (fme_flag == 
DDI_FM_ERR_EXPECTED) || - (fme_flag == DDI_FM_ERR_PEEK) || - (fme_flag == DDI_FM_ERR_POKE); - - /* block/op/phase/cond/dir/flag... */ - switch (epkt->rc_descr.op) { - case OP_PIO: - err = PX_NONFATAL; - /* check handle if affected memory address is captured */ - if (epkt->rc_descr.M != 0) { - ret = px_handle_lookup(dip, ACC_HANDLE, - derr->fme_ena, (void *)epkt->addr); - } - if (ret == DDI_FM_FATAL) - err |= PX_FATAL_GOS; + /* + * We don't have a default case for any of the below switch statements + * since we are ok with the code falling through. + */ + switch (epkt->rc_descr.block) { + case BLOCK_HOSTBUS: + switch (epkt->rc_descr.op) { + case OP_DMA: + switch (epkt->rc_descr.phase) { + case PH_UNKNOWN: + switch (epkt->rc_descr.cond) { + case CND_UNKNOWN: + switch (epkt->rc_descr.dir) { + case DIR_RESERVED: + epkt->rc_descr.dir = DIR_READ; + break; + } /* DIR */ + } /* CND */ + } /* PH */ + } /* OP */ break; - - case OP_DMA: - switch (epkt->rc_descr.phase) { - case PH_ADDR: - err = PX_FATAL_GOS; - break; - case PH_DATA: - if (epkt->rc_descr.cond == CND_UE) { - err = PX_FATAL_GOS; + case BLOCK_MMU: + switch (epkt->rc_descr.op) { + case OP_XLAT: + switch (epkt->rc_descr.phase) { + case PH_DATA: + switch (epkt->rc_descr.cond) { + case CND_PROT: + switch (epkt->rc_descr.dir) { + case DIR_UNKNOWN: + epkt->rc_descr.dir = DIR_WRITE; + break; + } /* DIR */ + } /* CND */ break; - } - - err = PX_NONFATAL; - if (epkt->rc_descr.M == 1) { - ret = px_handle_lookup(dip, DMA_HANDLE, - derr->fme_ena, (void *)epkt->addr); - if (ret == DDI_FM_FATAL) - err |= PX_FATAL_GOS; - } - break; - default: - DBG(DBG_ERR_INTR, dip, "Unexpected epkt"); - err = PX_FATAL_GOS; - break; - } - break; - case OP_UNKNOWN: - err = PX_NONFATAL; - if ((epkt->rc_descr.cond == CND_UNMAP) || - (epkt->rc_descr.cond == CND_UE) || - (epkt->rc_descr.cond == CND_INT) || - (epkt->rc_descr.cond == CND_ILL)) - err |= PX_FATAL_GOS; - - if (epkt->rc_descr.M == 1) { - int ret1, ret2; - - ret1 = px_handle_lookup(dip, 
DMA_HANDLE, derr->fme_ena, - (void *)epkt->addr); - ret2 = px_handle_lookup(dip, ACC_HANDLE, derr->fme_ena, - (void *)epkt->addr); - - if (ret1 == DDI_FM_FATAL || ret2 == DDI_FM_FATAL) - err |= PX_FATAL_GOS; - } + case PH_IRR: + switch (epkt->rc_descr.cond) { + case CND_RESERVED: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + epkt->rc_descr.phase = PH_ADDR; + epkt->rc_descr.cond = CND_IRR; + } /* DIR */ + } /* CND */ + } /* PH */ + } /* OP */ break; + case BLOCK_INTR: + switch (epkt->rc_descr.op) { + case OP_MSIQ: + switch (epkt->rc_descr.phase) { + case PH_UNKNOWN: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_RESERVED: + epkt->rc_descr.dir = DIR_IRR; + break; + } /* DIR */ + break; + case CND_IRR: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + epkt->rc_descr.cond = CND_OV; + break; + } /* DIR */ + } /* CND */ + } /* PH */ + break; + case OP_RESERVED: + switch (epkt->rc_descr.phase) { + case PH_UNKNOWN: + switch (epkt->rc_descr.cond) { + case CND_ILL: + switch (epkt->rc_descr.dir) { + case DIR_IRR: + epkt->rc_descr.op = OP_MSI32; + epkt->rc_descr.phase = PH_DATA; + break; + } /* DIR */ + } /* CND */ + break; + case PH_DATA: + switch (epkt->rc_descr.cond) { + case CND_INT: + switch (epkt->rc_descr.dir) { + case DIR_UNKNOWN: + epkt->rc_descr.op = OP_MSI32; + break; + } /* DIR */ + } /* CND */ + } /* PH */ + } /* OP */ + } /* BLOCK */ +} - case OP_RESERVED: - default: - DBG(DBG_ERR_INTR, NULL, "Unrecognized JBC error."); - err = PX_FATAL_GOS; - break; - } +/* ARGSUSED */ +static int +px_intr_handle_errors(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) +{ + return (px_err_check_eq(dip)); +} + +/* ARGSUSED */ +static int +px_pcie_epkt_severity(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) +{ + px_pec_err_t *pec = (px_pec_err_t *)epkt; + px_err_pcie_t *pcie = (px_err_pcie_t *)epkt; + pf_data_t pf_data; + int x; + uint32_t temp; /* - * For protected safe access, consider PX_FATAL_GOS as the only - 
* exception for px to take immediate panic, else, treat errors - * as nonfatal. + * Check for failed PIO Read/Writes, which are errors that are not + * defined in the PCIe spec. */ - if (is_safeacc) { - if (err & PX_FATAL_GOS) - err = PX_FATAL_GOS; + temp = PCIE_AER_UCE_UR | PCIE_AER_UCE_CA; + if (((pec->pec_descr.dir == DIR_READ) || (pec->pec_descr.dir == + DIR_WRITE)) && pec->pec_descr.U && (pec->ue_reg_status == temp)) { + pf_data.aer_h0 = (uint32_t)(pec->hdr[0]); + pf_data.aer_h1 = (uint32_t)(pec->hdr[0] >> 32); + pf_data.aer_h2 = (uint32_t)(pec->hdr[1]); + pf_data.aer_h3 = (uint32_t)(pec->hdr[1] >> 32); + + if (pf_tlp_hdl_lookup(dip, derr, &pf_data) != DDI_FM_UNKNOWN) + return (PX_NO_PANIC); else - err = PX_NONFATAL; + return (PX_PANIC); } - return (err); -} - -/* ARGSUSED */ -static int -px_mmu_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, - px_rc_err_t *epkt, int caller) -{ - int ret, err = 0; + if (!pec->pec_descr.C) + pec->ce_reg_status = 0; + if (!pec->pec_descr.U) + pec->ue_reg_status = 0; + if (!pec->pec_descr.H) + pec->hdr[0] = 0; + if (!pec->pec_descr.I) + pec->hdr[1] = 0; - switch (epkt->rc_descr.op) { - case OP_BYPASS: /* nonfatal */ - case OP_XLAT: /* nonfatal, stuck-fatal, fatal-reset */ - case OP_TBW: /* nonfatal, stuck-fatal */ - err = PX_NONFATAL; - break; - default: - err = PX_ERR_UNKNOWN; - break; + /* + * According to the PCIe spec, there is a first error pointer. If there + * are header logs recorded and there are more than one error, the log + * will belong to the error that the first error pointer points to. + * + * The regs.primary_ue expects a bit number, go through the ue register + * and find the first error that occured. Because the sun4v epkt spec + * does not define this value, the algorithm below gives the lower bit + * priority. 
+ */ + temp = pcie->ue_reg; + if (temp) { + for (x = 0; !(temp & 0x1); x++) { + temp = temp >> 1; + } + pcie->primary_ue = 1 << x; + } else { + pcie->primary_ue = 0; } - if ((epkt->rc_descr.D != 0) || (epkt->rc_descr.M != 0)) { - ret = px_handle_lookup(dip, DMA_HANDLE, derr->fme_ena, - (void *)epkt->addr); - if (ret == DDI_FM_FATAL) - err |= PX_FATAL_GOS; - else - err |= PX_NONFATAL; - } else - err |= PX_NONFATAL; + /* Sun4v doesn't log the TX hdr except for CTOs */ + if (pcie->primary_ue == PCIE_AER_UCE_TO) { + pcie->tx_hdr1 = pcie->rx_hdr1; + pcie->tx_hdr2 = pcie->rx_hdr2; + pcie->tx_hdr3 = pcie->rx_hdr3; + pcie->tx_hdr4 = pcie->rx_hdr4; + pcie->rx_hdr1 = 0; + pcie->rx_hdr2 = 0; + pcie->rx_hdr3 = 0; + pcie->rx_hdr4 = 0; + } else { + pcie->tx_hdr1 = 0; + pcie->tx_hdr2 = 0; + pcie->tx_hdr3 = 0; + pcie->tx_hdr4 = 0; + } - return (err); + return (px_err_check_pcie(dip, derr, pcie)); } -/* ARGSUSED */ static int -px_pcie_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, - px_rc_err_t *epkt, int caller) +px_mmu_handle_lookup(dev_info_t *dip, ddi_fm_error_t *derr, px_rc_err_t *epkt) { - int ret = PX_NONFATAL; - px_pec_err_t *pec = (px_pec_err_t *)epkt; + uint32_t addr = (uint32_t)epkt->addr; + pcie_req_id_t bdf = 0; - switch (pec->pec_descr.dir) { - case DIR_INGRESS: - case DIR_EGRESS: - case DIR_LINK: - ret |= PX_FABRIC_ERR_SEV(pec->ue_reg_status, - px_fabric_die_rc_ue, px_fabric_die_rc_ue_gos); - ret |= PX_FABRIC_ERR_SEV(pec->ue_reg_status, - px_fabric_die_rc_ce, px_fabric_die_rc_ce_gos); - break; - default: - ret = PX_ERR_UNKNOWN; - break; + if (epkt->rc_descr.H) { + bdf = (pcie_req_id_t)((epkt->hdr[0] >> 16) & 0xFFFF); } - return (ret); + return (pf_hdl_lookup(dip, derr->fme_ena, PF_DMA_ADDR, addr, + bdf)); } diff --git a/usr/src/uts/sun4v/io/px/px_err.h b/usr/src/uts/sun4v/io/px/px_err.h index e87130d4d9..9df5e9ff18 100644 --- a/usr/src/uts/sun4v/io/px/px_err.h +++ b/usr/src/uts/sun4v/io/px/px_err.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file 
are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -112,6 +111,13 @@ extern "C" { #define DIR_UNKNOWN 0xe #define DIR_IRR 0xf +#define PX_FM_RC_UNRECOG "fire.epkt" +#define EPKT_SYSINO "sysino" +#define EPKT_EHDL "ehdl" +#define EPKT_STICK "stick" +#define EPKT_RC_DESCR "rc_descr" +#define EPKT_PEC_DESCR "pec_descr" + typedef struct root_complex { uint64_t sysino; uint64_t ehdl; @@ -122,7 +128,8 @@ typedef struct root_complex { phase : 4, cond : 4, dir : 4, - : 7, + STOP : 1, + : 6, H : 1, R : 1, D : 1, diff --git a/deleted_files/usr/src/uts/sun4v/io/px/px_err_gen.c b/usr/src/uts/sun4v/io/px/px_err_gen.c index 1b027d9a1d..1b027d9a1d 100644 --- a/deleted_files/usr/src/uts/sun4v/io/px/px_err_gen.c +++ b/usr/src/uts/sun4v/io/px/px_err_gen.c diff --git a/usr/src/uts/sun4v/io/px/px_lib4v.c b/usr/src/uts/sun4v/io/px/px_lib4v.c index 7e34da5fe2..c6563d6fcc 100644 --- a/usr/src/uts/sun4v/io/px/px_lib4v.c +++ b/usr/src/uts/sun4v/io/px/px_lib4v.c @@ -1436,8 +1436,13 @@ px_lib_map_attr_check(ddi_map_req_t *mp) * This will initiate something similar to px_fm_callback. 
*/ static void -px_lib_log_safeacc_err(px_t *px_p, ddi_acc_handle_t handle, int fme_flag) +px_lib_log_safeacc_err(px_t *px_p, ddi_acc_handle_t handle, int fme_flag, + r_addr_t addr) { + uint32_t addr_high, addr_low; + pcie_req_id_t bdf = 0; + px_ranges_t *ranges_p; + int range_len, i; ddi_acc_impl_t *hp = (ddi_acc_impl_t *)handle; ddi_fm_error_t derr; @@ -1449,9 +1454,41 @@ px_lib_log_safeacc_err(px_t *px_p, ddi_acc_handle_t handle, int fme_flag) if (hp) hp->ahi_err->err_expected = DDI_FM_ERR_EXPECTED; + addr_high = (uint32_t)(addr >> 32); + addr_low = (uint32_t)addr; + + /* + * Make sure this failed load came from this PCIe port. Check by + * matching the upper 32 bits of the address with the ranges property. + */ + range_len = px_p->px_ranges_length / sizeof (px_ranges_t); + i = 0; + for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) { + if (ranges_p->parent_high == addr_high) { + switch (ranges_p->child_high & PCI_ADDR_MASK) { + case PCI_ADDR_CONFIG: + bdf = (pcie_req_id_t)(addr_low >> 12); + break; + default: + bdf = 0; + break; + } + break; + } + } + mutex_enter(&px_p->px_fm_mutex); - (void) ndi_fm_handler_dispatch(px_p->px_dip, NULL, &derr); + if (!px_lib_is_in_drain_state(px_p)) { + /* + * This is to ensure that device corresponding to the addr of + * the failed PIO/CFG load gets scanned. + */ + px_rp_en_q(px_p, bdf, addr, + (PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB)); + (void) pf_scan_fabric(px_p->px_dip, &derr, + px_p->px_dq_p, &px_p->px_dq_tail); + } mutex_exit(&px_p->px_fm_mutex); } @@ -1556,7 +1593,7 @@ px_lib_ctlops_poke(dev_info_t *dip, dev_info_t *rdip, */ px_lib_log_safeacc_err(px_p, (ddi_acc_handle_t)hp, (hp ? DDI_FM_ERR_EXPECTED : - DDI_FM_ERR_POKE)); + DDI_FM_ERR_POKE), ra); pec_p->pec_ontrap_data = NULL; pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED; @@ -1646,7 +1683,7 @@ px_lib_ctlops_peek(dev_info_t *dip, dev_info_t *rdip, */ px_lib_log_safeacc_err(px_p, (ddi_acc_handle_t)hp, (hp ? 
DDI_FM_ERR_EXPECTED : - DDI_FM_ERR_PEEK)); + DDI_FM_ERR_PEEK), ra); /* Stuff FFs in host addr if peek. */ if (hp == NULL) { |