diff options
author | Yanmin Sun <Yanmin.Sun@Sun.COM> | 2009-10-21 16:35:06 -0700 |
---|---|---|
committer | Yanmin Sun <Yanmin.Sun@Sun.COM> | 2009-10-21 16:35:06 -0700 |
commit | 491f61a1e1c1fc54a47bbcf53dbbbe1293b93b27 (patch) | |
tree | 91ab205729699c4d0b9ef91860b4c30e21d3e5f6 /usr/src | |
parent | 096d22d43da2758693a6df66ce7d9ab54c9d464c (diff) | |
download | illumos-joyent-491f61a1e1c1fc54a47bbcf53dbbbe1293b93b27.tar.gz |
PSARC 2009/543 FMA for Nehalem_EX
6814342 FMA for Intel Nehalem EX
6874673 ereport.cpu.intel.l1tlb produces wrong fault
Diffstat (limited to 'usr/src')
24 files changed, 867 insertions, 46 deletions
diff --git a/usr/src/cmd/fm/dicts/INTEL.dict b/usr/src/cmd/fm/dicts/INTEL.dict index 434a0ce025..d5fb6e7bfc 100644 --- a/usr/src/cmd/fm/dicts/INTEL.dict +++ b/usr/src/cmd/fm/dicts/INTEL.dict @@ -1,5 +1,5 @@ # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # CDDL HEADER START @@ -76,3 +76,16 @@ fault.cpu.intel.quickpath.mem_spare=47 fault.cpu.intel.quickpath.mem_bad_id=48 fault.cpu.intel.quickpath.mem_redundant=49 fault.cpu.intel.quickpath.interconnect=50 +fault.cpu.intel.quickpath.mem_controller_ce=51 +fault.cpu.intel.quickpath.mem_controller_ue=52 +fault.cpu.intel.quickpath.mem_link_ce=53 +fault.cpu.intel.quickpath.mem_link_ue=54 +fault.cpu.intel.quickpath.mem_failover_mir=55 +fault.cpu.intel.quickpath.mem_scrubbing=56 +fault.cpu.intel.quickpath.llc_ewb=57 +fault.cpu.intel.quickpath.home_agent=58 +fault.cpu.intel.quickpath.sys_cfg=59 +fault.cpu.intel.quickpath.system_cache=60 +fault.cpu.intel.quickpath.bus_interconnect=61 +fault.cpu.intel.has_poison=62 +fault.cpu.intel.discard_fatal=63 diff --git a/usr/src/cmd/fm/dicts/INTEL.po b/usr/src/cmd/fm/dicts/INTEL.po index fac8ab23b4..82ec82c916 100644 --- a/usr/src/cmd/fm/dicts/INTEL.po +++ b/usr/src/cmd/fm/dicts/INTEL.po @@ -823,3 +823,211 @@ msgid "INTEL-8001-KP.impact" msgstr "System may panic or be reset by BIOS" msgid "INTEL-8001-KP.action" msgstr "Schedule a repair procedure to check seating of proccessors" +# +# code: INTEL-8001-LA +# keys: fault.cpu.intel.quickpath.mem_controller_ce +# +msgid "INTEL-8001-LA.type" +msgstr "Fault" +msgid "INTEL-8001-LA.severity" +msgstr "Major" +msgid "INTEL-8001-LA.description" +msgstr "A quickpath memory controller correctable error was detected. Refer to %s for more information." 
+msgid "INTEL-8001-LA.response" +msgstr "Processor is not off-lined, as the memory controller on the CPU chip is still accessible by other processors" +msgid "INTEL-8001-LA.impact" +msgstr "No impact to the system in the presence of this fault." +msgid "INTEL-8001-LA.action" +msgstr "Please refer to the 'Details' section of the knowledge article for additional info." +# +# code: INTEL-8001-MH +# keys: fault.cpu.intel.quickpath.mem_controller_ue +# +msgid "INTEL-8001-MH.type" +msgstr "Fault" +msgid "INTEL-8001-MH.severity" +msgstr "Critical" +msgid "INTEL-8001-MH.description" +msgstr "A quickpath memory controller uncorrectable error was detected. Refer to %s for more information." +msgid "INTEL-8001-MH.response" +msgstr "Processor is not off-lined, as the memory controller on the CPU chip is still accessible by other processors" +msgid "INTEL-8001-MH.impact" +msgstr "System may panic or be reset by BIOS." +msgid "INTEL-8001-MH.action" +msgstr "Please refer to the 'Details' section of the knowledge article for additional info." +# +# code: INTEL-8001-ND +# keys: fault.cpu.intel.quickpath.mem_link_ce +# +msgid "INTEL-8001-ND.type" +msgstr "Fault" +msgid "INTEL-8001-ND.severity" +msgstr "Major" +msgid "INTEL-8001-ND.description" +msgstr "A quickpath memory link correctable error was detected. Refer to %s for more information." +msgid "INTEL-8001-ND.response" +msgstr "Processor is not off-lined, as the memory controller on the CPU chip is still accessible by other processors" +msgid "INTEL-8001-ND.impact" +msgstr "No impact to the system in the presence of this fault." +msgid "INTEL-8001-ND.action" +msgstr "Please refer to the 'Details' section of the knowledge article for additional info." +# +# code: INTEL-8001-PY +# keys: fault.cpu.intel.quickpath.mem_link_ue +# +msgid "INTEL-8001-PY.type" +msgstr "Fault" +msgid "INTEL-8001-PY.severity" +msgstr "Critical" +msgid "INTEL-8001-PY.description" +msgstr "A quickpath memory link uncorrectable error was detected. 
Refer to %s for more information." +msgid "INTEL-8001-PY.response" +msgstr "Processor is not off-lined, as the memory controller on the CPU chip is still accessible by other processors" +msgid "INTEL-8001-PY.impact" +msgstr "System may panic or be reset by BIOS." +msgid "INTEL-8001-PY.action" +msgstr "Please refer to the 'Details' section of the knowledge article for additional info." +# +# code: INTEL-8001-Q3 +# keys: fault.cpu.intel.quickpath.mem_failover_mir +# +msgid "INTEL-8001-Q3.type" +msgstr "Fault" +msgid "INTEL-8001-Q3.severity" +msgstr "Major" +msgid "INTEL-8001-Q3.description" +msgstr "Uncorrectable error on the memory controller. Refer to %s for more information." +msgid "INTEL-8001-Q3.response" +msgstr "None" +msgid "INTEL-8001-Q3.impact" +msgstr "System will continue to run, but a loss of memory mirror capability." +msgid "INTEL-8001-Q3.action" +msgstr "Please refer to the 'Details' section of the knowledge article for additional info" +# +# code: INTEL-8001-RC +# keys: fault.cpu.intel.quickpath.mem_scrubbing +# +msgid "INTEL-8001-RC.type" +msgstr "Fault" +msgid "INTEL-8001-RC.severity" +msgstr "Minor" +msgid "INTEL-8001-RC.description" +msgstr "An uncorrectable software recoverable memory scrubbing error has occurred. Refer to %s for more information." +msgid "INTEL-8001-RC.response" +msgstr "A pair of pages on two different DIMM's will be retired." +msgid "INTEL-8001-RC.impact" +msgstr "No impact to the system in the presence of this fault." +msgid "INTEL-8001-RC.action" +msgstr "Please refer to the 'Details' section of the knowledge article for additional info" +# +# code: INTEL-8001-SQ +# keys: fault.cpu.intel.quickpath.llc_ewb +# +msgid "INTEL-8001-SQ.type" +msgstr "Fault" +msgid "INTEL-8001-SQ.severity" +msgstr "Minor" +msgid "INTEL-8001-SQ.description" +msgstr "An uncorrectable LLC explicit writeback error has occurred. Refer to %s for more information." 
+msgid "INTEL-8001-SQ.response" +msgstr "Page is retired, however the page will be unretired upon next reboot." +msgid "INTEL-8001-SQ.impact" +msgstr "No impact to the system in the presence of this fault." +msgid "INTEL-8001-SQ.action" +msgstr "Please refer to the 'Details' section of the knowledge article for additional info" +# +# code: INTEL-8001-T4 +# keys: fault.cpu.intel.quickpath.home_agent +# +msgid "INTEL-8001-T4.type" +msgstr "Fault" +msgid "INTEL-8001-T4.severity" +msgstr "Critical" +msgid "INTEL-8001-T4.description" +msgstr "A home agent and global coherence engine error has occurred. Refer to %s for more information." +msgid "INTEL-8001-T4.response" +msgstr "None" +msgid "INTEL-8001-T4.impact" +msgstr "System will be reset upon fatal event or may continue to run when an uncorrectable event." +msgid "INTEL-8001-T4.action" +msgstr "Please refer to the 'Details' section of the knowledge article for additional info" +# +# code: INTEL-8001-UR +# keys: fault.cpu.intel.quickpath.sys_cfg +# +msgid "INTEL-8001-UR.type" +msgstr "Fault" +msgid "INTEL-8001-UR.severity" +msgstr "Critical" +msgid "INTEL-8001-UR.description" +msgstr "A system configuration controller error has occurred. Refer to %s for more information." +msgid "INTEL-8001-UR.response" +msgstr "None" +msgid "INTEL-8001-UR.impact" +msgstr "System will be reset upon fatal event or may continue to run when an uncorrectable event." +msgid "INTEL-8001-UR.action" +msgstr "Please refer to the 'Details' section of the knowledge article for additional info" +# +# code: INTEL-8001-V2 +# keys: fault.cpu.intel.quickpath.system_cache +# +msgid "INTEL-8001-V2.type" +msgstr "Fault" +msgid "INTEL-8001-V2.severity" +msgstr "Critical" +msgid "INTEL-8001-V2.description" +msgstr "A system caching agent error has occurred. Refer to %s for more information." 
+msgid "INTEL-8001-V2.response" +msgstr "None" +msgid "INTEL-8001-V2.impact" +msgstr "System will be reset upon fatal event or may continue to run when an uncorrectable event." +msgid "INTEL-8001-V2.action" +msgstr "Please refer to the 'Details' section of the knowledge article for additional info" +# +# code: INTEL-8001-WX +# keys: fault.cpu.intel.quickpath.bus_interconnect +# +msgid "INTEL-8001-WX.type" +msgstr "Fault" +msgid "INTEL-8001-WX.severity" +msgstr "Critical" +msgid "INTEL-8001-WX.description" +msgstr "A quickpath internal port error was detected. Refer to %s for more information." +msgid "INTEL-8001-WX.response" +msgstr "None" +msgid "INTEL-8001-WX.impact" +msgstr "System may panic, maybe reset by the BIOS, or may continue to run." +msgid "INTEL-8001-WX.action" +msgstr "Please refer to the 'Details' section of the knowledge article for additional info" +# +# code: INTEL-8001-XE +# keys: fault.cpu.intel.has_poison +# +msgid "INTEL-8001-XE.type" +msgstr "Fault" +msgid "INTEL-8001-XE.severity" +msgstr "Minor" +msgid "INTEL-8001-XE.description" +msgstr "Cache line has been poisoned to protect integrity. Refer to %s for more information." +msgid "INTEL-8001-XE.response" +msgstr "None" +msgid "INTEL-8001-XE.impact" +msgstr "No impact to the system in the presence of this fault." +msgid "INTEL-8001-XE.action" +msgstr "Please refer to the 'Details' section of the knowledge article for additional info" +# +# code: INTEL-8001-YJ +# keys: fault.cpu.intel.discard_fatal +# +msgid "INTEL-8001-YJ.type" +msgstr "Fault" +msgid "INTEL-8001-YJ.severity" +msgstr "Minor" +msgid "INTEL-8001-YJ.description" +msgstr "Discarding cache line to protect data integrity. Refer to %s for more information." +msgid "INTEL-8001-YJ.response" +msgstr "None" +msgid "INTEL-8001-YJ.impact" +msgstr "No impact to the system in the presence of this fault." 
+msgid "INTEL-8001-YJ.action" +msgstr "Please refer to the 'Details' section of the knowledge article for additional info" diff --git a/usr/src/cmd/fm/eversholt/files/i386/i86pc/intel.esc b/usr/src/cmd/fm/eversholt/files/i386/i86pc/intel.esc index d3ec9d0b96..f4e5c3a18e 100644 --- a/usr/src/cmd/fm/eversholt/files/i386/i86pc/intel.esc +++ b/usr/src/cmd/fm/eversholt/files/i386/i86pc/intel.esc @@ -50,7 +50,8 @@ SMPL_EVENT(internal_unclassified, 1s); * Propogations for all but "external" and "unknown" simple errors. * If the error is uncorrected we produce a fault immediately, otherwise * we diagnose it to an upset and decalre a fault when the SERD engine - * trips. + * trips. prop statement for ereport.cpu.intel.internal_unclassified is + * moved to the Nehalem EX section to deal with poison case. */ engine serd.cpu.intel.simple@chip/core/strand, N=3, T=72h; @@ -63,7 +64,6 @@ prop fault.cpu.intel.internal@chip/core/strand ereport.cpu.intel.internal_timer@chip/core/strand, ereport.cpu.intel.internal_parity@chip/core/strand, ereport.cpu.intel.unclassified@chip/core/strand, - ereport.cpu.intel.internal_unclassified@chip/core/strand, ereport.cpu.intel.frc@chip/core/strand; /* @@ -174,9 +174,9 @@ CMPND_FLT_PROP_1(l1itlb, l1itlb, 3, 72h); CMPND_FLT_PROP_1(l2itlb, l2itlb, 3, 72h); CMPND_FLT_PROP_1(itlb, itlb, 12, 72h); -CMPND_FLT_PROP_1(l0tlb, litlb, 3, 72h); -CMPND_FLT_PROP_1(l1tlb, litlb, 3, 72h); -CMPND_FLT_PROP_1(l2tlb, litlb, 3, 72h); +CMPND_FLT_PROP_1(l0tlb, l0tlb, 3, 72h); +CMPND_FLT_PROP_1(l1tlb, l1tlb, 3, 72h); +CMPND_FLT_PROP_1(l2tlb, l2tlb, 3, 72h); CMPND_FLT_PROP_1(tlb, tlb, 12, 72h); CMPND_FLT_PROP_1(l0dcache, l0dcache, 3, 72h); @@ -459,22 +459,26 @@ prop fault.memory.intel.page_ue@ motherboard/chip/memory-controller/dram-channel/dimm/rank { CONTAINS_RANK && (payloadprop_defined("physaddr") || payloadprop_defined("resource[0].hc-specific.offset")) && - SET_ADDR && SET_RES_OFFSET } (1)-> + SET_ADDR && SET_RES_OFFSET } (0)-> 
ereport.cpu.intel.quickpath.mem_ue@motherboard/chip/memory-controller; -event fault.memory.intel.dimm_ue@ - motherboard/chip/memory-controller/dram-channel/dimm/rank; +#define CHIPDIMM motherboard/chip/memory-controller/dram-channel/dimm -prop fault.memory.intel.dimm_ue@ - motherboard/chip/memory-controller/dram-channel/dimm/rank +event fault.memory.intel.dimm_ue@CHIPDIMM/rank; + +event error.memory.intel.dimm_ue_ep@CHIPDIMM/rank; +event error.memory.intel.dimm_ue_ex@CHIPDIMM/rank; + +prop fault.memory.intel.dimm_ue@CHIPDIMM/rank (1)-> + error.memory.intel.dimm_ue_ep@CHIPDIMM/rank, + error.memory.intel.dimm_ue_ex@CHIPDIMM/rank; + +prop error.memory.intel.dimm_ue_ep@CHIPDIMM/rank { CONTAINS_RANK } (1)-> ereport.cpu.intel.quickpath.mem_ue@motherboard/chip/memory-controller; -prop fault.memory.intel.dimm_ue@ - motherboard/chip/memory-controller/dram-channel/dimm/rank (0)-> - EREPORT_BUS_ERROR; +prop fault.memory.intel.dimm_ue@CHIPDIMM/rank (0)-> EREPORT_BUS_ERROR; -#define CHIPDIMM motherboard/chip/memory-controller/dram-channel/dimm event ereport.cpu.intel.quickpath.mem_ce@ motherboard/chip/memory-controller{within(12s)}; @@ -485,7 +489,7 @@ event fault.memory.intel.page_ce@CHIPDIMM/rank, message=0, response=0, prop fault.memory.intel.page_ce@CHIPDIMM/rank { CONTAINS_RANK && (payloadprop_defined("physaddr") || payloadprop_defined("resource[0].hc-specific.offset")) && - SET_ADDR && SET_RES_OFFSET } (1)-> + SET_ADDR && SET_RES_OFFSET } (0)-> ereport.cpu.intel.quickpath.mem_ce@motherboard/chip/memory-controller; engine serd.memory.intel.dimm_ce@CHIPDIMM, N=PAGE_CE_COUNT, T=PAGE_CE_TIME; @@ -493,14 +497,14 @@ event fault.memory.intel.dimm_ce@CHIPDIMM, engine=serd.memory.intel.dimm_ce@CHIPDIMM; prop fault.memory.intel.dimm_ce@CHIPDIMM { !confprop_defined(CHIPDIMM, "dimm-size") && - count(STAT_CPU_MEM_CE_PGFLTS) > 512 } (1)-> + count(STAT_CPU_MEM_CE_PGFLTS) > 512 } (0)-> ereport.cpu.intel.quickpath.mem_ce@motherboard/chip/memory-controller; #define 
CPU_MEM_DIMM_CE(dimm_size, n, t, fault_rate) \ prop fault.memory.intel.dimm_ce@CHIPDIMM { \ confprop(CHIPDIMM, "dimm-size") == dimm_size && \ count(STAT_CPU_MEM_CE_PGFLTS) > fault_rate && \ - setserdn(n) & setserdt(t) } (1)-> \ + setserdn(n) & setserdt(t) } (0)-> \ ereport.cpu.intel.quickpath.mem_ce@ \ motherboard/chip/memory-controller; @@ -597,7 +601,6 @@ prop upset.cpu.intel.quickpath.interconnect@motherboard/chip { !STATUS_UC } (1)-> ereport.cpu.intel.quickpath.interconnect@motherboard/chip; - engine serd.cpu.intel.quickpath.interconnect@motherboard/chip, N=3, T=72h; event fault.cpu.intel.quickpath.interconnect@motherboard/chip, @@ -607,3 +610,340 @@ event fault.cpu.intel.quickpath.interconnect@motherboard/chip, prop fault.cpu.intel.quickpath.interconnect@motherboard/chip { STATUS_UC } (0)-> ereport.cpu.intel.quickpath.interconnect@motherboard/chip; + + +/* + * Nehalem EX specific rules + */ +/* MBox errors */ +#define EX_MEM_EVENT(leafclass, t) \ + event ereport.cpu.intel.quickpath.leafclass@ \ + motherboard/chip/memory-controller { within(t) } + +EX_MEM_EVENT(mem_lnktrns, 1s); +EX_MEM_EVENT(mem_lnkpers, 1s); +EX_MEM_EVENT(mem_sbfbdlinkerr, 1s); +EX_MEM_EVENT(mem_nbfbdlnkerr, 1s); +EX_MEM_EVENT(mem_lnkcrcvld, 1s); + +engine serd.cpu.intel.quickpath.mem_link_ce@motherboard/chip/memory-controller, + N=500, T=1week; +event fault.cpu.intel.quickpath.mem_link_ce@motherboard/chip/memory-controller, + engine=serd.cpu.intel.quickpath.mem_link_ce@motherboard/chip/memory-controller, + retire=0, response=0; + +prop fault.cpu.intel.quickpath.mem_link_ce@motherboard/chip/memory-controller -> + ereport.cpu.intel.quickpath.mem_lnktrns@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_lnkpers@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_sbfbdlinkerr@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_nbfbdlnkerr@motherboard/chip/memory-controller, + 
ereport.cpu.intel.quickpath.mem_lnkcrcvld@motherboard/chip/memory-controller; + +EX_MEM_EVENT(mem_lnkuncorr_uc, 1s); +EX_MEM_EVENT(mem_lnkpers_uc, 1s); +EX_MEM_EVENT(mem_sbfbdlinkerr_uc, 1s); +EX_MEM_EVENT(mem_nbfbdlnkerr_uc, 1s); +EX_MEM_EVENT(mem_lnkcrcvld_uc, 1s); + +event fault.cpu.intel.quickpath.mem_link_ue@motherboard/chip/memory-controller, + retire=0; + +prop fault.cpu.intel.quickpath.mem_link_ue@motherboard/chip/memory-controller -> + ereport.cpu.intel.quickpath.mem_lnkuncorr_uc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_lnkpers_uc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_sbfbdlinkerr_uc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_nbfbdlnkerr_uc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_lnkcrcvld_uc@motherboard/chip/memory-controller; + +EX_MEM_EVENT(mem_ptrl_fsm_err, 1s); +EX_MEM_EVENT(mem_errflw_fsm_fail, 1s); +EX_MEM_EVENT(mem_vberr, 1s); + +engine serd.cpu.intel.quickpath.mem_controller_ce@motherboard/chip/memory-controller, + N=500, T=1week; +event fault.cpu.intel.quickpath.mem_controller_ce@motherboard/chip/memory-controller, + engine=serd.cpu.intel.quickpath.mem_controller_ce@motherboard/chip/memory-controller, + retire=0, response=0; + +prop fault.cpu.intel.quickpath.mem_controller_ce@motherboard/chip/memory-controller -> + ereport.cpu.intel.quickpath.mem_ptrl_fsm_err@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_errflw_fsm_fail@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_vberr@motherboard/chip/memory-controller; + +EX_MEM_EVENT(mem_ptrl_fsm_err_uc, 1s); +EX_MEM_EVENT(mem_errflw_fsm_fail_uc, 1s); +EX_MEM_EVENT(mem_mcpar_fsmerr_uc, 1s); +EX_MEM_EVENT(mem_vberr_uc, 1s); +EX_MEM_EVENT(mem_fberr_uc, 1s); + +event fault.cpu.intel.quickpath.mem_controller_ue@motherboard/chip/memory-controller, + retire=0; + +prop fault.cpu.intel.quickpath.mem_controller_ue@motherboard/chip/memory-controller -> + 
ereport.cpu.intel.quickpath.mem_ptrl_fsm_err_uc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_errflw_fsm_fail_uc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_mcpar_fsmerr_uc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_vberr_uc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_fberr_uc@motherboard/chip/memory-controller; + +EX_MEM_EVENT(mem_scrubbing_uc, 1s); +event fault.cpu.intel.quickpath.mem_scrubbing@ + motherboard/chip/memory-controller/dram-channel/dimm/rank, + response=0; + +prop fault.cpu.intel.quickpath.mem_scrubbing@ + motherboard/chip/memory-controller/dram-channel/dimm/rank[rank_num] + { payloadprop_defined("rank") && rank_num == payloadprop("rank") && + (payloadprop_defined("physaddr") || payloadprop_defined("offset")) && + SET_ADDR && SET_OFFSET } (1)-> + ereport.cpu.intel.quickpath.mem_scrubbing_uc@motherboard/chip/memory-controller; + +EX_MEM_EVENT(mem_ecc_uc, 12s); +EX_MEM_EVENT(mem_even_parity_uc, 1s); + +EX_MEM_EVENT(mem_ecc, 12s); +EX_MEM_EVENT(mem_even_parity, 1s); + +event error.memory.intel.ex_dimm_ce@ + motherboard/chip/memory-controller/dram-channel/dimm/rank; + +prop fault.memory.intel.page_ue@ + motherboard/chip/memory-controller/dram-channel/dimm/rank[rank_num] + { payloadprop_defined("rank") && rank_num == payloadprop("rank") && + (payloadprop_defined("physaddr") || payloadprop_defined("offset")) && + SET_ADDR && SET_OFFSET } (0)-> + ereport.cpu.intel.quickpath.mem_ecc_uc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_even_parity_uc@motherboard/chip/memory-controller; + +prop fault.memory.intel.page_ce@ + motherboard/chip/memory-controller/dram-channel/dimm/rank[rank_num] + { payloadprop_defined("rank") && rank_num == payloadprop("rank") && + (payloadprop_defined("physaddr") || payloadprop_defined("offset")) && + SET_ADDR && SET_OFFSET } (0)-> + ereport.cpu.intel.quickpath.mem_ecc@motherboard/chip/memory-controller, + 
ereport.cpu.intel.quickpath.mem_even_parity@motherboard/chip/memory-controller; + +prop error.memory.intel.dimm_ue_ex@ + motherboard/chip/memory-controller/dram-channel/dimm/rank[rank_num] + { payloadprop_defined("rank") && rank_num == payloadprop("rank") } (1)-> + ereport.cpu.intel.quickpath.mem_ecc_uc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_even_parity_uc@motherboard/chip/memory-controller; + +prop fault.memory.intel.dimm_ce@ + motherboard/chip/memory-controller/dram-channel/dimm + { !confprop_defined(motherboard/chip/memory-controller/dram-channel/dimm, + "dimm-size") && setserdn(10) & setserdt(1week) } (0)-> + error.memory.intel.ex_dimm_ce@ + motherboard/chip/memory-controller/dram-channel/dimm/rank; +prop error.memory.intel.ex_dimm_ce@ + motherboard/chip/memory-controller/dram-channel/dimm/rank[rank_num] + { payloadprop_defined("rank") && rank_num == payloadprop("rank") && + !confprop_defined(motherboard/chip/memory-controller/dram-channel/dimm, + "dimm-size") && + count(STAT_CPU_MEM_CE_PGFLTS) > 512 } (1)-> + ereport.cpu.intel.quickpath.mem_ecc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_even_parity@motherboard/chip/memory-controller; + +#define EX_CPU_MEM_DIMM_CE(dimm_size, n, t, fault_rate) \ + prop fault.memory.intel.dimm_ce@ \ + motherboard/chip/memory-controller/dram-channel/dimm { \ + confprop(motherboard/chip/memory-controller/dram-channel/dimm, \ + "dimm-size") == dimm_size && \ + setserdn(n) & setserdt(t) } (0)-> \ + error.memory.intel.ex_dimm_ce@ \ + motherboard/chip/memory-controller/dram-channel/dimm/rank; \ + prop error.memory.intel.ex_dimm_ce@ \ + motherboard/chip/memory-controller/dram-channel/dimm/rank[rank_num] { \ + payloadprop_defined("rank") && rank_num == payloadprop("rank") && \ + confprop(motherboard/chip/memory-controller/dram-channel/dimm, \ + "dimm-size") == dimm_size && \ + count(STAT_CPU_MEM_CE_PGFLTS) > fault_rate } (1)-> \ + 
ereport.cpu.intel.quickpath.mem_ecc@motherboard/chip/memory-controller, \ + ereport.cpu.intel.quickpath.mem_even_parity@motherboard/chip/memory-controller; + +EX_CPU_MEM_DIMM_CE("16G", 16, 1week, 2000) +EX_CPU_MEM_DIMM_CE("8G", 8, 1week, 2000) +EX_CPU_MEM_DIMM_CE("4G", 4, 1week, 1500) +EX_CPU_MEM_DIMM_CE("2G", 4, 2week, 1000) +EX_CPU_MEM_DIMM_CE("1G", 4, 4week, 500) + +event upset.memory.intel.discard@motherboard/chip/memory-controller{within(1s)}; + +prop upset.memory.intel.discard@motherboard/chip/memory-controller (0)-> + ereport.cpu.intel.quickpath.mem_scrubbing_uc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_ecc_uc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_even_parity_uc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_ecc@motherboard/chip/memory-controller, + ereport.cpu.intel.quickpath.mem_even_parity@motherboard/chip/memory-controller; + +EX_MEM_EVENT(mem_failover_mir, 1s); +event fault.cpu.intel.quickpath.mem_failover_mir@motherboard/chip/memory-controller, + retire=0; + +prop fault.cpu.intel.quickpath.mem_failover_mir@motherboard/chip/memory-controller -> + ereport.cpu.intel.quickpath.mem_failover_mir@motherboard/chip/memory-controller; + +/* + * RBox errors + */ +#define EX_EVENT(leafclass, t) \ + event ereport.cpu.intel.quickpath.leafclass@motherboard/chip { within(t) } + +engine serd.cpu.intel.quickpath.bus_interconnect@motherboard/chip, + N=3, T=72h; +event fault.cpu.intel.quickpath.bus_interconnect@motherboard/chip, + engine=serd.cpu.intel.quickpath.bus_interconnect@motherboard/chip, + retire=0; + +EX_EVENT(bus_retry_abort, 1s); +EX_EVENT(bus_link_init_ce, 1s); +event upset.cpu.intel.quickpath.discard@motherboard/chip; + +prop upset.cpu.intel.quickpath.discard@motherboard/chip (0)-> + ereport.cpu.intel.quickpath.bus_retry_abort@motherboard/chip, + ereport.cpu.intel.quickpath.bus_link_init_ce@motherboard/chip; + +EX_EVENT(bus_unknown, 1s); +EX_EVENT(bus_single_ecc, 1s); 
+EX_EVENT(bus_crc_flit, 1s); + +prop fault.cpu.intel.quickpath.bus_interconnect@motherboard/chip (0)-> + ereport.cpu.intel.quickpath.bus_unknown@motherboard/chip, + ereport.cpu.intel.quickpath.bus_single_ecc@motherboard/chip, + ereport.cpu.intel.quickpath.bus_crc_flit@motherboard/chip; + +EX_EVENT(bus_unknown_external, 1s); +EX_EVENT(bus_crc_flit_external, 1s); +prop upset.cpu.intel.quickpath.discard@motherboard/chip (0)-> + ereport.cpu.intel.quickpath.bus_unknown_external@motherboard/chip, + ereport.cpu.intel.quickpath.bus_crc_flit_external@motherboard/chip; + +EX_EVENT(bus_unknown_uc, 1s); +EX_EVENT(bus_opr_poison_err, 1s); +EX_EVENT(bus_eot_parity, 1s); +EX_EVENT(bus_rta_parity, 1s); +EX_EVENT(bus_bad_sbu_route, 1s); +EX_EVENT(bus_bad_msg, 1s); +EX_EVENT(bus_bad_vn_credit, 1s); +EX_EVENT(bus_hdr_double_ecc, 1s); +EX_EVENT(bus_link_retry_err, 1s); + +prop fault.cpu.intel.quickpath.bus_interconnect@motherboard/chip + { setserdincrement(4) } (0)-> + ereport.cpu.intel.quickpath.bus_unknown_uc@motherboard/chip, + ereport.cpu.intel.quickpath.bus_opr_poison_err@motherboard/chip, + ereport.cpu.intel.quickpath.bus_eot_parity@motherboard/chip, + ereport.cpu.intel.quickpath.bus_rta_parity@motherboard/chip, + ereport.cpu.intel.quickpath.bus_bad_sbu_route@motherboard/chip, + ereport.cpu.intel.quickpath.bus_bad_msg@motherboard/chip, + ereport.cpu.intel.quickpath.bus_bad_vn_credit@motherboard/chip, + ereport.cpu.intel.quickpath.bus_hdr_double_ecc@motherboard/chip, + ereport.cpu.intel.quickpath.bus_link_retry_err@motherboard/chip; + +EX_EVENT(bus_unknown_uc_external, 1s); +EX_EVENT(bus_opr_poison_err_external, 1s); +EX_EVENT(bus_eot_parity_external, 1s); +EX_EVENT(bus_rta_parity_external, 1s); +EX_EVENT(bus_bad_sbu_route_external, 1s); +EX_EVENT(bus_bad_msg_external, 1s); +EX_EVENT(bus_bad_vn_credit_external, 1s); +EX_EVENT(bus_hdr_double_ecc_external, 1s); +EX_EVENT(bus_link_retry_err_external, 1s); + +prop upset.cpu.intel.quickpath.discard@motherboard/chip (0)-> + 
ereport.cpu.intel.quickpath.bus_unknown_uc_external@motherboard/chip, + ereport.cpu.intel.quickpath.bus_opr_poison_err_external@motherboard/chip, + ereport.cpu.intel.quickpath.bus_eot_parity_external@motherboard/chip, + ereport.cpu.intel.quickpath.bus_rta_parity_external@motherboard/chip, + ereport.cpu.intel.quickpath.bus_bad_sbu_route_external@motherboard/chip, + ereport.cpu.intel.quickpath.bus_bad_msg_external@motherboard/chip, + ereport.cpu.intel.quickpath.bus_bad_vn_credit_external@motherboard/chip, + ereport.cpu.intel.quickpath.bus_hdr_double_ecc_external@motherboard/chip, + ereport.cpu.intel.quickpath.bus_link_retry_err_external@motherboard/chip; + +/* + * CBox errors + */ +EX_EVENT(llc_ewb_uc, 1s); +event fault.cpu.intel.quickpath.llc_ewb@motherboard/chip, + retire=0, response=0; + +prop fault.cpu.intel.quickpath.llc_ewb@motherboard/chip + { (payloadprop_defined("physaddr") || payloadprop_defined("offset")) && + SET_ADDR && SET_OFFSET } (1)-> + ereport.cpu.intel.quickpath.llc_ewb_uc@motherboard/chip; + +prop upset.cpu.intel.quickpath.discard@motherboard/chip (0)-> + ereport.cpu.intel.quickpath.llc_ewb_uc@motherboard/chip; + +/* + * SBox errors + */ +EX_EVENT(system_cache_uc, 1s); +event fault.cpu.intel.quickpath.system_cache@motherboard/chip, + retire=0, response=0; +prop fault.cpu.intel.quickpath.system_cache@motherboard/chip -> + ereport.cpu.intel.quickpath.system_cache_uc@motherboard/chip; + +/* + * BBox errors + */ +EX_EVENT(home_agent_uc, 1s); +event fault.cpu.intel.quickpath.home_agent@motherboard/chip, + retire=0, response=0; +prop fault.cpu.intel.quickpath.home_agent@motherboard/chip -> + ereport.cpu.intel.quickpath.home_agent_uc@motherboard/chip; + +/* + * UBox errors + */ +EX_EVENT(sys_cfg_cfa_ecc, 1s); +EX_EVENT(sys_cfg_uc, 1s); + +engine serd.cpu.intel.quickpath.sys_cfg@motherboard/chip, + N=2, T=72h; +event fault.cpu.intel.quickpath.sys_cfg@motherboard/chip, + engine=serd.cpu.intel.quickpath.sys_cfg@motherboard/chip, + retire=0, response=0; + 
+prop fault.cpu.intel.quickpath.sys_cfg@motherboard/chip (0)-> + ereport.cpu.intel.quickpath.sys_cfg_cfa_ecc@motherboard/chip; + +prop fault.cpu.intel.quickpath.sys_cfg@motherboard/chip + { setserdincrement(3) } (0)-> + ereport.cpu.intel.quickpath.sys_cfg_uc@motherboard/chip; + +/* + * Handling poison errors + */ +engine stat.has_poison@motherboard; +event fault.cpu.intel.has_poison@motherboard, + count=stat.has_poison@motherboard[0], + message=0, retire=0, response=0; +engine stat.discard_fatal@motherboard; +event fault.cpu.intel.discard_fatal@motherboard, + count=stat.discard_fatal@motherboard[0], + message=0, retire=0, response=0; + +prop fault.cpu.intel.has_poison@motherboard + { payloadprop_defined("poison") && 1 == payloadprop("poison") } (1)-> + ereport.cpu.intel.quickpath.mem_scrubbing_uc@motherboard/chip<>/memory-controller<>, + ereport.cpu.intel.quickpath.llc_ewb_uc@motherboard/chip<>, + ereport.cpu.intel.quickpath.system_cache_uc@motherboard/chip<>, + ereport.cpu.intel.quickpath.bus_opr_poison_err@motherboard/chip<>, + ereport.cpu.intel.quickpath.bus_opr_poison_err_external@motherboard/chip<>; + +prop fault.cpu.intel.discard_fatal@motherboard + { count(stat.has_poison@motherboard[0]) > count(stat.discard_fatal@motherboard[0]) && + payloadprop_defined("bank_number") && 5 == payloadprop("bank_number") && + payloadprop_defined("processor_context_corrupt") && + 1 == payloadprop("processor_context_corrupt") } (0)-> + ereport.cpu.intel.internal_unclassified@chip<>/core<>/strand<>; + +prop fault.cpu.intel.internal@chip/core/strand + { (count(stat.has_poison@motherboard[0]) <= count(stat.discard_fatal@motherboard[0]) || + !payloadprop_defined("bank_number") || 5 != payloadprop("bank_number") || + !payloadprop_defined("processor_context_corrupt") || + 0 == payloadprop("processor_context_corrupt")) && + (payloadprop("error_uncorrected") == 1 ? 
setserdincrement(4) : 1) } (0)-> + ereport.cpu.intel.internal_unclassified@chip/core/strand; diff --git a/usr/src/cmd/fm/modules/common/Makefile b/usr/src/cmd/fm/modules/common/Makefile index ef4e269b52..a7facc87e3 100644 --- a/usr/src/cmd/fm/modules/common/Makefile +++ b/usr/src/cmd/fm/modules/common/Makefile @@ -35,6 +35,7 @@ SUBDIRS = cpumem-retire \ syslog-msgs \ zfs-diagnosis \ zfs-retire \ + fdd-msg \ fabric-xlate include ../../Makefile.subdirs diff --git a/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c b/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c index ea1e49a1a2..4c6956bd16 100644 --- a/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c +++ b/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c @@ -127,6 +127,8 @@ static const cma_subscriber_t cma_subrs[] = { NULL }, { "fault.memory.datapath", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, NULL }, + { "fault.cpu.intel.quickpath.mem_scrubbing", FM_FMRI_SCHEME_HC, + FM_HC_SCHEME_VERSION, cma_page_retire }, { "fault.cpu.intel.quickpath.*", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, NULL }, { "fault.cpu.generic-x86.mc", FM_FMRI_SCHEME_HC, diff --git a/usr/src/cmd/fm/modules/common/fdd-msg/Makefile b/usr/src/cmd/fm/modules/common/fdd-msg/Makefile new file mode 100644 index 0000000000..da07c3ff28 --- /dev/null +++ b/usr/src/cmd/fm/modules/common/fdd-msg/Makefile @@ -0,0 +1,32 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +MODULE = fdd-msg +CLASS = common +SRCS = fdd_msg.c + +include ../../Makefile.plugin + +LDLIBS += -lipmi diff --git a/usr/src/cmd/fm/modules/common/fdd-msg/fdd-msg.conf b/usr/src/cmd/fm/modules/common/fdd-msg/fdd-msg.conf new file mode 100644 index 0000000000..673f715efe --- /dev/null +++ b/usr/src/cmd/fm/modules/common/fdd-msg/fdd-msg.conf @@ -0,0 +1,24 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# diff --git a/usr/src/cmd/fm/modules/common/fdd-msg/fdd_msg.c b/usr/src/cmd/fm/modules/common/fdd-msg/fdd_msg.c new file mode 100644 index 0000000000..bea31c650d --- /dev/null +++ b/usr/src/cmd/fm/modules/common/fdd-msg/fdd_msg.c @@ -0,0 +1,172 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * fdd messenger + * + * This module sends fdd running on the service processor a message which + * indicates the Solaris host FMA capability when fmd is started. The + * message is sent via the BMC driver (KCS interface) to the IPMI stack + * of ILOM using the IPMI Sun OEM core tunnel command. The sub-command + * is CORE_TUNNEL_SUBCMD_HOSTFMACAP. The IPMI stack posts a host FMA + * capability event to the event manager upon receiving this message. + * fdd subscribes to the event manager for this event. Upon receiving + * this event, fdd will adjust its configuration. 
+ * + */ + +#include <errno.h> +#include <stdio.h> +#include <strings.h> +#include <libipmi.h> +#include <fm/fmd_api.h> + +#define CMD_SUNOEM_CORE_TUNNEL 0x44 +#define CORE_TUNNEL_SUBCMD_HOSTFMACAP 2 +#define OEM_DATA_LENGTH 3 +#define VERSION 0x10 +#define HOST_CAPABILITY 2 + +static boolean_t +ipmi_is_sun_ilom(ipmi_deviceid_t *dp) +{ + return (ipmi_devid_manufacturer(dp) == IPMI_OEM_SUN && + dp->id_product == IPMI_PROD_SUN_ILOM); +} + +static int +check_sunoem(ipmi_handle_t *ipmi_hdl) +{ + ipmi_deviceid_t *devid; + + if ((devid = ipmi_get_deviceid(ipmi_hdl)) == NULL) + return (-1); + + if (!ipmi_is_sun_ilom(devid)) + return (-2); + + return (0); +} + +/*ARGSUSED*/ +static void +send_fma_cap(fmd_hdl_t *hdl, id_t id, void *data) +{ + ipmi_handle_t *ipmi_hdl; + ipmi_cmd_t cmd; + uint8_t oem_data[OEM_DATA_LENGTH]; + + ipmi_hdl = fmd_hdl_getspecific(hdl); + + oem_data[0] = CORE_TUNNEL_SUBCMD_HOSTFMACAP; + oem_data[1] = VERSION; + oem_data[2] = HOST_CAPABILITY; + + cmd.ic_netfn = IPMI_NETFN_OEM; + cmd.ic_lun = 0; + cmd.ic_cmd = CMD_SUNOEM_CORE_TUNNEL; + cmd.ic_dlen = OEM_DATA_LENGTH; + cmd.ic_data = oem_data; + + if (ipmi_send(ipmi_hdl, &cmd) == NULL) { + fmd_hdl_debug(hdl, "Failed to send Solaris FMA " + "capability to fdd: %s", ipmi_errmsg(ipmi_hdl)); + } + + ipmi_close(ipmi_hdl); + fmd_hdl_setspecific(hdl, NULL); + fmd_hdl_unregister(hdl); +} + +static const fmd_hdl_ops_t fmd_ops = { + NULL, /* fmdo_recv */ + send_fma_cap, /* fmdo_timeout */ + NULL, /* fmdo_close */ + NULL, /* fmdo_stats */ + NULL, /* fmdo_gc */ +}; + +static const fmd_prop_t fmd_props[] = { + { "interval", FMD_TYPE_TIME, "1s" }, + { NULL, 0, NULL } +}; + +static const fmd_hdl_info_t fmd_info = { + "fdd Messenger", "1.0", &fmd_ops, fmd_props +}; + +void +_fmd_init(fmd_hdl_t *hdl) +{ + ipmi_handle_t *ipmi_hdl; + int error; + char *msg; + + if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) + return; + + if ((ipmi_hdl = ipmi_open(&error, &msg)) == NULL) { + /* + * If /dev/bmc doesn't exist on 
the system, then unload the + * module without doing anything. + */ + if (error != EIPMI_BMC_OPEN_FAILED) + fmd_hdl_abort(hdl, "Failed to initialize IPMI " + "connection: %s\n", msg); + fmd_hdl_debug(hdl, "Failed to load: no IPMI connection " + "present"); + fmd_hdl_unregister(hdl); + return; + } + + /* + * Check if it's Sun ILOM + */ + if (check_sunoem(ipmi_hdl) != 0) { + fmd_hdl_debug(hdl, "Service Processor does not run " + "Sun ILOM"); + ipmi_close(ipmi_hdl); + fmd_hdl_unregister(hdl); + return; + } + + fmd_hdl_setspecific(hdl, ipmi_hdl); + + /* + * Setup the timer. + */ + (void) fmd_timer_install(hdl, NULL, NULL, 0); +} + +void +_fmd_fini(fmd_hdl_t *hdl) +{ + ipmi_handle_t *ipmi_hdl = fmd_hdl_getspecific(hdl); + + if (ipmi_hdl) { + ipmi_close(ipmi_hdl); + } +} diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_intel.c b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_intel.c index 6e71e73b20..09f895dfa8 100644 --- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_intel.c +++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_intel.c @@ -158,7 +158,7 @@ mc_add_ranks(topo_mod_t *mod, tnode_t *dnode, nvlist_t *auth, int dimm, static void mc_add_dimms(topo_mod_t *mod, tnode_t *pnode, nvlist_t *auth, - nvlist_t **nvl, uint_t ndimms, int maxranks) + nvlist_t **nvl, uint_t ndimms, int maxdimms, int maxranks) { int i; nvlist_t *fmri; @@ -168,18 +168,21 @@ mc_add_dimms(topo_mod_t *mod, tnode_t *pnode, nvlist_t *auth, nvlist_t **ranks_nvp; int32_t start_rank = -1; uint_t nranks = 0; + uint32_t dimm_number; char *serial = NULL; char *part = NULL; char *rev = NULL; char *label = NULL; char *name; - if (topo_node_range_create(mod, pnode, DIMM, 0, ndimms-1) < 0) { + if (topo_node_range_create(mod, pnode, DIMM, 0, + maxdimms ? 
maxdimms-1 : ndimms-1) < 0) { whinge(mod, NULL, "mc_add_dimms: node range create failed\n"); return; } for (i = 0; i < ndimms; i++) { + dimm_number = i; for (nvp = nvlist_next_nvpair(nvl[i], NULL); nvp != NULL; nvp = nvlist_next_nvpair(nvl[i], nvp)) { name = nvpair_name(nvp); @@ -196,17 +199,19 @@ mc_add_dimms(topo_mod_t *mod, tnode_t *pnode, nvlist_t *auth, (void) nvpair_value_string(nvp, &rev); } else if (strcmp(name, FM_FAULT_FRU_LABEL) == 0) { (void) nvpair_value_string(nvp, &label); + } else if (strcmp(name, MCINTEL_NVLIST_DIMM_NUM) == 0) { + (void) nvpair_value_uint32(nvp, &dimm_number); } } fmri = NULL; fmri = topo_mod_hcfmri(mod, pnode, FM_HC_SCHEME_VERSION, - DIMM, i, NULL, auth, part, rev, serial); + DIMM, dimm_number, NULL, auth, part, rev, serial); if (fmri == NULL) { whinge(mod, NULL, "mc_add_dimms: topo_mod_hcfmri failed\n"); return; } - if ((dnode = topo_node_bind(mod, pnode, DIMM, i, + if ((dnode = topo_node_bind(mod, pnode, DIMM, dimm_number, fmri)) == NULL) { nvlist_free(fmri); whinge(mod, NULL, "mc_add_dimms: node bind failed" @@ -236,15 +241,15 @@ mc_add_dimms(topo_mod_t *mod, tnode_t *pnode, nvlist_t *auth, (void) topo_node_label_set(dnode, label, &err); if (nranks) { - mc_add_ranks(mod, dnode, auth, i, ranks_nvp, start_rank, - nranks, serial, part, rev, maxranks); + mc_add_ranks(mod, dnode, auth, dimm_number, ranks_nvp, + start_rank, nranks, serial, part, rev, maxranks); } } } static int mc_add_channel(topo_mod_t *mod, tnode_t *pnode, int channel, nvlist_t *auth, - nvlist_t *nvl, int maxranks) + nvlist_t *nvl, int maxdimms, int maxranks) { tnode_t *mc_channel; nvlist_t *fmri; @@ -270,7 +275,8 @@ mc_add_channel(topo_mod_t *mod, tnode_t *pnode, int channel, nvlist_t *auth, (void) topo_pgroup_create(mc_channel, &dimm_channel_pgroup, &err); if (nvlist_lookup_nvlist_array(nvl, MCINTEL_NVLIST_DIMMS, &dimm_nvl, &ndimms) == 0) { - mc_add_dimms(mod, mc_channel, auth, dimm_nvl, ndimms, maxranks); + mc_add_dimms(mod, mc_channel, auth, dimm_nvl, ndimms, 
maxdimms, + maxranks); } for (nvp = nvlist_next_nvpair(nvl, NULL); nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) { @@ -292,6 +298,7 @@ mc_nb_create(topo_mod_t *mod, tnode_t *pnode, const char *name, nvlist_t *auth, int channel; uint8_t nmc; uint8_t maxranks; + uint8_t maxdimms; tnode_t *mcnode; nvlist_t *fmri; nvlist_t **channel_nvl; @@ -323,6 +330,8 @@ mc_nb_create(topo_mod_t *mod, tnode_t *pnode, const char *name, nvlist_t *auth, } if (nvlist_lookup_uint8(nvl, MCINTEL_NVLIST_NRANKS, &maxranks) != 0) maxranks = 2; + if (nvlist_lookup_uint8(nvl, MCINTEL_NVLIST_NDIMMS, &maxdimms) != 0) + maxdimms = 0; if (topo_node_range_create(mod, pnode, name, 0, nmc-1) < 0) { whinge(mod, NULL, "mc_nb_create: node range create failed\n"); @@ -354,7 +363,7 @@ mc_nb_create(topo_mod_t *mod, tnode_t *pnode, const char *name, nvlist_t *auth, } for (j = 0; j < nchannels; j++) { if (mc_add_channel(mod, mcnode, channel, auth, - channel_nvl[channel], maxranks) < 0) { + channel_nvl[channel], maxdimms, maxranks) < 0) { return (-1); } channel++; @@ -365,6 +374,7 @@ mc_nb_create(topo_mod_t *mod, tnode_t *pnode, const char *name, nvlist_t *auth, if (strcmp(pname, MCINTEL_NVLIST_MC) != 0 && strcmp(pname, MCINTEL_NVLIST_NMEM) != 0 && strcmp(pname, MCINTEL_NVLIST_NRANKS) != 0 && + strcmp(pname, MCINTEL_NVLIST_NDIMMS) != 0 && strcmp(pname, MCINTEL_NVLIST_VERSTR) != 0 && strcmp(pname, MCINTEL_NVLIST_MEM) != 0) { (void) nvprop_add(mod, nvp, PGNAME(MCT), diff --git a/usr/src/pkgdefs/SUNWcakr.i/prototype_com b/usr/src/pkgdefs/SUNWcakr.i/prototype_com index 8bc0475440..e481636991 100644 --- a/usr/src/pkgdefs/SUNWcakr.i/prototype_com +++ b/usr/src/pkgdefs/SUNWcakr.i/prototype_com @@ -53,10 +53,12 @@ d none platform/i86pc/kernel/cpu/amd64 755 root sys f none platform/i86pc/kernel/cpu/amd64/cpu_ms.AuthenticAMD 755 root sys f none platform/i86pc/kernel/cpu/amd64/cpu_ms.AuthenticAMD.15 755 root sys f none platform/i86pc/kernel/cpu/amd64/cpu_ms.GenuineIntel 755 root sys +f none 
platform/i86pc/kernel/cpu/amd64/cpu_ms.GenuineIntel.6.46 755 root sys f none platform/i86pc/kernel/cpu/amd64/cpu.generic 755 root sys f none platform/i86pc/kernel/cpu/cpu_ms.AuthenticAMD 755 root sys f none platform/i86pc/kernel/cpu/cpu_ms.AuthenticAMD.15 755 root sys f none platform/i86pc/kernel/cpu/cpu_ms.GenuineIntel 755 root sys +f none platform/i86pc/kernel/cpu/cpu_ms.GenuineIntel.6.46 755 root sys f none platform/i86pc/kernel/cpu/cpu.generic 755 root sys d none platform/i86pc/kernel/dacf 755 root sys f none platform/i86pc/kernel/dacf/consconfig_dacf 755 root sys diff --git a/usr/src/pkgdefs/SUNWckr/prototype_i386 b/usr/src/pkgdefs/SUNWckr/prototype_i386 index 01877af5dd..bae6c08944 100644 --- a/usr/src/pkgdefs/SUNWckr/prototype_i386 +++ b/usr/src/pkgdefs/SUNWckr/prototype_i386 @@ -94,6 +94,8 @@ f none kernel/drv/intel_nb5000 755 root sys f none kernel/drv/intel_nb5000.conf 644 root sys f none kernel/drv/intel_nhm 755 root sys f none kernel/drv/intel_nhm.conf 644 root sys +f none kernel/drv/intel_nhmex 755 root sys +f none kernel/drv/intel_nhmex.conf 644 root sys f none kernel/drv/ip 755 root sys f none kernel/drv/ip6 755 root sys f none kernel/drv/ipnet 755 root sys @@ -317,6 +319,7 @@ f none kernel/drv/amd64/icmp 755 root sys f none kernel/drv/amd64/icmp6 755 root sys f none kernel/drv/amd64/intel_nb5000 755 root sys f none kernel/drv/amd64/intel_nhm 755 root sys +f none kernel/drv/amd64/intel_nhmex 755 root sys f none kernel/drv/amd64/ip 755 root sys f none kernel/drv/amd64/ip6 755 root sys f none kernel/drv/amd64/ipnet 755 root sys diff --git a/usr/src/pkgdefs/SUNWfmd/prototype_com b/usr/src/pkgdefs/SUNWfmd/prototype_com index 9c63d5268b..65ea84e8fd 100644 --- a/usr/src/pkgdefs/SUNWfmd/prototype_com +++ b/usr/src/pkgdefs/SUNWfmd/prototype_com @@ -80,6 +80,8 @@ f none usr/lib/fm/fmd/plugins/eft.conf 644 root bin f none usr/lib/fm/fmd/plugins/eft.so 555 root bin f none usr/lib/fm/fmd/plugins/fabric-xlate.conf 644 root bin f none 
usr/lib/fm/fmd/plugins/fabric-xlate.so 555 root bin +f none usr/lib/fm/fmd/plugins/fdd-msg.conf 644 root bin +f none usr/lib/fm/fmd/plugins/fdd-msg.so 555 root bin f none usr/lib/fm/fmd/plugins/io-retire.conf 644 root bin f none usr/lib/fm/fmd/plugins/io-retire.so 555 root bin f none usr/lib/fm/fmd/plugins/ip-transport.so 555 root bin diff --git a/usr/src/uts/Makefile.uts b/usr/src/uts/Makefile.uts index cb79b12fa0..0f144dced5 100644 --- a/usr/src/uts/Makefile.uts +++ b/usr/src/uts/Makefile.uts @@ -607,7 +607,7 @@ KMODS = $(GENUNIX_KMODS) $(PARALLEL_KMODS) $(PARALLEL_KMODS): $(GENUNIX_KMODS) $(CLOSED_BUILD)CLOSED_KMODS = $(CLOSED_DRV_KMODS) $(CLOSED_TOD_KMODS) \ - $(CLOSED_MISC_KMODS) \ + $(CLOSED_MISC_KMODS) $(CLOSED_CPU_KMODS) \ $(CLOSED_NLMISC_KMODS) $(CLOSED_DRV_KMODS_$(CLASS)) LINT_KMODS = $(DRV_KMODS) $(EXEC_KMODS) $(FS_KMODS) $(SCHED_KMODS) \ diff --git a/usr/src/uts/i86pc/Makefile.i86pc.shared b/usr/src/uts/i86pc/Makefile.i86pc.shared index cd8d7d1737..89b47413f3 100644 --- a/usr/src/uts/i86pc/Makefile.i86pc.shared +++ b/usr/src/uts/i86pc/Makefile.i86pc.shared @@ -276,6 +276,12 @@ CPU_KMODS += authenticamd CPU_KMODS += genuineintel # +# Don't build some of these for OpenSolaris, since they will be +# replaced by binaries that are signed by Sun Release Engineering. 
+# +$(CLOSED_BUILD)CLOSED_CPU_KMODS += intel_nhmex + +# # Exec Class Modules (/kernel/exec): # EXEC_KMODS += diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c index 087bce5b9d..2e206d47a4 100644 --- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c +++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c @@ -338,7 +338,7 @@ static struct gcpu_mnexp gcpu_MMM_mnemonics[] = { /* MCAX86_ERRCODE_MMM_* */ { "RD", FM_EREPORT_CPU_GENERIC_MMM_RD }, /* READ */ { "WR", FM_EREPORT_CPU_GENERIC_MMM_WR }, /* WRITE */ { "ADDR_CMD", FM_EREPORT_CPU_GENERIC_MMM_ADRCMD }, /* ADDR, CMD */ - { GCPU_MNEMONIC_RESVD, ""}, /* RESERVED */ + { "SCRUB", FM_EREPORT_CPU_GENERIC_MMM_SCRUB }, { GCPU_MNEMONIC_RESVD, ""}, /* RESERVED */ { GCPU_MNEMONIC_RESVD, ""}, /* RESERVED */ { GCPU_MNEMONIC_RESVD, ""} /* RESERVED */ @@ -843,7 +843,8 @@ gcpu_ereport_post(const gcpu_logout_t *gcl, int bankidx, /* * The detector FMRI. */ - if ((detector = cms_ereport_detector(hdl, mscookie, nva)) == NULL) + if ((detector = cms_ereport_detector(hdl, bankidx, mscookie, + nva)) == NULL) detector = gcpu_fmri_create(hdl, nva); /* diff --git a/usr/src/uts/i86pc/cpu/genuineintel/gintel_main.c b/usr/src/uts/i86pc/cpu/genuineintel/gintel_main.c index 8c214458ed..bb70237eb6 100644 --- a/usr/src/uts/i86pc/cpu/genuineintel/gintel_main.c +++ b/usr/src/uts/i86pc/cpu/genuineintel/gintel_main.c @@ -249,8 +249,10 @@ gintel_ereport_class(cmi_hdl_t hdl, cms_cookie_t mscookie, } } +/*ARGSUSED*/ nvlist_t * -gintel_ereport_detector(cmi_hdl_t hdl, cms_cookie_t mscookie, nv_alloc_t *nva) +gintel_ereport_detector(cmi_hdl_t hdl, int bankno, cms_cookie_t mscookie, + nv_alloc_t *nva) { nvlist_t *nvl = (nvlist_t *)NULL; diff --git a/usr/src/uts/i86pc/os/cms.c b/usr/src/uts/i86pc/os/cms.c index 721b5a4db7..18d3b046f6 100644 --- a/usr/src/uts/i86pc/os/cms.c +++ b/usr/src/uts/i86pc/os/cms.c @@ -20,12 +20,10 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/cpu_module_ms_impl.h> #include <sys/cpuvar.h> @@ -604,12 +602,14 @@ cms_ereport_class(cmi_hdl_t hdl, cms_cookie_t mscookie, const char **cpuclsp, } nvlist_t * -cms_ereport_detector(cmi_hdl_t hdl, cms_cookie_t mscookie, nv_alloc_t *nva) +cms_ereport_detector(cmi_hdl_t hdl, int bankno, cms_cookie_t mscookie, + nv_alloc_t *nva) { cms_t *cms = HDL2CMS(hdl); if (CMS_OP_PRESENT(cms, cms_ereport_detector)) - return (CMS_OPS(cms)->cms_ereport_detector(hdl, mscookie, nva)); + return (CMS_OPS(cms)->cms_ereport_detector(hdl, bankno, + mscookie, nva)); else return (NULL); diff --git a/usr/src/uts/i86pc/sys/cpu_module_ms.h b/usr/src/uts/i86pc/sys/cpu_module_ms.h index f997fbba97..eac4adaf70 100644 --- a/usr/src/uts/i86pc/sys/cpu_module_ms.h +++ b/usr/src/uts/i86pc/sys/cpu_module_ms.h @@ -20,15 +20,13 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _CPU_MODULE_MS_H #define _CPU_MODULE_MS_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/cpuvar.h> #include <sys/nvpair.h> @@ -116,7 +114,8 @@ extern cms_cookie_t cms_disp_match(cmi_hdl_t, int, uint64_t, uint64_t, uint64_t, void *); extern void cms_ereport_class(cmi_hdl_t, cms_cookie_t, const char **, const char **); -extern nvlist_t *cms_ereport_detector(cmi_hdl_t, cms_cookie_t, nv_alloc_t *); +extern nvlist_t *cms_ereport_detector(cmi_hdl_t, int, cms_cookie_t, + nv_alloc_t *); extern boolean_t cms_ereport_includestack(cmi_hdl_t, cms_cookie_t); extern void cms_ereport_add_logout(cmi_hdl_t, nvlist_t *, nv_alloc_t *, int, uint64_t, uint64_t, uint64_t, void *, cms_cookie_t); diff --git a/usr/src/uts/i86pc/sys/cpu_module_ms_impl.h b/usr/src/uts/i86pc/sys/cpu_module_ms_impl.h index 1a545bcf76..cb1448b407 100644 --- a/usr/src/uts/i86pc/sys/cpu_module_ms_impl.h +++ b/usr/src/uts/i86pc/sys/cpu_module_ms_impl.h @@ -20,15 +20,13 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _CPU_MODULE_MS_IMPL_H #define _CPU_MODULE_MS_IMPL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/cpu_module_ms.h> #include <sys/cpuvar.h> #include <sys/x86_archext.h> @@ -71,7 +69,7 @@ typedef struct cms_ops { uint64_t, void *); void (*cms_ereport_class)(cmi_hdl_t, cms_cookie_t, const char **, const char **); - nvlist_t *(*cms_ereport_detector)(cmi_hdl_t, cms_cookie_t, + nvlist_t *(*cms_ereport_detector)(cmi_hdl_t, int, cms_cookie_t, nv_alloc_t *); boolean_t (*cms_ereport_includestack)(cmi_hdl_t, cms_cookie_t); void (*cms_ereport_add_logout)(cmi_hdl_t, nvlist_t *, diff --git a/usr/src/uts/intel/Makefile.intel.shared b/usr/src/uts/intel/Makefile.intel.shared index 62e5fb9f75..dee5a31f60 100644 --- a/usr/src/uts/intel/Makefile.intel.shared +++ b/usr/src/uts/intel/Makefile.intel.shared @@ -379,6 +379,7 @@ $(CLOSED_BUILD)DRV_KMODS += dca $(CLOSED_BUILD)CLOSED_DRV_KMODS += bmc $(CLOSED_BUILD)CLOSED_DRV_KMODS += elxl $(CLOSED_BUILD)CLOSED_DRV_KMODS += glm +$(CLOSED_BUILD)CLOSED_DRV_KMODS += intel_nhmex $(CLOSED_BUILD)CLOSED_DRV_KMODS += iprb $(CLOSED_BUILD)CLOSED_DRV_KMODS += llc2 $(CLOSED_BUILD)CLOSED_DRV_KMODS += marvell88sx diff --git a/usr/src/uts/intel/os/driver_aliases b/usr/src/uts/intel/os/driver_aliases index dbedff44ab..b35d9deb02 100644 --- a/usr/src/uts/intel/os/driver_aliases +++ b/usr/src/uts/intel/os/driver_aliases @@ -48,6 +48,7 @@ intel_nb5000 "pci8086,4003" intel_nb5000 "pci8086,65c0" intel_nhm "pci8086,3423" intel_nhm "pci8086,372a" +intel_nhmex "pci8086,3422" xpv "pci5853,1.1" amd_iommu "pci1022,11ff" amd_iommu "pci1002,5a23" diff --git a/usr/src/uts/intel/os/name_to_major b/usr/src/uts/intel/os/name_to_major index c7b3a97986..625eb5f337 100644 --- a/usr/src/uts/intel/os/name_to_major +++ b/usr/src/uts/intel/os/name_to_major @@ -160,3 +160,4 @@ bridge 265 iptun 266 iptunq 267 bpf 268 +intel_nhmex 269 diff --git a/usr/src/uts/intel/sys/fm/cpu/GMCA.h b/usr/src/uts/intel/sys/fm/cpu/GMCA.h index 0e5a5cf82d..c84171873b 100644 --- 
a/usr/src/uts/intel/sys/fm/cpu/GMCA.h +++ b/usr/src/uts/intel/sys/fm/cpu/GMCA.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -138,6 +138,7 @@ extern "C" { #define FM_EREPORT_CPU_GENERIC_MMM_RD "" #define FM_EREPORT_CPU_GENERIC_MMM_WR "" #define FM_EREPORT_CPU_GENERIC_MMM_ADRCMD "" +#define FM_EREPORT_CPU_GENERIC_MMM_SCRUB "" /* * Ereport payload member names together with bitmask values to select diff --git a/usr/src/uts/intel/sys/mc_intel.h b/usr/src/uts/intel/sys/mc_intel.h index adcf186582..f0c83b1d8b 100644 --- a/usr/src/uts/intel/sys/mc_intel.h +++ b/usr/src/uts/intel/sys/mc_intel.h @@ -44,8 +44,10 @@ extern "C" { #define MCINTEL_NVLIST_DIMMS "memory-dimms" #define MCINTEL_NVLIST_DIMMSZ "memory-dimm-size" #define MCINTEL_NVLIST_NRANKS "dimm-max-ranks" +#define MCINTEL_NVLIST_NDIMMS "dimm-max-dimms" #define MCINTEL_NVLIST_RANKS "dimm-ranks" #define MCINTEL_NVLIST_1ST_RANK "dimm-start-rank" +#define MCINTEL_NVLIST_DIMM_NUM "dimm-number" #define MCINTEL_NVLIST_ROWS "dimm-rows" #define MCINTEL_NVLIST_COL "dimm-column" #define MCINTEL_NVLIST_BANK "dimm-banks" |