diff options
author | td122701 <none@none> | 2007-07-26 09:16:35 -0700 |
---|---|---|
committer | td122701 <none@none> | 2007-07-26 09:16:35 -0700 |
commit | faff872b7fa30b801c55f8dadc61b4c8fdd0848e (patch) | |
tree | 0fd34bf1f6a1ed3a3aa634d3e6f953f908affc9c /usr/src | |
parent | de777a601dfef76d8d54837de77dc672dce47498 (diff) | |
download | illumos-gate-faff872b7fa30b801c55f8dadc61b4c8fdd0848e.tar.gz |
6565761 SERD engine trips at less than the specified error count for correctable store buffer errors
6567137 Only a single cpu is faulted with uncorrectable L2 cache error injection: kdlvfvd
Diffstat (limited to 'usr/src')
5 files changed, 88 insertions, 26 deletions
diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h index 2e2bdd0820..7271716df6 100644 --- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h +++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -86,6 +86,11 @@ extern "C" { #define CMD_ERRCL_LDRU 0x0400000000000000ULL #define CMD_ERRCL_LDSU 0x0800000000000000ULL +#define CMD_ERRCL_SBDPC 0x1000000000000000ULL +#define CMD_ERRCL_SBDLC 0x2000000000000000ULL +#define CMD_ERRCL_TCCP 0x4000000000000000ULL +#define CMD_ERRCL_TCCD 0x8000000000000000ULL + #ifdef sun4u #define CMD_ERRCL_ISL2XXCU(clcode) \ ((clcode) >= CMD_ERRCL_UCC && (clcode) <= CMD_ERRCL_EDU_BL) @@ -101,6 +106,9 @@ extern "C" { #endif /* sun4u */ +#define CMD_ERRCL_ISMISCREGS(clcode) \ + ((clcode) >= CMD_ERRCL_SBDPC && (clcode) <= CMD_ERRCL_TCCD) + #define CMD_ERRCL_MATCH(clcode, mask) \ (((clcode) & (mask)) != 0) @@ -182,6 +190,8 @@ typedef struct cmd { uint64_t cmd_thresh_tpct_sysmem; /* Pg ret warning thresh (% of mem) */ uint64_t cmd_thresh_abs_sysmem; /* Pg ret warning thresh (# of pages) */ uint64_t cmd_thresh_abs_badrw; /* Bad r/w retire thresh (# of pages) */ + cmd_serd_t cmd_miscregs_serd; /* params for misregs serd */ + hrtime_t cmd_miscregs_trdelay; /* delay for redelivery misregs */ #ifdef sun4u uint16_t cmd_dp_flag; /* datapath error in progress if set */ #endif diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c index c97596dfbc..541261573c 100644 --- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c +++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c @@ -509,7 +509,12 @@ cmd_xr_create(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, err |= nvlist_lookup_uint64(nvl, FM_EREPORT_ENA, &xr->xr_ena); - err |= cmd_xr_fill(hdl, nvl, xr, clcode); + /* + * Skip the cmd_xr_fill() for misc reg errors because + * these data are not in the misc reg ereport + */ + if (!CMD_ERRCL_ISMISCREGS(clcode)) + err |= cmd_xr_fill(hdl, nvl, xr, clcode); (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_PAYLOAD_NAME_RESOURCE, &rsrc); @@ -543,8 +548,15 @@ cmd_xr_reschedule(fmd_hdl_t *hdl, cmd_xr_t *xr, uint_t hdlrid) xr->xr_hdlrid = hdlrid; xr->xr_hdlr = cmd_xr_id2hdlr(hdl, hdlrid); - xr->xr_id = fmd_timer_install(hdl, (void *)CMD_TIMERTYPE_CPU_XR_WAITER, - NULL, cmd.cmd_xxcu_trdelay); + + if (CMD_ERRCL_ISMISCREGS(xr->xr_clcode)) + xr->xr_id = fmd_timer_install(hdl, + (void *)CMD_TIMERTYPE_CPU_XR_WAITER, NULL, + cmd.cmd_miscregs_trdelay); + else + xr->xr_id = fmd_timer_install(hdl, + (void *)CMD_TIMERTYPE_CPU_XR_WAITER, + NULL, cmd.cmd_xxcu_trdelay); if (xr->xr_ref++ == 0) cmd_list_append(&cmd.cmd_xxcu_redelivs, xr); @@ -1252,6 +1264,12 @@ static const cmd_xxcu_train_t cmd_xxcu_trains[] = { CMD_TRAIN(CMD_ERRCL_LDAU, CMD_ERRCL_LDWU), CMD_TRAIN(CMD_ERRCL_LDRU, CMD_ERRCL_LDWU), CMD_TRAIN(CMD_ERRCL_LDSU, CMD_ERRCL_LDWU), + /* SBDLC: SBDPC */ + CMD_TRAIN(CMD_ERRCL_SBDLC, CMD_ERRCL_SBDPC), + /* TCCP: TCCD */ + CMD_TRAIN(CMD_ERRCL_TCCP, CMD_ERRCL_TCCD), + /* TCCD: TCCD */ + CMD_TRAIN(CMD_ERRCL_TCCD, CMD_ERRCL_TCCD), #endif /* sun4u */ CMD_TRAIN(0, 0) }; @@ -1537,10 +1555,10 @@ static void cpu_buf_write(fmd_hdl_t *hdl, cmd_cpu_t *cpu) { if (fmd_buf_size(hdl, NULL, cpu->cpu_bufname) != - sizeof (cmd_cpu_pers_t)) - fmd_buf_destroy(hdl, NULL, cpu->cpu_bufname); + sizeof (cmd_cpu_pers_t)) + fmd_buf_destroy(hdl, NULL, cpu->cpu_bufname); - fmd_buf_write(hdl, NULL, cpu->cpu_bufname, &cpu->cpu_pers, + fmd_buf_write(hdl, NULL, cpu->cpu_bufname, &cpu->cpu_pers, sizeof (cmd_cpu_pers_t)); } diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h index bb6eba5efe..7a186fdc64 100644 --- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h +++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h @@ -652,6 +652,8 @@ extern cmd_evdisp_t cmd_miscregs_ce(fmd_hdl_t *, fmd_event_t *, nvlist_t *, extern cmd_evdisp_t cmd_miscregs_ue(fmd_hdl_t *, fmd_event_t *, nvlist_t *, const char *, cmd_errcl_t); +extern cmd_evdisp_t cmd_miscregs_train(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, cmd_errcl_t); /* * CPUs are described by FMRIs. This routine will retrieve the CPU state diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c index bc8cd68b37..9f89b7dba2 100644 --- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c +++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c @@ -239,6 +239,8 @@ static const errdata_t l3errdata = { &cmd.cmd_l3data_serd, "l3cachedata", CMD_PTR_CPU_L3DATA }; static const errdata_t l2errdata = { &cmd.cmd_l2data_serd, "l2cachedata", CMD_PTR_CPU_L2DATA }; +static const errdata_t miscregsdata = + { &cmd.cmd_miscregs_serd, "misc_reg", CMD_PTR_CPU_MISC_REGS }; /*ARGSUSED*/ static void @@ -313,13 +315,23 @@ cmd_xxu_hdlr(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep) static void cmd_xxc_hdlr(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep) { - int isl3 = CMD_ERRCL_ISL3XXCU(xr->xr_clcode); - const errdata_t *ed = isl3 ? &l3errdata : &l2errdata; + const errdata_t *ed; cmd_cpu_t *cpu = xr->xr_cpu; - cmd_case_t *cc = isl3 ? &cpu->cpu_l3data : &cpu->cpu_l2data; + cmd_case_t *cc; const char *uuid; nvlist_t *rsrc = NULL; + if (CMD_ERRCL_ISMISCREGS(xr->xr_clcode)) { + ed = &miscregsdata; + cc = &cpu->cpu_misc_regs; + } else if (CMD_ERRCL_ISL2XXCU(xr->xr_clcode)) { + ed = &l2errdata; + cc = &cpu->cpu_l2data; + } else { + ed = &l3errdata; + cc = &cpu->cpu_l3data; + } + if (cpu->cpu_faulting || (cc->cc_cp != NULL && fmd_case_solved(hdl, cc->cc_cp))) return; @@ -371,7 +383,7 @@ cmd_xxcu_resolve(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep, afar = xr->xr_afar; if ((trw = cmd_trw_lookup(xr->xr_ena, - xr->xr_afar_status, afar)) == NULL) { + xr->xr_afar_status, afar)) == NULL) { fmd_hdl_debug(hdl, "cmd_trw_lookup: Not found\n"); return; } @@ -442,7 +454,13 @@ cmd_xxcu_initial(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, level)) == NULL || cpu->cpu_faulting) return (CMD_EVD_UNUSED); - cc = CMD_ERRCL_ISL2XXCU(clcode) ? &cpu->cpu_l2data : &cpu->cpu_l3data; + if (CMD_ERRCL_ISMISCREGS(clcode)) + cc = &cpu->cpu_misc_regs; + else if (CMD_ERRCL_ISL2XXCU(clcode)) + cc = &cpu->cpu_l2data; + else + cc = &cpu->cpu_l3data; + if (cc->cc_cp != NULL && fmd_case_solved(hdl, cc->cc_cp)) return (CMD_EVD_REDUND); @@ -516,6 +534,14 @@ cmd_xxc(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, return (cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_XXC)); } +cmd_evdisp_t +cmd_miscregs_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, + const char *class, cmd_errcl_t clcode) +{ + return (cmd_xxcu_initial(hdl, ep, nvl, class, clcode, + CMD_XR_HDLR_XXC)); +} + void cmd_cpuerr_close(fmd_hdl_t *hdl, void *arg) { diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c index aab20d0626..11f560e2f0 100644 --- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c +++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c @@ -283,8 +283,8 @@ static cmd_subscriber_t cmd_subscribers[] = { CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.cwql2u", cmd_xxu, CMD_ERRCL_LDAU | CMD_CPU_LEVEL_CHIP }, - { "ereport.cpu.*.lvf", cmd_l2ctl }, - { "ereport.cpu.*.lrf", cmd_l2ctl }, + { "ereport.cpu.*.lvf", cmd_l2ctl, CMD_CPU_LEVEL_CHIP }, + { "ereport.cpu.*.lrf", cmd_l2ctl, CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.itl2nd", cmd_nop }, { "ereport.cpu.*.dtl2nd", cmd_nop }, { "ereport.cpu.*.icl2nd", cmd_nop }, @@ -314,10 +314,10 @@ static cmd_subscriber_t cmd_subscribers[] = { { "ereport.cpu.*.dsc", cmd_ce, CMD_ERRCL_DSC }, { "ereport.cpu.*.dau", cmd_ue, CMD_ERRCL_DAU }, { "ereport.cpu.*.dsu", cmd_ue, CMD_ERRCL_DSU }, - { "ereport.cpu.*.sbdpc", cmd_miscregs_ce, - CMD_CPU_LEVEL_THREAD }, - { "ereport.cpu.*.sbdlc", cmd_miscregs_ce, - CMD_CPU_LEVEL_THREAD }, + { "ereport.cpu.*.sbdpc", cmd_miscregs_train, + CMD_ERRCL_SBDPC | CMD_CPU_LEVEL_THREAD }, + { "ereport.cpu.*.sbdlc", cmd_miscregs_train, + CMD_ERRCL_SBDLC | CMD_CPU_LEVEL_THREAD }, { "ereport.cpu.*.sbdpu", cmd_miscregs_ue, CMD_CPU_LEVEL_THREAD }, { "ereport.cpu.*.sbdlu", cmd_miscregs_ue, @@ -326,20 +326,20 @@ static cmd_subscriber_t cmd_subscribers[] = { CMD_CPU_LEVEL_THREAD }, { "ereport.cpu.*.sbapp", cmd_miscregs_ue, CMD_CPU_LEVEL_THREAD }, - { "ereport.cpu.*.scac", cmd_miscregs_ce, - CMD_CPU_LEVEL_THREAD }, + { "ereport.cpu.*.scac", cmd_miscregs_train, + CMD_ERRCL_SBDPC | CMD_CPU_LEVEL_THREAD }, { "ereport.cpu.*.scau", cmd_miscregs_ue, CMD_CPU_LEVEL_THREAD }, - { "ereport.cpu.*.tccp", cmd_miscregs_ce, - CMD_CPU_LEVEL_THREAD }, - { "ereport.cpu.*.tccd", cmd_miscregs_ce, - CMD_CPU_LEVEL_THREAD }, + { "ereport.cpu.*.tccp", cmd_miscregs_train, + CMD_ERRCL_TCCP | CMD_CPU_LEVEL_THREAD }, + { "ereport.cpu.*.tccd", cmd_miscregs_train, + CMD_ERRCL_TCCD | CMD_CPU_LEVEL_THREAD }, { "ereport.cpu.*.tcup", cmd_miscregs_ue, CMD_CPU_LEVEL_THREAD }, { "ereport.cpu.*.tcud", cmd_miscregs_ue, CMD_CPU_LEVEL_THREAD }, - { "ereport.cpu.*.tsac", cmd_miscregs_ce, - CMD_CPU_LEVEL_THREAD }, + { "ereport.cpu.*.tsac", cmd_miscregs_train, + CMD_ERRCL_SBDPC | CMD_CPU_LEVEL_THREAD }, { "ereport.cpu.*.tsau", cmd_miscregs_ue, CMD_CPU_LEVEL_THREAD }, #endif /* sun4u */ @@ -506,6 +506,7 @@ static const fmd_prop_t fmd_props[] = { { "thresh_abs_sysmem", FMD_TYPE_UINT64, "0" }, { "thresh_abs_badrw", FMD_TYPE_UINT64, "128" }, { "max_perm_ce_dimm", FMD_TYPE_UINT32, "128" }, + { "miscregs_trdelay", FMD_TYPE_TIME, "45s"}, { NULL, 0, NULL } }; @@ -727,6 +728,11 @@ _fmd_init(fmd_hdl_t *hdl) cmd.cmd_l3data_serd.cs_n = fmd_prop_get_int32(hdl, "l3data_n"); cmd.cmd_l3data_serd.cs_t = fmd_prop_get_int64(hdl, "l3data_t"); + cmd.cmd_miscregs_trdelay = fmd_prop_get_int64(hdl, "miscregs_trdelay"); + cmd.cmd_miscregs_serd.cs_name = "misc_regs"; + cmd.cmd_miscregs_serd.cs_n = fmd_prop_get_int32(hdl, "misc_regs_n"); + cmd.cmd_miscregs_serd.cs_t = fmd_prop_get_int64(hdl, "misc_regs_t"); + if (cmd_state_restore(hdl) < 0) { _fmd_fini(hdl); fmd_hdl_abort(hdl, "failed to restore saved state\n"); |