diff options
Diffstat (limited to 'usr/src')
10 files changed, 537 insertions, 125 deletions
diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h index c6c3425f0e..3997ca9284 100644 --- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h +++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h @@ -47,6 +47,7 @@ extern "C" { * grow beyond that size. As such, ereports should only be assigned class codes * when needed. NEVER CHANGE the values of these constants once assigned. */ +#ifdef sun4u #define CMD_ERRCL_UCC 0x0000000000000008ULL #define CMD_ERRCL_UCU 0x0000000000000010ULL #define CMD_ERRCL_CPC 0x0000000000000020ULL @@ -73,6 +74,18 @@ extern "C" { #define CMD_ERRCL_FRU 0x0000200000000000ULL #define CMD_ERRCL_IOCE 0x0000400000000000ULL #define CMD_ERRCL_IOUE 0x0000800000000000ULL +#else /* sun4u */ +#define CMD_ERRCL_IL2U 0x0000000000000008ULL +#define CMD_ERRCL_DL2U 0x0000000000000010ULL +#define CMD_ERRCL_L2ND 0x0000000000000020ULL +#define CMD_ERRCL_IL2ND 0x0000000000000040ULL +#define CMD_ERRCL_DL2ND 0x0000000000000080ULL +#define CMD_ERRCL_DBU 0x0000000000000100ULL +#define CMD_ERRCL_FBU 0x0000000000000200ULL +#define CMD_ERRCL_DCDP 0x0000000000000400ULL +#define CMD_ERRCL_ICDP 0x0000000000000800ULL +#define CMD_ERRCL_WBUE 0x0000000000001000ULL +#define CMD_ERRCL_CBCE 0x0000000000002000ULL #define CMD_ERRCL_DAC 0x0001000000000000ULL #define CMD_ERRCL_DSC 0x0002000000000000ULL #define CMD_ERRCL_DAU 0x0004000000000000ULL @@ -90,6 +103,7 @@ extern "C" { #define CMD_ERRCL_SBDLC 0x2000000000000000ULL #define CMD_ERRCL_TCCP 0x4000000000000000ULL #define CMD_ERRCL_TCCD 0x8000000000000000ULL +#endif /* sun4u */ #ifdef sun4u #define CMD_ERRCL_ISL2XXCU(clcode) \ @@ -100,15 +114,53 @@ extern "C" { #define CMD_ERRCL_ISIOXE(clcode) \ (((clcode) & (CMD_ERRCL_IOCE | CMD_ERRCL_IOUE)) != 0) #else /* sun4u */ +/* + * If changing the CMD_ERRCL_ISL2XXCU definition, should also + * change all the lines below it. 
+ */ #define CMD_ERRCL_ISL2XXCU(clcode) \ - ((clcode) >= CMD_ERRCL_LDAC && (clcode) <= CMD_ERRCL_LDSU) -#define CMD_ERRCL_ISL3XXCU(clcode) 0 - -#endif /* sun4u */ + (((clcode) >= CMD_ERRCL_LDAC && (clcode) <= CMD_ERRCL_LDSU) || \ + ((clcode) >= CMD_ERRCL_IL2U && (clcode) <= CMD_ERRCL_DL2U)) #define CMD_ERRCL_ISMISCREGS(clcode) \ ((clcode) >= CMD_ERRCL_SBDPC && (clcode) <= CMD_ERRCL_TCCD) +#define CMD_ERRCL_ISL2CE(clcode) \ + (((clcode) >= CMD_ERRCL_LDAC && (clcode) <= CMD_ERRCL_LDSC) || \ + (clcode == CMD_ERRCL_CBCE)) + +#define CMD_ERRCL_ISL2ND(clcode) \ + ((clcode) >= CMD_ERRCL_L2ND && (clcode) <= CMD_ERRCL_DL2ND) + +#define CMD_ERRCL_ISMEM(clcode) \ + ((clcode & (CMD_ERRCL_DAU | CMD_ERRCL_DBU | CMD_ERRCL_FBU)) != 0) + +#define CMD_ERRCL_ISDCDP(clcode) \ + (clcode == CMD_ERRCL_DCDP) + +#define CMD_ERRCL_ISICDP(clcode) \ + (clcode == CMD_ERRCL_ICDP) + +#define CMD_ERRCL_L2UE_WRITEBACK(clcode) \ + ((clcode & (CMD_ERRCL_LDWU | CMD_ERRCL_WBUE)) != 0) + +#define CMD_ERRCL_REMOTEL2(clcode) \ + ((clcode & (CMD_ERRCL_WBUE | CMD_ERRCL_CBCE)) != 0) + +#endif /* sun4u */ + +#ifdef sun4v +#define L2_ERR 1 +#define MISCREGS_ERR 2 +#define L2ND_ERR 3 +#define MEM_ERR 4 +#define DCDP_ERR 5 +#define ICDP_ERR 6 +#define REMOTE_L2ERR 7 +#define UNKNOWN_ERR 8 +#endif + + #define CMD_ERRCL_MATCH(clcode, mask) \ (((clcode) & (mask)) != 0) @@ -195,12 +247,14 @@ typedef struct cmd { uint64_t cmd_thresh_abs_sysmem; /* Pg ret warning thresh (# of pages) */ uint64_t cmd_thresh_abs_badrw; /* Bad r/w retire thresh (# of pages) */ cmd_serd_t cmd_miscregs_serd; /* params for misregs serd */ - hrtime_t cmd_miscregs_trdelay; /* delay for redelivery misregs */ + cmd_serd_t cmd_dcache_serd; /* params for dcache serd */ + cmd_serd_t cmd_icache_serd; /* params for icache serd */ #ifdef sun4u uint16_t cmd_dp_flag; /* datapath error in progress if set */ #endif #ifdef sun4v cmd_list_t cmd_branches; /* List of branches state structures */ + uint64_t cmd_delta_ena; /* the sun4v train delta ena */ 
#endif nvlist_t *cmd_auth; /* DE's fault authority value */ } cmd_t; diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c index 51b5ee785f..964571de6b 100644 --- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c +++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c @@ -494,6 +494,8 @@ cmd_xr_id2hdlr(fmd_hdl_t *hdl, uint_t id) return (cmd_xxc_resolve); case CMD_XR_HDLR_XXU: return (cmd_xxu_resolve); + case CMD_XR_HDLR_NOP: + return (cmd_nop_resolve); default: fmd_hdl_abort(hdl, "cmd_xr_id2hdlr called with bad hdlrid %x\n", id); @@ -514,12 +516,7 @@ cmd_xr_create(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, err |= nvlist_lookup_uint64(nvl, FM_EREPORT_ENA, &xr->xr_ena); - /* - * Skip the cmd_xr_fill() for misc reg errors because - * these data are not in the misc reg ereport - */ - if (!CMD_ERRCL_ISMISCREGS(clcode)) - err |= cmd_xr_fill(hdl, nvl, xr, clcode); + err |= cmd_xr_fill(hdl, nvl, xr, clcode); #ifdef sun4u err |= cmd_xr_pn_cache_fill(hdl, nvl, xr, cpu, clcode); #endif @@ -550,20 +547,15 @@ cmd_xr_create(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, cmd_evdisp_t cmd_xr_reschedule(fmd_hdl_t *hdl, cmd_xr_t *xr, uint_t hdlrid) { + fmd_hdl_debug(hdl, "scheduling redelivery of %llx with xr %p\n", xr->xr_clcode, xr); xr->xr_hdlrid = hdlrid; xr->xr_hdlr = cmd_xr_id2hdlr(hdl, hdlrid); - if (CMD_ERRCL_ISMISCREGS(xr->xr_clcode)) - xr->xr_id = fmd_timer_install(hdl, - (void *)CMD_TIMERTYPE_CPU_XR_WAITER, NULL, - cmd.cmd_miscregs_trdelay); - else - xr->xr_id = fmd_timer_install(hdl, - (void *)CMD_TIMERTYPE_CPU_XR_WAITER, - NULL, cmd.cmd_xxcu_trdelay); + xr->xr_id = fmd_timer_install(hdl, (void *)CMD_TIMERTYPE_CPU_XR_WAITER, + NULL, cmd.cmd_xxcu_trdelay); if (xr->xr_ref++ == 0) cmd_list_append(&cmd.cmd_xxcu_redelivs, xr); @@ -1268,15 +1260,49 @@ static const cmd_xxcu_train_t cmd_xxcu_trains[] = { CMD_TRAIN(CMD_ERRCL_LDAC, CMD_ERRCL_LDWC), CMD_TRAIN(CMD_ERRCL_LDRC, CMD_ERRCL_LDWC), 
CMD_TRAIN(CMD_ERRCL_LDSC, CMD_ERRCL_LDWC), + CMD_TRAIN(CMD_ERRCL_CBCE, CMD_ERRCL_LDWC), CMD_TRAIN(CMD_ERRCL_LDAU, CMD_ERRCL_LDWU), + CMD_TRAIN(CMD_ERRCL_LDAU, CMD_ERRCL_WBUE), + CMD_TRAIN(CMD_ERRCL_LDAU, CMD_ERRCL_DCDP), CMD_TRAIN(CMD_ERRCL_LDRU, CMD_ERRCL_LDWU), + CMD_TRAIN(CMD_ERRCL_LDRU, CMD_ERRCL_WBUE), + CMD_TRAIN(CMD_ERRCL_LDRU, CMD_ERRCL_DCDP), CMD_TRAIN(CMD_ERRCL_LDSU, CMD_ERRCL_LDWU), - /* SBDLC: SBDPC */ + CMD_TRAIN(CMD_ERRCL_LDSU, CMD_ERRCL_WBUE), + CMD_TRAIN(CMD_ERRCL_LDSU, CMD_ERRCL_DCDP), CMD_TRAIN(CMD_ERRCL_SBDLC, CMD_ERRCL_SBDPC), - /* TCCP: TCCD */ CMD_TRAIN(CMD_ERRCL_TCCP, CMD_ERRCL_TCCD), - /* TCCD: TCCD */ CMD_TRAIN(CMD_ERRCL_TCCD, CMD_ERRCL_TCCD), + CMD_TRAIN(CMD_ERRCL_DBU, CMD_ERRCL_DCDP), + CMD_TRAIN(CMD_ERRCL_DBU, CMD_ERRCL_ICDP), + CMD_TRAIN(CMD_ERRCL_FBU, CMD_ERRCL_DCDP), + CMD_TRAIN(CMD_ERRCL_FBU, CMD_ERRCL_ICDP), + CMD_TRAIN(CMD_ERRCL_DAU, CMD_ERRCL_DCDP), + CMD_TRAIN(CMD_ERRCL_DAU, CMD_ERRCL_ICDP), + /* + * sun4v also has the following trains, but the train + * algorithm does an exhaustive search and compare + * all pairs in the train mask, so we don't need + * to define these trains + * dl2nd->ldwu (wbue), dcdp + * il2nd->ldwu (wbue), icdp + * dxl2u->ldwu (wbue), dcdp + * ixl2u->ldwu (wbue), icdp + */ + CMD_TRAIN(CMD_ERRCL_DL2ND, CMD_ERRCL_DCDP), + CMD_TRAIN(CMD_ERRCL_DL2ND, CMD_ERRCL_LDWU), + CMD_TRAIN(CMD_ERRCL_DL2ND, CMD_ERRCL_WBUE), + CMD_TRAIN(CMD_ERRCL_IL2ND, CMD_ERRCL_ICDP), + CMD_TRAIN(CMD_ERRCL_IL2ND, CMD_ERRCL_LDWU), + CMD_TRAIN(CMD_ERRCL_IL2ND, CMD_ERRCL_WBUE), + CMD_TRAIN(CMD_ERRCL_L2ND, CMD_ERRCL_LDWU), + CMD_TRAIN(CMD_ERRCL_L2ND, CMD_ERRCL_WBUE), + CMD_TRAIN(CMD_ERRCL_DL2U, CMD_ERRCL_DCDP), + CMD_TRAIN(CMD_ERRCL_DL2U, CMD_ERRCL_LDWU), + CMD_TRAIN(CMD_ERRCL_DL2U, CMD_ERRCL_WBUE), + CMD_TRAIN(CMD_ERRCL_IL2U, CMD_ERRCL_ICDP), + CMD_TRAIN(CMD_ERRCL_IL2U, CMD_ERRCL_LDWU), + CMD_TRAIN(CMD_ERRCL_IL2U, CMD_ERRCL_WBUE), #endif /* sun4u */ CMD_TRAIN(0, 0) }; @@ -1293,29 +1319,6 @@ cmd_xxcu_train_match(cmd_errcl_t mask) return (0); 
} -/* - * Search for the entry that matches the ena and the AFAR - * if we have a valid AFAR, otherwise just match the ENA - */ -cmd_xxcu_trw_t * -cmd_trw_lookup(uint64_t ena, uint8_t afar_status, uint64_t afar) -{ - int i; - - if (afar_status == AFLT_STAT_VALID) { - for (i = 0; i < cmd.cmd_xxcu_ntrw; i++) { - if (cmd.cmd_xxcu_trw[i].trw_ena == ena && - cmd.cmd_xxcu_trw[i].trw_afar == afar) - return (&cmd.cmd_xxcu_trw[i]); - } - } else { - for (i = 0; i < cmd.cmd_xxcu_ntrw; i++) { - if (cmd.cmd_xxcu_trw[i].trw_ena == ena) - return (&cmd.cmd_xxcu_trw[i]); - } - } - return (NULL); -} cmd_xxcu_trw_t * cmd_trw_alloc(uint64_t ena, uint64_t afar) @@ -1482,6 +1485,8 @@ cmd_cpu_create_faultlist(fmd_hdl_t *hdl, fmd_case_t *casep, cmd_cpu_t *cpu, CMD_CPU_LEVEL_THREAD, cpu->cpu_type); nvlist_free(asru); } + if (!fmd_nvl_fmri_present(hdl, cpui->cpu_asru_nvl)) + continue; cpui->cpu_faulting = FMD_B_TRUE; cpu_buf_write(hdl, cpui); flt = cmd_nvl_create_fault(hdl, fltnm, cert, diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h index 6e3b45baed..c2123d4e3f 100644 --- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h +++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h @@ -250,6 +250,7 @@ typedef struct cmd_xr cmd_xr_t; */ #define CMD_XR_HDLR_XXC 1 #define CMD_XR_HDLR_XXU 2 +#define CMD_XR_HDLR_NOP 3 typedef void cmd_xr_hdlr_f(fmd_hdl_t *, cmd_xr_t *, fmd_event_t *); @@ -299,6 +300,9 @@ extern void cmd_xr_write(fmd_hdl_t *, cmd_xr_t *); extern void cmd_xxc_resolve(fmd_hdl_t *, cmd_xr_t *, fmd_event_t *); extern void cmd_xxu_resolve(fmd_hdl_t *, cmd_xr_t *, fmd_event_t *); +extern void cmd_nop_resolve(fmd_hdl_t *, cmd_xr_t *, fmd_event_t *); +extern cmd_evdisp_t cmd_xxcu_initial(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, cmd_errcl_t, uint_t); /* * The master structure containing or referencing all of the state for a given @@ -767,11 +771,18 @@ extern cpu_family_t 
cmd_cpu_check_support(void); extern boolean_t cmd_cpu_ecache_support(void); extern int cmd_xr_fill(fmd_hdl_t *, nvlist_t *, cmd_xr_t *, cmd_errcl_t); +extern void cmd_fill_errdata(cmd_errcl_t, cmd_cpu_t *, cmd_case_t **, + const errdata_t **); +extern cmd_xxcu_trw_t *cmd_trw_lookup(uint64_t, uint8_t, uint64_t); +extern cmd_evdisp_t cmd_nop_train(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, cmd_errcl_t); +extern cmd_errcl_t cmd_train_match(cmd_errcl_t, cmd_errcl_t); +extern int cmd_afar_status_check(uint8_t, cmd_errcl_t); #ifdef sun4u -extern int cmd_cpu_synd_check(uint16_t); +extern int cmd_cpu_synd_check(uint16_t, cmd_errcl_t clcode); #else /* sun4u */ -extern int cmd_cpu_synd_check(uint32_t); +extern int cmd_cpu_synd_check(uint32_t, cmd_errcl_t clcode); #endif /* sun4u */ extern int cmd_afar_valid(fmd_hdl_t *hdl, nvlist_t *nvl, cmd_errcl_t, diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c index 2efb18e9cc..2ac88e20e0 100644 --- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c +++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c @@ -262,35 +262,36 @@ CMD_OPL_UEHANDLER(oplmtlb, opl_mtlb, CMD_PTR_CPU_MTLB, "core", 1) CMD_OPL_UEHANDLER(opltlbp, opl_tlbp, CMD_PTR_CPU_TLBP, "core", 1) #endif /* sun4u */ -static const errdata_t l3errdata = - { &cmd.cmd_l3data_serd, "l3cachedata", CMD_PTR_CPU_L3DATA }; -static const errdata_t l2errdata = - { &cmd.cmd_l2data_serd, "l2cachedata", CMD_PTR_CPU_L2DATA }; -static const errdata_t miscregsdata = - { &cmd.cmd_miscregs_serd, "misc_reg", CMD_PTR_CPU_MISC_REGS }; - +/*ARGSUSED*/ +static void +cmd_nop_hdlr(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep) +{ + fmd_hdl_debug(hdl, "nop train resolved for clcode %llx\n", + xr->xr_clcode); +} /*ARGSUSED*/ static void cmd_xxu_hdlr(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep) { - int isl3 = CMD_ERRCL_ISL3XXCU(xr->xr_clcode); - const errdata_t *ed = isl3 ? 
&l3errdata : &l2errdata; + const errdata_t *ed; cmd_cpu_t *cpu = xr->xr_cpu; - cmd_case_t *cc = isl3 ? &cpu->cpu_l3data : &cpu->cpu_l2data; + cmd_case_t *cc; const char *uuid; nvlist_t *rsrc = NULL; + cmd_fill_errdata(xr->xr_clcode, cpu, &cc, &ed); + if (cpu->cpu_faulting) { CMD_STAT_BUMP(xxu_retr_flt); return; } - if (xr->xr_afar_status != AFLT_STAT_VALID) { + if (cmd_afar_status_check(xr->xr_afar_status, xr->xr_clcode) < 0) { fmd_hdl_debug(hdl, "xxU dropped, afar not VALID\n"); return; } - if (cmd_cpu_synd_check(xr->xr_synd) < 0) { + if (cmd_cpu_synd_check(xr->xr_synd, xr->xr_clcode) < 0) { fmd_hdl_debug(hdl, "xxU/LDxU dropped due to syndrome\n"); return; } @@ -353,16 +354,7 @@ cmd_xxc_hdlr(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep) return; } #endif - if (CMD_ERRCL_ISMISCREGS(xr->xr_clcode)) { - ed = &miscregsdata; - cc = &cpu->cpu_misc_regs; - } else if (CMD_ERRCL_ISL2XXCU(xr->xr_clcode)) { - ed = &l2errdata; - cc = &cpu->cpu_l2data; - } else { - ed = &l3errdata; - cc = &cpu->cpu_l3data; - } + cmd_fill_errdata(xr->xr_clcode, cpu, &cc, &ed); if (cpu->cpu_faulting || (cc->cc_cp != NULL && fmd_case_solved(hdl, cc->cc_cp))) @@ -424,12 +416,19 @@ cmd_xxcu_resolve(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep, trw->trw_flags |= CMD_TRW_F_DELETING; + /* + * In sun4v, the matching train rule is changed. It matches only + * a portion of the train mask, so can't discard the rest of + * the error in the train mask. + */ +#ifdef sun4u if (trw->trw_flags & CMD_TRW_F_CAUSESEEN) { fmd_hdl_debug(hdl, "cause already seen -- discarding\n"); goto done; } +#endif - if ((cause = cmd_xxcu_train_match(trw->trw_mask)) == 0) { + if ((cause = cmd_train_match(trw->trw_mask, xr->xr_clcode)) == 0) { /* * We didn't match in a train, so we're going to process each * event individually. 
@@ -467,7 +466,13 @@ cmd_xxu_resolve(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep) cmd_xxcu_resolve(hdl, xr, ep, cmd_xxu_hdlr); } -static cmd_evdisp_t +void +cmd_nop_resolve(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep) +{ + cmd_xxcu_resolve(hdl, xr, ep, cmd_nop_hdlr); +} + +cmd_evdisp_t cmd_xxcu_initial(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, cmd_errcl_t clcode, uint_t hdlrid) { @@ -479,6 +484,7 @@ cmd_xxcu_initial(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, uint64_t afar; uint8_t level = clcode & CMD_ERRCL_LEVEL_EXTRACT; uint8_t afar_status; + const errdata_t *ed = NULL; clcode &= CMD_ERRCL_LEVEL_MASK; /* keep level bits out of train masks */ @@ -486,12 +492,7 @@ cmd_xxcu_initial(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, level)) == NULL || cpu->cpu_faulting) return (CMD_EVD_UNUSED); - if (CMD_ERRCL_ISMISCREGS(clcode)) - cc = &cpu->cpu_misc_regs; - else if (CMD_ERRCL_ISL2XXCU(clcode)) - cc = &cpu->cpu_l2data; - else - cc = &cpu->cpu_l3data; + cmd_fill_errdata(clcode, cpu, &cc, &ed); if (cc->cc_cp != NULL && fmd_case_solved(hdl, cc->cc_cp)) return (CMD_EVD_REDUND); @@ -532,25 +533,19 @@ cmd_xxcu_initial(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, fmd_hdl_debug(hdl, "trw rescheduled for train delivery\n"); redeliver: - if ((xr = cmd_xr_create(hdl, ep, nvl, cpu, clcode)) == NULL) + if ((xr = cmd_xr_create(hdl, ep, nvl, cpu, clcode)) == NULL) { + fmd_hdl_debug(hdl, "cmd_xr_create failed"); return (CMD_EVD_BAD); + } return (cmd_xr_reschedule(hdl, xr, hdlrid)); } -#ifdef sun4v -#define CMD_NIAGARA_1_CLASS "ereport.cpu.ultraSPARC-T1." 
-#endif /* sun4v */ cmd_evdisp_t cmd_xxu(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, cmd_errcl_t clcode) { -#ifdef sun4v - if (strncmp(class, CMD_NIAGARA_1_CLASS, - sizeof (CMD_NIAGARA_1_CLASS)) != 0) - return (cmd_l2u(hdl, ep, nvl, class, clcode)); -#endif /* sun4v */ return (cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_XXU)); } @@ -558,15 +553,17 @@ cmd_evdisp_t cmd_xxc(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, cmd_errcl_t clcode) { -#ifdef sun4v - if (strncmp(class, CMD_NIAGARA_1_CLASS, - sizeof (CMD_NIAGARA_1_CLASS)) != 0) - return (cmd_l2c(hdl, ep, nvl, class, clcode)); -#endif /* sun4v */ return (cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_XXC)); } cmd_evdisp_t +cmd_nop_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, + const char *class, cmd_errcl_t clcode) +{ + return (cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_NOP)); +} + +cmd_evdisp_t cmd_miscregs_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, cmd_errcl_t clcode) { diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c index ad499ac275..db80ac0e86 100644 --- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c +++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c @@ -255,11 +255,13 @@ static cmd_subscriber_t cmd_subscribers[] = { { "ereport.cpu.*.icvp", cmd_icache, CMD_CPU_LEVEL_CORE }, { "ereport.cpu.*.ictp", cmd_icache, CMD_CPU_LEVEL_CORE }, { "ereport.cpu.*.ictm", cmd_icache, CMD_CPU_LEVEL_CORE }, - { "ereport.cpu.*.icdp", cmd_icache, CMD_CPU_LEVEL_CORE }, + { "ereport.cpu.*.icdp", cmd_xxc, + CMD_ERRCL_ICDP | CMD_CPU_LEVEL_CORE }, { "ereport.cpu.*.dcvp", cmd_dcache, CMD_CPU_LEVEL_CORE }, { "ereport.cpu.*.dctp", cmd_dcache, CMD_CPU_LEVEL_CORE }, { "ereport.cpu.*.dctm", cmd_dcache, CMD_CPU_LEVEL_CORE }, - { "ereport.cpu.*.dcdp", cmd_dcache, CMD_CPU_LEVEL_CORE }, + { "ereport.cpu.*.dcdp", cmd_xxc, + 
CMD_ERRCL_DCDP | CMD_CPU_LEVEL_CORE }, { "ereport.cpu.*.itl2c", cmd_xxc, CMD_ERRCL_LDAC | CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.dtl2c", cmd_xxc, CMD_ERRCL_LDAC | @@ -273,13 +275,13 @@ static cmd_subscriber_t cmd_subscribers[] = { { "ereport.cpu.*.cwql2c", cmd_xxc, CMD_ERRCL_LDAC | CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.lvc", cmd_txce, CMD_CPU_LEVEL_CHIP }, - { "ereport.cpu.*.itl2u", cmd_xxu, CMD_ERRCL_LDAU | + { "ereport.cpu.*.itl2u", cmd_xxu, CMD_ERRCL_IL2U | CMD_CPU_LEVEL_CHIP }, - { "ereport.cpu.*.dtl2u", cmd_xxu, CMD_ERRCL_LDAU | + { "ereport.cpu.*.dtl2u", cmd_xxu, CMD_ERRCL_DL2U | CMD_CPU_LEVEL_CHIP }, - { "ereport.cpu.*.icl2u", cmd_xxu, CMD_ERRCL_LDAU | + { "ereport.cpu.*.icl2u", cmd_xxu, CMD_ERRCL_IL2U | CMD_CPU_LEVEL_CHIP }, - { "ereport.cpu.*.dcl2u", cmd_xxu, CMD_ERRCL_LDAU | + { "ereport.cpu.*.dcl2u", cmd_xxu, CMD_ERRCL_DL2U | CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.mal2u", cmd_xxu, CMD_ERRCL_LDAU | CMD_CPU_LEVEL_CHIP }, @@ -288,15 +290,17 @@ static cmd_subscriber_t cmd_subscribers[] = { { "ereport.cpu.*.lvf", cmd_l2ctl, CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.lrf", cmd_l2ctl, CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.ltu", cmd_l2ctl, CMD_CPU_LEVEL_CHIP }, - { "ereport.cpu.*.itl2nd", cmd_nop }, - { "ereport.cpu.*.dtl2nd", cmd_nop }, - { "ereport.cpu.*.icl2nd", cmd_nop }, - { "ereport.cpu.*.l2nd", cmd_nop }, - { "ereport.cpu.*.mal2nd", cmd_nop }, - { "ereport.cpu.*.cwql2nd", cmd_nop }, + { "ereport.cpu.*.itl2nd", cmd_nop_train, CMD_ERRCL_IL2ND }, + { "ereport.cpu.*.dtl2nd", cmd_nop_train, CMD_ERRCL_DL2ND }, + { "ereport.cpu.*.icl2nd", cmd_nop_train, CMD_ERRCL_IL2ND }, + { "ereport.cpu.*.dcl2nd", cmd_nop_train, CMD_ERRCL_DL2ND }, + { "ereport.cpu.*.l2nd", cmd_nop_train, CMD_ERRCL_L2ND }, + { "ereport.cpu.*.mal2nd", cmd_nop_train, CMD_ERRCL_L2ND }, + { "ereport.cpu.*.cwql2nd", cmd_nop_train, CMD_ERRCL_L2ND }, { "ereport.cpu.*.ldac", cmd_xxc, CMD_ERRCL_LDAC | CMD_CPU_LEVEL_CHIP }, - { "ereport.cpu.*.ldwc", cmd_nop }, + { "ereport.cpu.*.ldwc", cmd_xxc, 
CMD_ERRCL_LDWC | + CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.ldrc", cmd_xxc, CMD_ERRCL_LDRC | CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.ldsc", cmd_xxc, CMD_ERRCL_LDSC | @@ -304,7 +308,8 @@ static cmd_subscriber_t cmd_subscribers[] = { { "ereport.cpu.*.ltc", cmd_txce, CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.ldau", cmd_xxu, CMD_ERRCL_LDAU | CMD_CPU_LEVEL_CHIP }, - { "ereport.cpu.*.ldwu", cmd_nop }, + { "ereport.cpu.*.ldwu", cmd_xxu, CMD_ERRCL_LDWU | + CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.ldru", cmd_xxu, CMD_ERRCL_LDRU | CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.ldsu", cmd_xxu, CMD_ERRCL_LDSU | @@ -312,10 +317,11 @@ static cmd_subscriber_t cmd_subscribers[] = { { "ereport.cpu.*.lvu", cmd_l2ctl, CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.lru", cmd_l2ctl, CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.fbr", cmd_fb }, - { "ereport.cpu.*.fbu", cmd_fb }, + { "ereport.cpu.*.fbu", cmd_fb_train, CMD_ERRCL_FBU }, { "ereport.cpu.*.dac", cmd_ce, CMD_ERRCL_DAC }, { "ereport.cpu.*.dsc", cmd_ce, CMD_ERRCL_DSC }, - { "ereport.cpu.*.dau", cmd_ue, CMD_ERRCL_DAU }, + { "ereport.cpu.*.dau", cmd_ue_train, CMD_ERRCL_DAU }, + { "ereport.cpu.*.dbu", cmd_nop_train, CMD_ERRCL_DBU }, { "ereport.cpu.*.dsu", cmd_ue, CMD_ERRCL_DSU }, { "ereport.cpu.*.sbdpc", cmd_miscregs_train, CMD_ERRCL_SBDPC | CMD_CPU_LEVEL_THREAD }, @@ -347,9 +353,11 @@ static cmd_subscriber_t cmd_subscribers[] = { CMD_ERRCL_SBDPC | CMD_CPU_LEVEL_THREAD }, { "ereport.cpu.*.tsau", cmd_miscregs_ue, CMD_CPU_LEVEL_THREAD }, - { "ereport.cpu.*.cbce", cmd_xxc, CMD_CPU_LEVEL_CHIP }, + { "ereport.cpu.*.cbce", cmd_xxc, CMD_ERRCL_CBCE | + CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.dce", cmd_nop }, - { "ereport.cpu.*.wbue", cmd_nop }, + { "ereport.cpu.*.wbue", cmd_xxu, CMD_ERRCL_WBUE | + CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.lfu-slf", cmd_lfu_ce, CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.lfu-rtf", cmd_lfu_ue, CMD_CPU_LEVEL_CHIP }, { "ereport.cpu.*.lfu-tto", cmd_lfu_ue, CMD_CPU_LEVEL_CHIP }, @@ -523,17 +531,22 @@ static const fmd_prop_t fmd_props[] = { { 
"misc_regs_n", FMD_TYPE_UINT32, "8"}, { "misc_regs_t", FMD_TYPE_TIME, "168h" }, { "iorxefrx_window", FMD_TYPE_TIME, "3s" }, +#ifdef sun4u { "xxcu_trdelay", FMD_TYPE_TIME, "200ms" }, +#else + { "xxcu_trdelay", FMD_TYPE_TIME, "15s"}, +#endif /* sun4u */ { "xxcu_restart_delay", FMD_TYPE_TIME, "1s" }, { "num_xxcu_waiters", FMD_TYPE_UINT32, "128" }, { "thresh_tpct_sysmem", FMD_TYPE_UINT64, "100" }, { "thresh_abs_sysmem", FMD_TYPE_UINT64, "0" }, { "thresh_abs_badrw", FMD_TYPE_UINT64, "128" }, { "max_perm_ce_dimm", FMD_TYPE_UINT32, "128" }, - { "miscregs_trdelay", FMD_TYPE_TIME, "45s"}, #ifdef sun4v { "fbr_n", FMD_TYPE_UINT32, "14" }, { "fbr_t", FMD_TYPE_TIME, "30min"}, + /* delta_ena value = 0x500000000nsec ~= 22sec */ + { "delta_ena", FMD_TYPE_UINT64, "0x50000000000000"}, #endif { NULL, 0, NULL } }; @@ -795,6 +808,9 @@ _fmd_init(fmd_hdl_t *hdl) cmd.cmd_xxcu_ntrw = fmd_prop_get_int32(hdl, "num_xxcu_waiters"); cmd.cmd_xxcu_trw = fmd_hdl_zalloc(hdl, sizeof (cmd_xxcu_trw_t) * cmd.cmd_xxcu_ntrw, FMD_SLEEP); +#ifdef sun4v + cmd.cmd_delta_ena = fmd_prop_get_int64(hdl, "delta_ena"); +#endif cmd.cmd_l2data_serd.cs_name = "l2data"; cmd.cmd_l2data_serd.cs_n = fmd_prop_get_int32(hdl, "l2data_n"); @@ -804,11 +820,18 @@ _fmd_init(fmd_hdl_t *hdl) cmd.cmd_l3data_serd.cs_n = fmd_prop_get_int32(hdl, "l3data_n"); cmd.cmd_l3data_serd.cs_t = fmd_prop_get_int64(hdl, "l3data_t"); - cmd.cmd_miscregs_trdelay = fmd_prop_get_int64(hdl, "miscregs_trdelay"); cmd.cmd_miscregs_serd.cs_name = "misc_regs"; cmd.cmd_miscregs_serd.cs_n = fmd_prop_get_int32(hdl, "misc_regs_n"); cmd.cmd_miscregs_serd.cs_t = fmd_prop_get_int64(hdl, "misc_regs_t"); + cmd.cmd_dcache_serd.cs_name = "dcache"; + cmd.cmd_dcache_serd.cs_n = fmd_prop_get_int32(hdl, "dcache_n"); + cmd.cmd_dcache_serd.cs_t = fmd_prop_get_int64(hdl, "dcache_t"); + + cmd.cmd_icache_serd.cs_name = "icache"; + cmd.cmd_icache_serd.cs_n = fmd_prop_get_int32(hdl, "icache_n"); + cmd.cmd_icache_serd.cs_t = fmd_prop_get_int64(hdl, "icache_t"); + if 
(cmd_state_restore(hdl) < 0) { _fmd_fini(hdl); fmd_hdl_abort(hdl, "failed to restore saved state\n"); diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.h b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.h index b3ba5e50d3..e4bd9347ef 100644 --- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.h +++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -136,6 +136,10 @@ extern void cmd_bank_close(fmd_hdl_t *, void *); extern void cmd_branch_close(fmd_hdl_t *, void *); extern cmd_evdisp_t cmd_fb(fmd_hdl_t *, fmd_event_t *, nvlist_t *, const char *, cmd_errcl_t); +extern cmd_evdisp_t cmd_fb_train(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, cmd_errcl_t); +extern cmd_evdisp_t cmd_ue_train(fmd_hdl_t *, fmd_event_t *, nvlist_t *, + const char *, cmd_errcl_t); #endif /* diff --git a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcacheerr.c b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcacheerr.c index 1b59083a26..e1ad243d9f 100644 --- a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcacheerr.c +++ b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcacheerr.c @@ -68,9 +68,9 @@ #define PN_ECSTATE_NA 5 -static const errdata_t l3errdata = +static const errdata_t clr_l3errdata = { &cmd.cmd_l3data_serd, "l3cachedata", CMD_PTR_LxCACHE_CASE }; -static const errdata_t l2errdata = +static const errdata_t clr_l2errdata = { &cmd.cmd_l2data_serd, "l2cachedata", CMD_PTR_LxCACHE_CASE }; @@ -1060,11 +1060,11 @@ cmd_cache_ce_panther(fmd_hdl_t *hdl, fmd_event_t *ep, cmd_xr_t *xr) if (CMD_ERRCL_ISL2XXCU(xr->xr_clcode)) { type = CMD_PTR_CPU_L2DATA; cpu_cc = &cpu->cpu_l2data; - cache_ed = &l2errdata; + cache_ed = &clr_l2errdata; } else { type = CMD_PTR_CPU_L3DATA; cpu_cc = &cpu->cpu_l3data; - cache_ed = &l3errdata; + cache_ed 
= &clr_l3errdata; } /* Ensure that our case is not solved */ @@ -1084,7 +1084,7 @@ cmd_cache_ce_panther(fmd_hdl_t *hdl, fmd_event_t *ep, cmd_xr_t *xr) } /* Check for valid syndrome */ - if (cmd_cpu_synd_check(xr->xr_synd) < 0) { + if (cmd_cpu_synd_check(xr->xr_synd, xr->xr_clcode) < 0) { fmd_hdl_debug(hdl, "xxC/LDxC dropped due to syndrome\n"); return (0); diff --git a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_cpu_arch.c b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_cpu_arch.c index 1633078af1..1aa8397a5a 100644 --- a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_cpu_arch.c +++ b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_cpu_arch.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -68,8 +68,67 @@ cmd_xr_fill(fmd_hdl_t *hdl, nvlist_t *nvl, cmd_xr_t *xr, cmd_errcl_t clcode) return (0); } +/* + * Search for the entry that matches the ena and the AFAR + * if we have a valid AFAR, otherwise just match the ENA + */ +cmd_xxcu_trw_t * +cmd_trw_lookup(uint64_t ena, uint8_t afar_status, uint64_t afar) +{ + int i; + + if (afar_status == AFLT_STAT_VALID) { + for (i = 0; i < cmd.cmd_xxcu_ntrw; i++) { + if (cmd.cmd_xxcu_trw[i].trw_ena == ena && + cmd.cmd_xxcu_trw[i].trw_afar == afar) + return (&cmd.cmd_xxcu_trw[i]); + } + } else { + for (i = 0; i < cmd.cmd_xxcu_ntrw; i++) { + if (cmd.cmd_xxcu_trw[i].trw_ena == ena) + return (&cmd.cmd_xxcu_trw[i]); + } + } + return (NULL); +} + +/*ARGSUSED*/ +cmd_errcl_t +cmd_train_match(cmd_errcl_t trw_mask, cmd_errcl_t resolved_err) +{ + return (cmd_xxcu_train_match(trw_mask)); +} + +/*ARGSUSED*/ +int +cmd_afar_status_check(uint8_t afar_status, cmd_errcl_t clcode) +{ + if (afar_status == AFLT_STAT_VALID) + return (0); + return (-1); +} + +const errdata_t l3errdata = + { &cmd.cmd_l3data_serd, "l3cachedata", CMD_PTR_CPU_L3DATA }; +const errdata_t l2errdata = + { 
&cmd.cmd_l2data_serd, "l2cachedata", CMD_PTR_CPU_L2DATA }; + +void +cmd_fill_errdata(cmd_errcl_t clcode, cmd_cpu_t *cpu, cmd_case_t **cc, + const errdata_t **ed) +{ + if (CMD_ERRCL_ISL2XXCU(clcode)) { + *ed = &l2errdata; + *cc = &cpu->cpu_l2data; + } else { + *ed = &l3errdata; + *cc = &cpu->cpu_l3data; + } +} + +/*ARGSUSED*/ int -cmd_cpu_synd_check(uint16_t synd) +cmd_cpu_synd_check(uint16_t synd, cmd_errcl_t clcode) { if (synd == CH_POISON_SYND_FROM_XXU_WRITE || synd == CH_POISON_SYND_FROM_XXU_WRMERGE || diff --git a/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_cpu_arch.c b/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_cpu_arch.c index 3e3e29886c..e6e3bad01f 100644 --- a/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_cpu_arch.c +++ b/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_cpu_arch.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -47,21 +47,221 @@ int cmd_afsr_check(fmd_hdl_t *, uint64_t, cmd_errcl_t, uint8_t *); +const errdata_t l3errdata = + { &cmd.cmd_l3data_serd, "l3cachedata", CMD_PTR_CPU_L3DATA }; +const errdata_t n1l2errdata = + { &cmd.cmd_l2data_serd, "l2cachedata", CMD_PTR_CPU_L2DATA }; +const errdata_t n2ce_l2errdata = + { &cmd.cmd_l2data_serd, "l2data-c", CMD_PTR_CPU_L2DATA }; +const errdata_t n2ue_l2errdata = + { &cmd.cmd_l2data_serd, "l2data-u", CMD_PTR_CPU_L2DATA }; +const errdata_t miscregsdata = + { &cmd.cmd_miscregs_serd, "misc_reg", CMD_PTR_CPU_MISC_REGS }; +const errdata_t dcachedata = + { &cmd.cmd_dcache_serd, "dcache", CMD_PTR_CPU_DCACHE }; +const errdata_t icachedata = + { &cmd.cmd_icache_serd, "icache", CMD_PTR_CPU_ICACHE }; + +static int +cmd_xr_error_type(cmd_errcl_t clcode) +{ + if (CMD_ERRCL_ISMISCREGS(clcode)) + return (MISCREGS_ERR); + else if (CMD_ERRCL_ISL2XXCU(clcode)) + return (L2_ERR); + else if (CMD_ERRCL_ISL2ND(clcode)) + return (L2ND_ERR); + else if (CMD_ERRCL_ISMEM(clcode)) + return (MEM_ERR); + else if (CMD_ERRCL_ISDCDP(clcode)) + return (DCDP_ERR); + else if (CMD_ERRCL_ISICDP(clcode)) + return (ICDP_ERR); + else if (CMD_ERRCL_REMOTEL2(clcode)) + return (REMOTE_L2ERR); + else + return (UNKNOWN_ERR); +} + +void +cmd_fill_errdata(cmd_errcl_t clcode, cmd_cpu_t *cpu, cmd_case_t **cc, + const errdata_t **ed) +{ + int err_type; + + err_type = cmd_xr_error_type(clcode); + switch (err_type) { + case MISCREGS_ERR: + *ed = &miscregsdata; + *cc = &cpu->cpu_misc_regs; + break; + case L2_ERR: + case REMOTE_L2ERR: + if (cpu->cpu_type == CPU_ULTRASPARC_T1) { + *ed = &n1l2errdata; + *cc = &cpu->cpu_l2data; + } else { + if (CMD_ERRCL_ISL2CE(clcode)) { + *ed = &n2ce_l2errdata; + *cc = &cpu->cpu_l2data; + } else { + *ed = &n2ue_l2errdata; + *cc = &cpu->cpu_l2data; + } + } + break; + case DCDP_ERR: + *ed = &dcachedata; + *cc = &cpu->cpu_dcache; + break; + case ICDP_ERR: + *ed = &icachedata; + *cc = &cpu->cpu_icache; + break; + /* + * When an error goes through the 
train, it requires + * to have cmd_case_t & errdata_t structures even if it is not + * diagnosed when the error is resolved. Sun4v + * does not have an L3 error, but the L3 cpu case was defined, + * so its data structures are used for the default cases. + */ + default: + *ed = &l3errdata; + *cc = &cpu->cpu_l3data; + break; + } +} + +int +cmd_afar_status_check(uint8_t afar_status, cmd_errcl_t clcode) +{ + + /* + * There is no L2 data for a remote write back + * cache error in the ereport, so skip the status check + */ + if (clcode == CMD_ERRCL_WBUE) + return (0); + + if (afar_status == AFLT_STAT_VALID) + return (0); + return (-1); +} + +/* + * Search for the entry that matches the ena and the AFAR + * if we have a valid AFAR, otherwise search for the entry + * whose ena is within delta ENA of the given ena. + */ +/*ARGSUSED*/ +cmd_xxcu_trw_t * +cmd_trw_lookup(uint64_t ena, uint8_t afar_status, uint64_t afar) +{ + int i; + + if (afar_status == AFLT_STAT_VALID) { + for (i = 0; i < cmd.cmd_xxcu_ntrw; i++) { + if (cmd.cmd_xxcu_trw[i].trw_ena != 0) { + if ((llabs(ena - cmd.cmd_xxcu_trw[i].trw_ena) < + cmd.cmd_delta_ena) && + (cmd.cmd_xxcu_trw[i].trw_afar == afar)) + return (&cmd.cmd_xxcu_trw[i]); + } + } + } + + for (i = 0; i < cmd.cmd_xxcu_ntrw; i++) { + if (cmd.cmd_xxcu_trw[i].trw_ena != 0) { + if (llabs(ena - cmd.cmd_xxcu_trw[i].trw_ena) + < cmd.cmd_delta_ena) + return (&cmd.cmd_xxcu_trw[i]); + } + } + + return (NULL); +} + +cmd_errcl_t +cmd_get_nextbit(cmd_errcl_t trw_mask) +{ + cmd_errcl_t tmp_mask = 0; + cmd_errcl_t tmp; + int i; + + for (i = 0; i < 64; i++) { + tmp = (0x0000000000000001ULL << i); + if (tmp & trw_mask) { + tmp_mask = tmp; + break; + } + } + return (tmp_mask); +} + +/* + * For a resolved error, its error code will be paired with + * each error code in the train mask and compared against the + * pre-defined trains in cmd_cpu.c to determine if the error + * is in the train. 
+ */ +cmd_errcl_t +cmd_combine_two_train(cmd_errcl_t trw_mask, cmd_errcl_t resolved_err) +{ + cmd_errcl_t tmp_mask = 0; + cmd_errcl_t train_mask = 0; + cmd_errcl_t cause = 0; + cmd_errcl_t error_mask = trw_mask ^ resolved_err; + + while (error_mask) { + tmp_mask = cmd_get_nextbit(error_mask); + if (tmp_mask == 0) + break; + train_mask = tmp_mask | resolved_err; + cause = cmd_xxcu_train_match(train_mask); + if (cause) { + return (cause); + } + error_mask = error_mask ^ tmp_mask; + } + return (0); +} + +cmd_errcl_t +cmd_train_match(cmd_errcl_t trw_mask, cmd_errcl_t resolved_err) +{ + return (cmd_combine_two_train(trw_mask, resolved_err)); +} + int cmd_xr_fill(fmd_hdl_t *hdl, nvlist_t *nvl, cmd_xr_t *xr, cmd_errcl_t clcode) { uint64_t niagara_l2_afsr = 0; + int errtype; + + errtype = cmd_xr_error_type(clcode); + /* + * skip the fill data for the errors which is not L2 errors. + */ + if (errtype != L2_ERR) { + fmd_hdl_debug(hdl, "Skip fill L2 data for errtype %d\n", + errtype); + return (0); + } if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_AFSR, &niagara_l2_afsr) != 0 && nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_ESR, - &niagara_l2_afsr) != 0) + &niagara_l2_afsr) != 0) { + fmd_hdl_debug(hdl, "No L2 AFSR data"); return (-1); + } if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_AFAR, &xr->xr_afar) != 0 && nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_EAR, - &xr->xr_afar) != 0) + &xr->xr_afar) != 0) { + fmd_hdl_debug(hdl, "No L2 AFAR data"); return (-1); + } if (nvlist_lookup_uint32(nvl, FM_EREPORT_PAYLOAD_NAME_L2_SYND, &xr->xr_synd) != 0) { /* Niagara-2 doesn't provide separate (redundant) l2-synd */ @@ -69,15 +269,17 @@ cmd_xr_fill(fmd_hdl_t *hdl, nvlist_t *nvl, cmd_xr_t *xr, cmd_errcl_t clcode) } if (cmd_afsr_check(hdl, niagara_l2_afsr, clcode, - &xr->xr_synd_status) != 0) + &xr->xr_synd_status) != 0) { + fmd_hdl_debug(hdl, "Invalid L2 syndrome"); return (-1); + } xr->xr_afar_status = xr->xr_synd_status; return (0); } int 
-cmd_cpu_synd_check(uint32_t synd) +cmd_cpu_synd_check(uint32_t synd, cmd_errcl_t clcode) { int i; @@ -89,6 +291,13 @@ cmd_cpu_synd_check(uint32_t synd) * 0 is an invalid syndrome because it denotes no error, but * is associated with an ereport -- meaning there WAS an error. */ + /* + * HW does not store the syndrome value for write-back cache + * error, so skip the synd check for L2 write-back error + */ + if (CMD_ERRCL_L2UE_WRITEBACK(clcode)) + return (0); + if (synd == 0) return (-1); @@ -113,6 +322,8 @@ cmd_afsr_check(fmd_hdl_t *hdl, uint64_t afsr, switch (clcode) { case CMD_ERRCL_LDAU: case CMD_ERRCL_LDSU: + case CMD_ERRCL_DL2U: + case CMD_ERRCL_IL2U: *stat_val = ((afsr & NI_L2AFSR_P02) == 0) ? AFLT_STAT_VALID: AFLT_STAT_INVALID; diff --git a/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_memerr_arch.c b/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_memerr_arch.c index 047e29310d..46e350ec27 100644 --- a/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_memerr_arch.c +++ b/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_memerr_arch.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -298,6 +298,30 @@ cmd_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, /*ARGSUSED*/ cmd_evdisp_t +cmd_ue_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, + cmd_errcl_t clcode) +{ + cmd_evdisp_t rc, rc1; + + /* + * The DAU is cause of the DAU->DCDP/ICDP train: + * - process the cause of the event. + * - register the error to the nop event train, so the effected errors + * (DCDP/ICDP) will be dropped. 
+ */ + rc = xe_common(hdl, ep, nvl, class, clcode, cmd_ue_common); + + rc1 = cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_NOP); + if (rc1 != 0) + fmd_hdl_debug(hdl, + "Fail to add error (%llx) to the train, rc = %d", + clcode, rc1); + + return (rc); +} + +/*ARGSUSED*/ +cmd_evdisp_t cmd_ue(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, cmd_errcl_t clcode) { @@ -409,6 +433,30 @@ cmd_fb(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, return (CMD_EVD_OK); } +/*ARGSUSED*/ +cmd_evdisp_t +cmd_fb_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, + cmd_errcl_t clcode) +{ + cmd_evdisp_t rc, rc1; + + /* + * The FBU is cause of the FBU->DCDP/ICDP train: + * - process the cause of the event. + * - register the error to the nop event train, so the effected errors + * (DCDP/ICDP) will be dropped. + */ + rc = cmd_fb(hdl, ep, nvl, class, clcode); + + rc1 = cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_NOP); + if (rc1 != 0) + fmd_hdl_debug(hdl, + "Fail to add error (%llx) to the train, rc = %d", + clcode, rc1); + + return (rc); +} + void cmd_branch_close(fmd_hdl_t *hdl, void *arg) { |