summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h64
-rw-r--r--usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c85
-rw-r--r--usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h15
-rw-r--r--usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c85
-rw-r--r--usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c63
-rw-r--r--usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.h6
-rw-r--r--usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcacheerr.c10
-rw-r--r--usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_cpu_arch.c63
-rw-r--r--usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_cpu_arch.c221
-rw-r--r--usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_memerr_arch.c50
10 files changed, 537 insertions, 125 deletions
diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h
index c6c3425f0e..3997ca9284 100644
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h
@@ -47,6 +47,7 @@ extern "C" {
* grow beyond that size. As such, ereports should only be assigned class codes
* when needed. NEVER CHANGE the values of these constants once assigned.
*/
+#ifdef sun4u
#define CMD_ERRCL_UCC 0x0000000000000008ULL
#define CMD_ERRCL_UCU 0x0000000000000010ULL
#define CMD_ERRCL_CPC 0x0000000000000020ULL
@@ -73,6 +74,18 @@ extern "C" {
#define CMD_ERRCL_FRU 0x0000200000000000ULL
#define CMD_ERRCL_IOCE 0x0000400000000000ULL
#define CMD_ERRCL_IOUE 0x0000800000000000ULL
+#else /* sun4u */
+#define CMD_ERRCL_IL2U 0x0000000000000008ULL
+#define CMD_ERRCL_DL2U 0x0000000000000010ULL
+#define CMD_ERRCL_L2ND 0x0000000000000020ULL
+#define CMD_ERRCL_IL2ND 0x0000000000000040ULL
+#define CMD_ERRCL_DL2ND 0x0000000000000080ULL
+#define CMD_ERRCL_DBU 0x0000000000000100ULL
+#define CMD_ERRCL_FBU 0x0000000000000200ULL
+#define CMD_ERRCL_DCDP 0x0000000000000400ULL
+#define CMD_ERRCL_ICDP 0x0000000000000800ULL
+#define CMD_ERRCL_WBUE 0x0000000000001000ULL
+#define CMD_ERRCL_CBCE 0x0000000000002000ULL
#define CMD_ERRCL_DAC 0x0001000000000000ULL
#define CMD_ERRCL_DSC 0x0002000000000000ULL
#define CMD_ERRCL_DAU 0x0004000000000000ULL
@@ -90,6 +103,7 @@ extern "C" {
#define CMD_ERRCL_SBDLC 0x2000000000000000ULL
#define CMD_ERRCL_TCCP 0x4000000000000000ULL
#define CMD_ERRCL_TCCD 0x8000000000000000ULL
+#endif /* sun4u */
#ifdef sun4u
#define CMD_ERRCL_ISL2XXCU(clcode) \
@@ -100,15 +114,53 @@ extern "C" {
#define CMD_ERRCL_ISIOXE(clcode) \
(((clcode) & (CMD_ERRCL_IOCE | CMD_ERRCL_IOUE)) != 0)
#else /* sun4u */
+/*
+ * If you change the CMD_ERRCL_ISL2XXCU definition, also
+ * change all the lines below it.
+ */
#define CMD_ERRCL_ISL2XXCU(clcode) \
- ((clcode) >= CMD_ERRCL_LDAC && (clcode) <= CMD_ERRCL_LDSU)
-#define CMD_ERRCL_ISL3XXCU(clcode) 0
-
-#endif /* sun4u */
+ (((clcode) >= CMD_ERRCL_LDAC && (clcode) <= CMD_ERRCL_LDSU) || \
+ ((clcode) >= CMD_ERRCL_IL2U && (clcode) <= CMD_ERRCL_DL2U))
#define CMD_ERRCL_ISMISCREGS(clcode) \
((clcode) >= CMD_ERRCL_SBDPC && (clcode) <= CMD_ERRCL_TCCD)
+#define CMD_ERRCL_ISL2CE(clcode) \
+ (((clcode) >= CMD_ERRCL_LDAC && (clcode) <= CMD_ERRCL_LDSC) || \
+ (clcode == CMD_ERRCL_CBCE))
+
+#define CMD_ERRCL_ISL2ND(clcode) \
+ ((clcode) >= CMD_ERRCL_L2ND && (clcode) <= CMD_ERRCL_DL2ND)
+
+#define CMD_ERRCL_ISMEM(clcode) \
+ ((clcode & (CMD_ERRCL_DAU | CMD_ERRCL_DBU | CMD_ERRCL_FBU)) != 0)
+
+#define CMD_ERRCL_ISDCDP(clcode) \
+ (clcode == CMD_ERRCL_DCDP)
+
+#define CMD_ERRCL_ISICDP(clcode) \
+ (clcode == CMD_ERRCL_ICDP)
+
+#define CMD_ERRCL_L2UE_WRITEBACK(clcode) \
+ ((clcode & (CMD_ERRCL_LDWU | CMD_ERRCL_WBUE)) != 0)
+
+#define CMD_ERRCL_REMOTEL2(clcode) \
+ ((clcode & (CMD_ERRCL_WBUE | CMD_ERRCL_CBCE)) != 0)
+
+#endif /* sun4u */
+
+#ifdef sun4v
+#define L2_ERR 1
+#define MISCREGS_ERR 2
+#define L2ND_ERR 3
+#define MEM_ERR 4
+#define DCDP_ERR 5
+#define ICDP_ERR 6
+#define REMOTE_L2ERR 7
+#define UNKNOWN_ERR 8
+#endif
+
+
#define CMD_ERRCL_MATCH(clcode, mask) \
(((clcode) & (mask)) != 0)
@@ -195,12 +247,14 @@ typedef struct cmd {
uint64_t cmd_thresh_abs_sysmem; /* Pg ret warning thresh (# of pages) */
uint64_t cmd_thresh_abs_badrw; /* Bad r/w retire thresh (# of pages) */
cmd_serd_t cmd_miscregs_serd; /* params for misregs serd */
- hrtime_t cmd_miscregs_trdelay; /* delay for redelivery misregs */
+ cmd_serd_t cmd_dcache_serd; /* params for dcache serd */
+ cmd_serd_t cmd_icache_serd; /* params for icache serd */
#ifdef sun4u
uint16_t cmd_dp_flag; /* datapath error in progress if set */
#endif
#ifdef sun4v
cmd_list_t cmd_branches; /* List of branches state structures */
+ uint64_t cmd_delta_ena; /* the sun4v train delta ena */
#endif
nvlist_t *cmd_auth; /* DE's fault authority value */
} cmd_t;
diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c
index 51b5ee785f..964571de6b 100644
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c
@@ -494,6 +494,8 @@ cmd_xr_id2hdlr(fmd_hdl_t *hdl, uint_t id)
return (cmd_xxc_resolve);
case CMD_XR_HDLR_XXU:
return (cmd_xxu_resolve);
+ case CMD_XR_HDLR_NOP:
+ return (cmd_nop_resolve);
default:
fmd_hdl_abort(hdl, "cmd_xr_id2hdlr called with bad hdlrid %x\n",
id);
@@ -514,12 +516,7 @@ cmd_xr_create(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
err |= nvlist_lookup_uint64(nvl, FM_EREPORT_ENA, &xr->xr_ena);
- /*
- * Skip the cmd_xr_fill() for misc reg errors because
- * these data are not in the misc reg ereport
- */
- if (!CMD_ERRCL_ISMISCREGS(clcode))
- err |= cmd_xr_fill(hdl, nvl, xr, clcode);
+ err |= cmd_xr_fill(hdl, nvl, xr, clcode);
#ifdef sun4u
err |= cmd_xr_pn_cache_fill(hdl, nvl, xr, cpu, clcode);
#endif
@@ -550,20 +547,15 @@ cmd_xr_create(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
cmd_evdisp_t
cmd_xr_reschedule(fmd_hdl_t *hdl, cmd_xr_t *xr, uint_t hdlrid)
{
+
fmd_hdl_debug(hdl, "scheduling redelivery of %llx with xr %p\n",
xr->xr_clcode, xr);
xr->xr_hdlrid = hdlrid;
xr->xr_hdlr = cmd_xr_id2hdlr(hdl, hdlrid);
- if (CMD_ERRCL_ISMISCREGS(xr->xr_clcode))
- xr->xr_id = fmd_timer_install(hdl,
- (void *)CMD_TIMERTYPE_CPU_XR_WAITER, NULL,
- cmd.cmd_miscregs_trdelay);
- else
- xr->xr_id = fmd_timer_install(hdl,
- (void *)CMD_TIMERTYPE_CPU_XR_WAITER,
- NULL, cmd.cmd_xxcu_trdelay);
+ xr->xr_id = fmd_timer_install(hdl, (void *)CMD_TIMERTYPE_CPU_XR_WAITER,
+ NULL, cmd.cmd_xxcu_trdelay);
if (xr->xr_ref++ == 0)
cmd_list_append(&cmd.cmd_xxcu_redelivs, xr);
@@ -1268,15 +1260,49 @@ static const cmd_xxcu_train_t cmd_xxcu_trains[] = {
CMD_TRAIN(CMD_ERRCL_LDAC, CMD_ERRCL_LDWC),
CMD_TRAIN(CMD_ERRCL_LDRC, CMD_ERRCL_LDWC),
CMD_TRAIN(CMD_ERRCL_LDSC, CMD_ERRCL_LDWC),
+ CMD_TRAIN(CMD_ERRCL_CBCE, CMD_ERRCL_LDWC),
CMD_TRAIN(CMD_ERRCL_LDAU, CMD_ERRCL_LDWU),
+ CMD_TRAIN(CMD_ERRCL_LDAU, CMD_ERRCL_WBUE),
+ CMD_TRAIN(CMD_ERRCL_LDAU, CMD_ERRCL_DCDP),
CMD_TRAIN(CMD_ERRCL_LDRU, CMD_ERRCL_LDWU),
+ CMD_TRAIN(CMD_ERRCL_LDRU, CMD_ERRCL_WBUE),
+ CMD_TRAIN(CMD_ERRCL_LDRU, CMD_ERRCL_DCDP),
CMD_TRAIN(CMD_ERRCL_LDSU, CMD_ERRCL_LDWU),
- /* SBDLC: SBDPC */
+ CMD_TRAIN(CMD_ERRCL_LDSU, CMD_ERRCL_WBUE),
+ CMD_TRAIN(CMD_ERRCL_LDSU, CMD_ERRCL_DCDP),
CMD_TRAIN(CMD_ERRCL_SBDLC, CMD_ERRCL_SBDPC),
- /* TCCP: TCCD */
CMD_TRAIN(CMD_ERRCL_TCCP, CMD_ERRCL_TCCD),
- /* TCCD: TCCD */
CMD_TRAIN(CMD_ERRCL_TCCD, CMD_ERRCL_TCCD),
+ CMD_TRAIN(CMD_ERRCL_DBU, CMD_ERRCL_DCDP),
+ CMD_TRAIN(CMD_ERRCL_DBU, CMD_ERRCL_ICDP),
+ CMD_TRAIN(CMD_ERRCL_FBU, CMD_ERRCL_DCDP),
+ CMD_TRAIN(CMD_ERRCL_FBU, CMD_ERRCL_ICDP),
+ CMD_TRAIN(CMD_ERRCL_DAU, CMD_ERRCL_DCDP),
+ CMD_TRAIN(CMD_ERRCL_DAU, CMD_ERRCL_ICDP),
+ /*
+ * sun4v also has the following trains, but the train
+ * algorithm does an exhaustive search and compares
+ * all pairs in the train mask, so we don't need
+ * to define these trains:
+ * dl2nd->ldwu (wbue), dcdp
+ * il2nd->ldwu (wbue), icdp
+ * dxl2u->ldwu (wbue), dcdp
+ * ixl2u->ldwu (wbue), icdp
+ */
+ CMD_TRAIN(CMD_ERRCL_DL2ND, CMD_ERRCL_DCDP),
+ CMD_TRAIN(CMD_ERRCL_DL2ND, CMD_ERRCL_LDWU),
+ CMD_TRAIN(CMD_ERRCL_DL2ND, CMD_ERRCL_WBUE),
+ CMD_TRAIN(CMD_ERRCL_IL2ND, CMD_ERRCL_ICDP),
+ CMD_TRAIN(CMD_ERRCL_IL2ND, CMD_ERRCL_LDWU),
+ CMD_TRAIN(CMD_ERRCL_IL2ND, CMD_ERRCL_WBUE),
+ CMD_TRAIN(CMD_ERRCL_L2ND, CMD_ERRCL_LDWU),
+ CMD_TRAIN(CMD_ERRCL_L2ND, CMD_ERRCL_WBUE),
+ CMD_TRAIN(CMD_ERRCL_DL2U, CMD_ERRCL_DCDP),
+ CMD_TRAIN(CMD_ERRCL_DL2U, CMD_ERRCL_LDWU),
+ CMD_TRAIN(CMD_ERRCL_DL2U, CMD_ERRCL_WBUE),
+ CMD_TRAIN(CMD_ERRCL_IL2U, CMD_ERRCL_ICDP),
+ CMD_TRAIN(CMD_ERRCL_IL2U, CMD_ERRCL_LDWU),
+ CMD_TRAIN(CMD_ERRCL_IL2U, CMD_ERRCL_WBUE),
#endif /* sun4u */
CMD_TRAIN(0, 0)
};
@@ -1293,29 +1319,6 @@ cmd_xxcu_train_match(cmd_errcl_t mask)
return (0);
}
-/*
- * Search for the entry that matches the ena and the AFAR
- * if we have a valid AFAR, otherwise just match the ENA
- */
-cmd_xxcu_trw_t *
-cmd_trw_lookup(uint64_t ena, uint8_t afar_status, uint64_t afar)
-{
- int i;
-
- if (afar_status == AFLT_STAT_VALID) {
- for (i = 0; i < cmd.cmd_xxcu_ntrw; i++) {
- if (cmd.cmd_xxcu_trw[i].trw_ena == ena &&
- cmd.cmd_xxcu_trw[i].trw_afar == afar)
- return (&cmd.cmd_xxcu_trw[i]);
- }
- } else {
- for (i = 0; i < cmd.cmd_xxcu_ntrw; i++) {
- if (cmd.cmd_xxcu_trw[i].trw_ena == ena)
- return (&cmd.cmd_xxcu_trw[i]);
- }
- }
- return (NULL);
-}
cmd_xxcu_trw_t *
cmd_trw_alloc(uint64_t ena, uint64_t afar)
@@ -1482,6 +1485,8 @@ cmd_cpu_create_faultlist(fmd_hdl_t *hdl, fmd_case_t *casep, cmd_cpu_t *cpu,
CMD_CPU_LEVEL_THREAD, cpu->cpu_type);
nvlist_free(asru);
}
+ if (!fmd_nvl_fmri_present(hdl, cpui->cpu_asru_nvl))
+ continue;
cpui->cpu_faulting = FMD_B_TRUE;
cpu_buf_write(hdl, cpui);
flt = cmd_nvl_create_fault(hdl, fltnm, cert,
diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h
index 6e3b45baed..c2123d4e3f 100644
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h
@@ -250,6 +250,7 @@ typedef struct cmd_xr cmd_xr_t;
*/
#define CMD_XR_HDLR_XXC 1
#define CMD_XR_HDLR_XXU 2
+#define CMD_XR_HDLR_NOP 3
typedef void cmd_xr_hdlr_f(fmd_hdl_t *, cmd_xr_t *, fmd_event_t *);
@@ -299,6 +300,9 @@ extern void cmd_xr_write(fmd_hdl_t *, cmd_xr_t *);
extern void cmd_xxc_resolve(fmd_hdl_t *, cmd_xr_t *, fmd_event_t *);
extern void cmd_xxu_resolve(fmd_hdl_t *, cmd_xr_t *, fmd_event_t *);
+extern void cmd_nop_resolve(fmd_hdl_t *, cmd_xr_t *, fmd_event_t *);
+extern cmd_evdisp_t cmd_xxcu_initial(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
+ const char *, cmd_errcl_t, uint_t);
/*
* The master structure containing or referencing all of the state for a given
@@ -767,11 +771,18 @@ extern cpu_family_t cmd_cpu_check_support(void);
extern boolean_t cmd_cpu_ecache_support(void);
extern int cmd_xr_fill(fmd_hdl_t *, nvlist_t *, cmd_xr_t *, cmd_errcl_t);
+extern void cmd_fill_errdata(cmd_errcl_t, cmd_cpu_t *, cmd_case_t **,
+ const errdata_t **);
+extern cmd_xxcu_trw_t *cmd_trw_lookup(uint64_t, uint8_t, uint64_t);
+extern cmd_evdisp_t cmd_nop_train(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
+ const char *, cmd_errcl_t);
+extern cmd_errcl_t cmd_train_match(cmd_errcl_t, cmd_errcl_t);
+extern int cmd_afar_status_check(uint8_t, cmd_errcl_t);
#ifdef sun4u
-extern int cmd_cpu_synd_check(uint16_t);
+extern int cmd_cpu_synd_check(uint16_t, cmd_errcl_t clcode);
#else /* sun4u */
-extern int cmd_cpu_synd_check(uint32_t);
+extern int cmd_cpu_synd_check(uint32_t, cmd_errcl_t clcode);
#endif /* sun4u */
extern int cmd_afar_valid(fmd_hdl_t *hdl, nvlist_t *nvl, cmd_errcl_t,
diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c
index 2efb18e9cc..2ac88e20e0 100644
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c
@@ -262,35 +262,36 @@ CMD_OPL_UEHANDLER(oplmtlb, opl_mtlb, CMD_PTR_CPU_MTLB, "core", 1)
CMD_OPL_UEHANDLER(opltlbp, opl_tlbp, CMD_PTR_CPU_TLBP, "core", 1)
#endif /* sun4u */
-static const errdata_t l3errdata =
- { &cmd.cmd_l3data_serd, "l3cachedata", CMD_PTR_CPU_L3DATA };
-static const errdata_t l2errdata =
- { &cmd.cmd_l2data_serd, "l2cachedata", CMD_PTR_CPU_L2DATA };
-static const errdata_t miscregsdata =
- { &cmd.cmd_miscregs_serd, "misc_reg", CMD_PTR_CPU_MISC_REGS };
-
+/*ARGSUSED*/
+static void
+cmd_nop_hdlr(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep)
+{
+ fmd_hdl_debug(hdl, "nop train resolved for clcode %llx\n",
+ xr->xr_clcode);
+}
/*ARGSUSED*/
static void
cmd_xxu_hdlr(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep)
{
- int isl3 = CMD_ERRCL_ISL3XXCU(xr->xr_clcode);
- const errdata_t *ed = isl3 ? &l3errdata : &l2errdata;
+ const errdata_t *ed;
cmd_cpu_t *cpu = xr->xr_cpu;
- cmd_case_t *cc = isl3 ? &cpu->cpu_l3data : &cpu->cpu_l2data;
+ cmd_case_t *cc;
const char *uuid;
nvlist_t *rsrc = NULL;
+ cmd_fill_errdata(xr->xr_clcode, cpu, &cc, &ed);
+
if (cpu->cpu_faulting) {
CMD_STAT_BUMP(xxu_retr_flt);
return;
}
- if (xr->xr_afar_status != AFLT_STAT_VALID) {
+ if (cmd_afar_status_check(xr->xr_afar_status, xr->xr_clcode) < 0) {
fmd_hdl_debug(hdl, "xxU dropped, afar not VALID\n");
return;
}
- if (cmd_cpu_synd_check(xr->xr_synd) < 0) {
+ if (cmd_cpu_synd_check(xr->xr_synd, xr->xr_clcode) < 0) {
fmd_hdl_debug(hdl, "xxU/LDxU dropped due to syndrome\n");
return;
}
@@ -353,16 +354,7 @@ cmd_xxc_hdlr(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep)
return;
}
#endif
- if (CMD_ERRCL_ISMISCREGS(xr->xr_clcode)) {
- ed = &miscregsdata;
- cc = &cpu->cpu_misc_regs;
- } else if (CMD_ERRCL_ISL2XXCU(xr->xr_clcode)) {
- ed = &l2errdata;
- cc = &cpu->cpu_l2data;
- } else {
- ed = &l3errdata;
- cc = &cpu->cpu_l3data;
- }
+ cmd_fill_errdata(xr->xr_clcode, cpu, &cc, &ed);
if (cpu->cpu_faulting || (cc->cc_cp != NULL &&
fmd_case_solved(hdl, cc->cc_cp)))
@@ -424,12 +416,19 @@ cmd_xxcu_resolve(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep,
trw->trw_flags |= CMD_TRW_F_DELETING;
+ /*
+ * In sun4v, the train matching rule is different: it matches only
+ * a portion of the train mask, so we can't discard the rest of
+ * the errors in the train mask.
+ */
+#ifdef sun4u
if (trw->trw_flags & CMD_TRW_F_CAUSESEEN) {
fmd_hdl_debug(hdl, "cause already seen -- discarding\n");
goto done;
}
+#endif
- if ((cause = cmd_xxcu_train_match(trw->trw_mask)) == 0) {
+ if ((cause = cmd_train_match(trw->trw_mask, xr->xr_clcode)) == 0) {
/*
* We didn't match in a train, so we're going to process each
* event individually.
@@ -467,7 +466,13 @@ cmd_xxu_resolve(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep)
cmd_xxcu_resolve(hdl, xr, ep, cmd_xxu_hdlr);
}
-static cmd_evdisp_t
+void
+cmd_nop_resolve(fmd_hdl_t *hdl, cmd_xr_t *xr, fmd_event_t *ep)
+{
+ cmd_xxcu_resolve(hdl, xr, ep, cmd_nop_hdlr);
+}
+
+cmd_evdisp_t
cmd_xxcu_initial(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
const char *class, cmd_errcl_t clcode, uint_t hdlrid)
{
@@ -479,6 +484,7 @@ cmd_xxcu_initial(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
uint64_t afar;
uint8_t level = clcode & CMD_ERRCL_LEVEL_EXTRACT;
uint8_t afar_status;
+ const errdata_t *ed = NULL;
clcode &= CMD_ERRCL_LEVEL_MASK; /* keep level bits out of train masks */
@@ -486,12 +492,7 @@ cmd_xxcu_initial(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
level)) == NULL || cpu->cpu_faulting)
return (CMD_EVD_UNUSED);
- if (CMD_ERRCL_ISMISCREGS(clcode))
- cc = &cpu->cpu_misc_regs;
- else if (CMD_ERRCL_ISL2XXCU(clcode))
- cc = &cpu->cpu_l2data;
- else
- cc = &cpu->cpu_l3data;
+ cmd_fill_errdata(clcode, cpu, &cc, &ed);
if (cc->cc_cp != NULL && fmd_case_solved(hdl, cc->cc_cp))
return (CMD_EVD_REDUND);
@@ -532,25 +533,19 @@ cmd_xxcu_initial(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
fmd_hdl_debug(hdl, "trw rescheduled for train delivery\n");
redeliver:
- if ((xr = cmd_xr_create(hdl, ep, nvl, cpu, clcode)) == NULL)
+ if ((xr = cmd_xr_create(hdl, ep, nvl, cpu, clcode)) == NULL) {
+ fmd_hdl_debug(hdl, "cmd_xr_create failed");
return (CMD_EVD_BAD);
+ }
return (cmd_xr_reschedule(hdl, xr, hdlrid));
}
-#ifdef sun4v
-#define CMD_NIAGARA_1_CLASS "ereport.cpu.ultraSPARC-T1."
-#endif /* sun4v */
cmd_evdisp_t
cmd_xxu(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
cmd_errcl_t clcode)
{
-#ifdef sun4v
- if (strncmp(class, CMD_NIAGARA_1_CLASS,
- sizeof (CMD_NIAGARA_1_CLASS)) != 0)
- return (cmd_l2u(hdl, ep, nvl, class, clcode));
-#endif /* sun4v */
return (cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_XXU));
}
@@ -558,15 +553,17 @@ cmd_evdisp_t
cmd_xxc(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
cmd_errcl_t clcode)
{
-#ifdef sun4v
- if (strncmp(class, CMD_NIAGARA_1_CLASS,
- sizeof (CMD_NIAGARA_1_CLASS)) != 0)
- return (cmd_l2c(hdl, ep, nvl, class, clcode));
-#endif /* sun4v */
return (cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_XXC));
}
cmd_evdisp_t
+cmd_nop_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
+ const char *class, cmd_errcl_t clcode)
+{
+ return (cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_NOP));
+}
+
+cmd_evdisp_t
cmd_miscregs_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
const char *class, cmd_errcl_t clcode)
{
diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c
index ad499ac275..db80ac0e86 100644
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c
@@ -255,11 +255,13 @@ static cmd_subscriber_t cmd_subscribers[] = {
{ "ereport.cpu.*.icvp", cmd_icache, CMD_CPU_LEVEL_CORE },
{ "ereport.cpu.*.ictp", cmd_icache, CMD_CPU_LEVEL_CORE },
{ "ereport.cpu.*.ictm", cmd_icache, CMD_CPU_LEVEL_CORE },
- { "ereport.cpu.*.icdp", cmd_icache, CMD_CPU_LEVEL_CORE },
+ { "ereport.cpu.*.icdp", cmd_xxc,
+ CMD_ERRCL_ICDP | CMD_CPU_LEVEL_CORE },
{ "ereport.cpu.*.dcvp", cmd_dcache, CMD_CPU_LEVEL_CORE },
{ "ereport.cpu.*.dctp", cmd_dcache, CMD_CPU_LEVEL_CORE },
{ "ereport.cpu.*.dctm", cmd_dcache, CMD_CPU_LEVEL_CORE },
- { "ereport.cpu.*.dcdp", cmd_dcache, CMD_CPU_LEVEL_CORE },
+ { "ereport.cpu.*.dcdp", cmd_xxc,
+ CMD_ERRCL_DCDP | CMD_CPU_LEVEL_CORE },
{ "ereport.cpu.*.itl2c", cmd_xxc, CMD_ERRCL_LDAC |
CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.dtl2c", cmd_xxc, CMD_ERRCL_LDAC |
@@ -273,13 +275,13 @@ static cmd_subscriber_t cmd_subscribers[] = {
{ "ereport.cpu.*.cwql2c", cmd_xxc, CMD_ERRCL_LDAC |
CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.lvc", cmd_txce, CMD_CPU_LEVEL_CHIP },
- { "ereport.cpu.*.itl2u", cmd_xxu, CMD_ERRCL_LDAU |
+ { "ereport.cpu.*.itl2u", cmd_xxu, CMD_ERRCL_IL2U |
CMD_CPU_LEVEL_CHIP },
- { "ereport.cpu.*.dtl2u", cmd_xxu, CMD_ERRCL_LDAU |
+ { "ereport.cpu.*.dtl2u", cmd_xxu, CMD_ERRCL_DL2U |
CMD_CPU_LEVEL_CHIP },
- { "ereport.cpu.*.icl2u", cmd_xxu, CMD_ERRCL_LDAU |
+ { "ereport.cpu.*.icl2u", cmd_xxu, CMD_ERRCL_IL2U |
CMD_CPU_LEVEL_CHIP },
- { "ereport.cpu.*.dcl2u", cmd_xxu, CMD_ERRCL_LDAU |
+ { "ereport.cpu.*.dcl2u", cmd_xxu, CMD_ERRCL_DL2U |
CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.mal2u", cmd_xxu, CMD_ERRCL_LDAU |
CMD_CPU_LEVEL_CHIP },
@@ -288,15 +290,17 @@ static cmd_subscriber_t cmd_subscribers[] = {
{ "ereport.cpu.*.lvf", cmd_l2ctl, CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.lrf", cmd_l2ctl, CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.ltu", cmd_l2ctl, CMD_CPU_LEVEL_CHIP },
- { "ereport.cpu.*.itl2nd", cmd_nop },
- { "ereport.cpu.*.dtl2nd", cmd_nop },
- { "ereport.cpu.*.icl2nd", cmd_nop },
- { "ereport.cpu.*.l2nd", cmd_nop },
- { "ereport.cpu.*.mal2nd", cmd_nop },
- { "ereport.cpu.*.cwql2nd", cmd_nop },
+ { "ereport.cpu.*.itl2nd", cmd_nop_train, CMD_ERRCL_IL2ND },
+ { "ereport.cpu.*.dtl2nd", cmd_nop_train, CMD_ERRCL_DL2ND },
+ { "ereport.cpu.*.icl2nd", cmd_nop_train, CMD_ERRCL_IL2ND },
+ { "ereport.cpu.*.dcl2nd", cmd_nop_train, CMD_ERRCL_DL2ND },
+ { "ereport.cpu.*.l2nd", cmd_nop_train, CMD_ERRCL_L2ND },
+ { "ereport.cpu.*.mal2nd", cmd_nop_train, CMD_ERRCL_L2ND },
+ { "ereport.cpu.*.cwql2nd", cmd_nop_train, CMD_ERRCL_L2ND },
{ "ereport.cpu.*.ldac", cmd_xxc, CMD_ERRCL_LDAC |
CMD_CPU_LEVEL_CHIP },
- { "ereport.cpu.*.ldwc", cmd_nop },
+ { "ereport.cpu.*.ldwc", cmd_xxc, CMD_ERRCL_LDWC |
+ CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.ldrc", cmd_xxc, CMD_ERRCL_LDRC |
CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.ldsc", cmd_xxc, CMD_ERRCL_LDSC |
@@ -304,7 +308,8 @@ static cmd_subscriber_t cmd_subscribers[] = {
{ "ereport.cpu.*.ltc", cmd_txce, CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.ldau", cmd_xxu, CMD_ERRCL_LDAU |
CMD_CPU_LEVEL_CHIP },
- { "ereport.cpu.*.ldwu", cmd_nop },
+ { "ereport.cpu.*.ldwu", cmd_xxu, CMD_ERRCL_LDWU |
+ CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.ldru", cmd_xxu, CMD_ERRCL_LDRU |
CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.ldsu", cmd_xxu, CMD_ERRCL_LDSU |
@@ -312,10 +317,11 @@ static cmd_subscriber_t cmd_subscribers[] = {
{ "ereport.cpu.*.lvu", cmd_l2ctl, CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.lru", cmd_l2ctl, CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.fbr", cmd_fb },
- { "ereport.cpu.*.fbu", cmd_fb },
+ { "ereport.cpu.*.fbu", cmd_fb_train, CMD_ERRCL_FBU },
{ "ereport.cpu.*.dac", cmd_ce, CMD_ERRCL_DAC },
{ "ereport.cpu.*.dsc", cmd_ce, CMD_ERRCL_DSC },
- { "ereport.cpu.*.dau", cmd_ue, CMD_ERRCL_DAU },
+ { "ereport.cpu.*.dau", cmd_ue_train, CMD_ERRCL_DAU },
+ { "ereport.cpu.*.dbu", cmd_nop_train, CMD_ERRCL_DBU },
{ "ereport.cpu.*.dsu", cmd_ue, CMD_ERRCL_DSU },
{ "ereport.cpu.*.sbdpc", cmd_miscregs_train,
CMD_ERRCL_SBDPC | CMD_CPU_LEVEL_THREAD },
@@ -347,9 +353,11 @@ static cmd_subscriber_t cmd_subscribers[] = {
CMD_ERRCL_SBDPC | CMD_CPU_LEVEL_THREAD },
{ "ereport.cpu.*.tsau", cmd_miscregs_ue,
CMD_CPU_LEVEL_THREAD },
- { "ereport.cpu.*.cbce", cmd_xxc, CMD_CPU_LEVEL_CHIP },
+ { "ereport.cpu.*.cbce", cmd_xxc, CMD_ERRCL_CBCE |
+ CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.dce", cmd_nop },
- { "ereport.cpu.*.wbue", cmd_nop },
+ { "ereport.cpu.*.wbue", cmd_xxu, CMD_ERRCL_WBUE |
+ CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.lfu-slf", cmd_lfu_ce, CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.lfu-rtf", cmd_lfu_ue, CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.lfu-tto", cmd_lfu_ue, CMD_CPU_LEVEL_CHIP },
@@ -523,17 +531,22 @@ static const fmd_prop_t fmd_props[] = {
{ "misc_regs_n", FMD_TYPE_UINT32, "8"},
{ "misc_regs_t", FMD_TYPE_TIME, "168h" },
{ "iorxefrx_window", FMD_TYPE_TIME, "3s" },
+#ifdef sun4u
{ "xxcu_trdelay", FMD_TYPE_TIME, "200ms" },
+#else
+ { "xxcu_trdelay", FMD_TYPE_TIME, "15s"},
+#endif /* sun4u */
{ "xxcu_restart_delay", FMD_TYPE_TIME, "1s" },
{ "num_xxcu_waiters", FMD_TYPE_UINT32, "128" },
{ "thresh_tpct_sysmem", FMD_TYPE_UINT64, "100" },
{ "thresh_abs_sysmem", FMD_TYPE_UINT64, "0" },
{ "thresh_abs_badrw", FMD_TYPE_UINT64, "128" },
{ "max_perm_ce_dimm", FMD_TYPE_UINT32, "128" },
- { "miscregs_trdelay", FMD_TYPE_TIME, "45s"},
#ifdef sun4v
{ "fbr_n", FMD_TYPE_UINT32, "14" },
{ "fbr_t", FMD_TYPE_TIME, "30min"},
+ /* delta_ena value = 0x500000000nsec ~= 22sec */
+ { "delta_ena", FMD_TYPE_UINT64, "0x50000000000000"},
#endif
{ NULL, 0, NULL }
};
@@ -795,6 +808,9 @@ _fmd_init(fmd_hdl_t *hdl)
cmd.cmd_xxcu_ntrw = fmd_prop_get_int32(hdl, "num_xxcu_waiters");
cmd.cmd_xxcu_trw = fmd_hdl_zalloc(hdl, sizeof (cmd_xxcu_trw_t) *
cmd.cmd_xxcu_ntrw, FMD_SLEEP);
+#ifdef sun4v
+ cmd.cmd_delta_ena = fmd_prop_get_int64(hdl, "delta_ena");
+#endif
cmd.cmd_l2data_serd.cs_name = "l2data";
cmd.cmd_l2data_serd.cs_n = fmd_prop_get_int32(hdl, "l2data_n");
@@ -804,11 +820,18 @@ _fmd_init(fmd_hdl_t *hdl)
cmd.cmd_l3data_serd.cs_n = fmd_prop_get_int32(hdl, "l3data_n");
cmd.cmd_l3data_serd.cs_t = fmd_prop_get_int64(hdl, "l3data_t");
- cmd.cmd_miscregs_trdelay = fmd_prop_get_int64(hdl, "miscregs_trdelay");
cmd.cmd_miscregs_serd.cs_name = "misc_regs";
cmd.cmd_miscregs_serd.cs_n = fmd_prop_get_int32(hdl, "misc_regs_n");
cmd.cmd_miscregs_serd.cs_t = fmd_prop_get_int64(hdl, "misc_regs_t");
+ cmd.cmd_dcache_serd.cs_name = "dcache";
+ cmd.cmd_dcache_serd.cs_n = fmd_prop_get_int32(hdl, "dcache_n");
+ cmd.cmd_dcache_serd.cs_t = fmd_prop_get_int64(hdl, "dcache_t");
+
+ cmd.cmd_icache_serd.cs_name = "icache";
+ cmd.cmd_icache_serd.cs_n = fmd_prop_get_int32(hdl, "icache_n");
+ cmd.cmd_icache_serd.cs_t = fmd_prop_get_int64(hdl, "icache_t");
+
if (cmd_state_restore(hdl) < 0) {
_fmd_fini(hdl);
fmd_hdl_abort(hdl, "failed to restore saved state\n");
diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.h b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.h
index b3ba5e50d3..e4bd9347ef 100644
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.h
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -136,6 +136,10 @@ extern void cmd_bank_close(fmd_hdl_t *, void *);
extern void cmd_branch_close(fmd_hdl_t *, void *);
extern cmd_evdisp_t cmd_fb(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
const char *, cmd_errcl_t);
+extern cmd_evdisp_t cmd_fb_train(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
+ const char *, cmd_errcl_t);
+extern cmd_evdisp_t cmd_ue_train(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
+ const char *, cmd_errcl_t);
#endif
/*
diff --git a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcacheerr.c b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcacheerr.c
index 1b59083a26..e1ad243d9f 100644
--- a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcacheerr.c
+++ b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcacheerr.c
@@ -68,9 +68,9 @@
#define PN_ECSTATE_NA 5
-static const errdata_t l3errdata =
+static const errdata_t clr_l3errdata =
{ &cmd.cmd_l3data_serd, "l3cachedata", CMD_PTR_LxCACHE_CASE };
-static const errdata_t l2errdata =
+static const errdata_t clr_l2errdata =
{ &cmd.cmd_l2data_serd, "l2cachedata", CMD_PTR_LxCACHE_CASE };
@@ -1060,11 +1060,11 @@ cmd_cache_ce_panther(fmd_hdl_t *hdl, fmd_event_t *ep, cmd_xr_t *xr)
if (CMD_ERRCL_ISL2XXCU(xr->xr_clcode)) {
type = CMD_PTR_CPU_L2DATA;
cpu_cc = &cpu->cpu_l2data;
- cache_ed = &l2errdata;
+ cache_ed = &clr_l2errdata;
} else {
type = CMD_PTR_CPU_L3DATA;
cpu_cc = &cpu->cpu_l3data;
- cache_ed = &l3errdata;
+ cache_ed = &clr_l3errdata;
}
/* Ensure that our case is not solved */
@@ -1084,7 +1084,7 @@ cmd_cache_ce_panther(fmd_hdl_t *hdl, fmd_event_t *ep, cmd_xr_t *xr)
}
/* Check for valid syndrome */
- if (cmd_cpu_synd_check(xr->xr_synd) < 0) {
+ if (cmd_cpu_synd_check(xr->xr_synd, xr->xr_clcode) < 0) {
fmd_hdl_debug(hdl,
"xxC/LDxC dropped due to syndrome\n");
return (0);
diff --git a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_cpu_arch.c b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_cpu_arch.c
index 1633078af1..1aa8397a5a 100644
--- a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_cpu_arch.c
+++ b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_cpu_arch.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -68,8 +68,67 @@ cmd_xr_fill(fmd_hdl_t *hdl, nvlist_t *nvl, cmd_xr_t *xr, cmd_errcl_t clcode)
return (0);
}
+/*
+ * Search for the entry that matches the ena and the AFAR
+ * if we have a valid AFAR, otherwise just match the ENA
+ */
+cmd_xxcu_trw_t *
+cmd_trw_lookup(uint64_t ena, uint8_t afar_status, uint64_t afar)
+{
+ int i;
+
+ if (afar_status == AFLT_STAT_VALID) {
+ for (i = 0; i < cmd.cmd_xxcu_ntrw; i++) {
+ if (cmd.cmd_xxcu_trw[i].trw_ena == ena &&
+ cmd.cmd_xxcu_trw[i].trw_afar == afar)
+ return (&cmd.cmd_xxcu_trw[i]);
+ }
+ } else {
+ for (i = 0; i < cmd.cmd_xxcu_ntrw; i++) {
+ if (cmd.cmd_xxcu_trw[i].trw_ena == ena)
+ return (&cmd.cmd_xxcu_trw[i]);
+ }
+ }
+ return (NULL);
+}
+
+/*ARGSUSED*/
+cmd_errcl_t
+cmd_train_match(cmd_errcl_t trw_mask, cmd_errcl_t resolved_err)
+{
+ return (cmd_xxcu_train_match(trw_mask));
+}
+
+/*ARGSUSED*/
+int
+cmd_afar_status_check(uint8_t afar_status, cmd_errcl_t clcode)
+{
+ if (afar_status == AFLT_STAT_VALID)
+ return (0);
+ return (-1);
+}
+
+const errdata_t l3errdata =
+ { &cmd.cmd_l3data_serd, "l3cachedata", CMD_PTR_CPU_L3DATA };
+const errdata_t l2errdata =
+ { &cmd.cmd_l2data_serd, "l2cachedata", CMD_PTR_CPU_L2DATA };
+
+void
+cmd_fill_errdata(cmd_errcl_t clcode, cmd_cpu_t *cpu, cmd_case_t **cc,
+ const errdata_t **ed)
+{
+ if (CMD_ERRCL_ISL2XXCU(clcode)) {
+ *ed = &l2errdata;
+ *cc = &cpu->cpu_l2data;
+ } else {
+ *ed = &l3errdata;
+ *cc = &cpu->cpu_l3data;
+ }
+}
+
+/*ARGSUSED*/
int
-cmd_cpu_synd_check(uint16_t synd)
+cmd_cpu_synd_check(uint16_t synd, cmd_errcl_t clcode)
{
if (synd == CH_POISON_SYND_FROM_XXU_WRITE ||
synd == CH_POISON_SYND_FROM_XXU_WRMERGE ||
diff --git a/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_cpu_arch.c b/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_cpu_arch.c
index 3e3e29886c..e6e3bad01f 100644
--- a/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_cpu_arch.c
+++ b/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_cpu_arch.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -47,21 +47,221 @@
int cmd_afsr_check(fmd_hdl_t *, uint64_t, cmd_errcl_t, uint8_t *);
+const errdata_t l3errdata =
+ { &cmd.cmd_l3data_serd, "l3cachedata", CMD_PTR_CPU_L3DATA };
+const errdata_t n1l2errdata =
+ { &cmd.cmd_l2data_serd, "l2cachedata", CMD_PTR_CPU_L2DATA };
+const errdata_t n2ce_l2errdata =
+ { &cmd.cmd_l2data_serd, "l2data-c", CMD_PTR_CPU_L2DATA };
+const errdata_t n2ue_l2errdata =
+ { &cmd.cmd_l2data_serd, "l2data-u", CMD_PTR_CPU_L2DATA };
+const errdata_t miscregsdata =
+ { &cmd.cmd_miscregs_serd, "misc_reg", CMD_PTR_CPU_MISC_REGS };
+const errdata_t dcachedata =
+ { &cmd.cmd_dcache_serd, "dcache", CMD_PTR_CPU_DCACHE };
+const errdata_t icachedata =
+ { &cmd.cmd_icache_serd, "icache", CMD_PTR_CPU_ICACHE };
+
+static int
+cmd_xr_error_type(cmd_errcl_t clcode)
+{
+ if (CMD_ERRCL_ISMISCREGS(clcode))
+ return (MISCREGS_ERR);
+ else if (CMD_ERRCL_ISL2XXCU(clcode))
+ return (L2_ERR);
+ else if (CMD_ERRCL_ISL2ND(clcode))
+ return (L2ND_ERR);
+ else if (CMD_ERRCL_ISMEM(clcode))
+ return (MEM_ERR);
+ else if (CMD_ERRCL_ISDCDP(clcode))
+ return (DCDP_ERR);
+ else if (CMD_ERRCL_ISICDP(clcode))
+ return (ICDP_ERR);
+ else if (CMD_ERRCL_REMOTEL2(clcode))
+ return (REMOTE_L2ERR);
+ else
+ return (UNKNOWN_ERR);
+}
+
+void
+cmd_fill_errdata(cmd_errcl_t clcode, cmd_cpu_t *cpu, cmd_case_t **cc,
+ const errdata_t **ed)
+{
+ int err_type;
+
+ err_type = cmd_xr_error_type(clcode);
+ switch (err_type) {
+ case MISCREGS_ERR:
+ *ed = &miscregsdata;
+ *cc = &cpu->cpu_misc_regs;
+ break;
+ case L2_ERR:
+ case REMOTE_L2ERR:
+ if (cpu->cpu_type == CPU_ULTRASPARC_T1) {
+ *ed = &n1l2errdata;
+ *cc = &cpu->cpu_l2data;
+ } else {
+ if (CMD_ERRCL_ISL2CE(clcode)) {
+ *ed = &n2ce_l2errdata;
+ *cc = &cpu->cpu_l2data;
+ } else {
+ *ed = &n2ue_l2errdata;
+ *cc = &cpu->cpu_l2data;
+ }
+ }
+ break;
+ case DCDP_ERR:
+ *ed = &dcachedata;
+ *cc = &cpu->cpu_dcache;
+ break;
+ case ICDP_ERR:
+ *ed = &icachedata;
+ *cc = &cpu->cpu_icache;
+ break;
+ /*
+ * When an error goes through the train, it is required
+ * to have cmd_case_t & errdata_t structures even if it is not
+ * diagnosed when the error is resolved. Sun4v
+ * does not have an L3 error, but the L3 cpu case was defined,
+ * so its data structures are used for the default cases.
+ */
+ default:
+ *ed = &l3errdata;
+ *cc = &cpu->cpu_l3data;
+ break;
+ }
+}
+
+int
+cmd_afar_status_check(uint8_t afar_status, cmd_errcl_t clcode)
+{
+
+ /*
+ * There is no L2 data for a remote write back
+ * cache error in the ereport, so skip the status check
+ */
+ if (clcode == CMD_ERRCL_WBUE)
+ return (0);
+
+ if (afar_status == AFLT_STAT_VALID)
+ return (0);
+ return (-1);
+}
+
+/*
+ * Search for the entry whose ENA is within delta ENA of the given
+ * ENA and whose AFAR matches, if we have a valid AFAR; failing that,
+ * search for the entry whose ENA is within delta ENA of the given ENA.
+ */
+/*ARGSUSED*/
+cmd_xxcu_trw_t *
+cmd_trw_lookup(uint64_t ena, uint8_t afar_status, uint64_t afar)
+{
+ int i;
+
+ if (afar_status == AFLT_STAT_VALID) {
+ for (i = 0; i < cmd.cmd_xxcu_ntrw; i++) {
+ if (cmd.cmd_xxcu_trw[i].trw_ena != 0) {
+ if ((llabs(ena - cmd.cmd_xxcu_trw[i].trw_ena) <
+ cmd.cmd_delta_ena) &&
+ (cmd.cmd_xxcu_trw[i].trw_afar == afar))
+ return (&cmd.cmd_xxcu_trw[i]);
+ }
+ }
+ }
+
+ for (i = 0; i < cmd.cmd_xxcu_ntrw; i++) {
+ if (cmd.cmd_xxcu_trw[i].trw_ena != 0) {
+ if (llabs(ena - cmd.cmd_xxcu_trw[i].trw_ena)
+ < cmd.cmd_delta_ena)
+ return (&cmd.cmd_xxcu_trw[i]);
+ }
+ }
+
+ return (NULL);
+}
+
+cmd_errcl_t
+cmd_get_nextbit(cmd_errcl_t trw_mask)
+{
+ cmd_errcl_t tmp_mask = 0;
+ cmd_errcl_t tmp;
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ tmp = (0x0000000000000001ULL << i);
+ if (tmp & trw_mask) {
+ tmp_mask = tmp;
+ break;
+ }
+ }
+ return (tmp_mask);
+}
+
+/*
+ * For a resolved error, its error code will be paired with
+ * each error code in the train mask and compared against the
+ * pre-defined trains in the cmd_cpu.c to determine if the error
+ * is in the train.
+ */
+cmd_errcl_t
+cmd_combine_two_train(cmd_errcl_t trw_mask, cmd_errcl_t resolved_err)
+{
+ cmd_errcl_t tmp_mask = 0;
+ cmd_errcl_t train_mask = 0;
+ cmd_errcl_t cause = 0;
+ cmd_errcl_t error_mask = trw_mask ^ resolved_err;
+
+ while (error_mask) {
+ tmp_mask = cmd_get_nextbit(error_mask);
+ if (tmp_mask == 0)
+ break;
+ train_mask = tmp_mask | resolved_err;
+ cause = cmd_xxcu_train_match(train_mask);
+ if (cause) {
+ return (cause);
+ }
+ error_mask = error_mask ^ tmp_mask;
+ }
+ return (0);
+}
+
+cmd_errcl_t
+cmd_train_match(cmd_errcl_t trw_mask, cmd_errcl_t resolved_err)
+{
+ return (cmd_combine_two_train(trw_mask, resolved_err));
+}
+
int
cmd_xr_fill(fmd_hdl_t *hdl, nvlist_t *nvl, cmd_xr_t *xr, cmd_errcl_t clcode)
{
uint64_t niagara_l2_afsr = 0;
+ int errtype;
+
+ errtype = cmd_xr_error_type(clcode);
+ /*
+ * Skip filling in the data for errors which are not L2 errors.
+ */
+ if (errtype != L2_ERR) {
+ fmd_hdl_debug(hdl, "Skip fill L2 data for errtype %d\n",
+ errtype);
+ return (0);
+ }
if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_AFSR,
&niagara_l2_afsr) != 0 &&
nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_ESR,
- &niagara_l2_afsr) != 0)
+ &niagara_l2_afsr) != 0) {
+ fmd_hdl_debug(hdl, "No L2 AFSR data");
return (-1);
+ }
if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_AFAR,
&xr->xr_afar) != 0 &&
nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_EAR,
- &xr->xr_afar) != 0)
+ &xr->xr_afar) != 0) {
+ fmd_hdl_debug(hdl, "No L2 AFAR data");
return (-1);
+ }
if (nvlist_lookup_uint32(nvl, FM_EREPORT_PAYLOAD_NAME_L2_SYND,
&xr->xr_synd) != 0) {
/* Niagara-2 doesn't provide separate (redundant) l2-synd */
@@ -69,15 +269,17 @@ cmd_xr_fill(fmd_hdl_t *hdl, nvlist_t *nvl, cmd_xr_t *xr, cmd_errcl_t clcode)
}
if (cmd_afsr_check(hdl, niagara_l2_afsr, clcode,
- &xr->xr_synd_status) != 0)
+ &xr->xr_synd_status) != 0) {
+ fmd_hdl_debug(hdl, "Invalid L2 syndrome");
return (-1);
+ }
xr->xr_afar_status = xr->xr_synd_status;
return (0);
}
int
-cmd_cpu_synd_check(uint32_t synd)
+cmd_cpu_synd_check(uint32_t synd, cmd_errcl_t clcode)
{
int i;
@@ -89,6 +291,13 @@ cmd_cpu_synd_check(uint32_t synd)
* 0 is an invalid syndrome because it denotes no error, but
* is associated with an ereport -- meaning there WAS an error.
*/
+ /*
+ * HW does not store the syndrome value for write-back cache
+ * error, so skip the synd check for L2 write-back error
+ */
+ if (CMD_ERRCL_L2UE_WRITEBACK(clcode))
+ return (0);
+
if (synd == 0)
return (-1);
@@ -113,6 +322,8 @@ cmd_afsr_check(fmd_hdl_t *hdl, uint64_t afsr,
switch (clcode) {
case CMD_ERRCL_LDAU:
case CMD_ERRCL_LDSU:
+ case CMD_ERRCL_DL2U:
+ case CMD_ERRCL_IL2U:
*stat_val =
((afsr & NI_L2AFSR_P02) == 0) ?
AFLT_STAT_VALID: AFLT_STAT_INVALID;
diff --git a/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_memerr_arch.c b/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_memerr_arch.c
index 047e29310d..46e350ec27 100644
--- a/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_memerr_arch.c
+++ b/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_memerr_arch.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -298,6 +298,30 @@ cmd_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
/*ARGSUSED*/
cmd_evdisp_t
+cmd_ue_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
+ cmd_errcl_t clcode)
+{
+ cmd_evdisp_t rc, rc1;
+
+ /*
+ * The DAU is the cause of the DAU->DCDP/ICDP train:
+ * - process the cause of the event.
+ * - register the error with the nop event train, so the affected
+ * errors (DCDP/ICDP) will be dropped.
+ */
+ rc = xe_common(hdl, ep, nvl, class, clcode, cmd_ue_common);
+
+ rc1 = cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_NOP);
+ if (rc1 != 0) /* a registration failure is only logged */
+ fmd_hdl_debug(hdl,
+ "Fail to add error (%llx) to the train, rc = %d",
+ clcode, rc1);
+
+ return (rc); /* result of xe_common(); rc1 does not affect it */
+}
+
+/*ARGSUSED*/
+cmd_evdisp_t
cmd_ue(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
cmd_errcl_t clcode)
{
@@ -409,6 +433,30 @@ cmd_fb(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
return (CMD_EVD_OK);
}
+/*ARGSUSED*/
+cmd_evdisp_t
+cmd_fb_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
+ cmd_errcl_t clcode)
+{
+ cmd_evdisp_t rc, rc1;
+
+ /*
+ * The FBU is the cause of the FBU->DCDP/ICDP train:
+ * - process the cause of the event.
+ * - register the error with the nop event train, so the affected
+ * errors (DCDP/ICDP) will be dropped.
+ */
+ rc = cmd_fb(hdl, ep, nvl, class, clcode);
+
+ rc1 = cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_NOP);
+ if (rc1 != 0) /* a registration failure is only logged */
+ fmd_hdl_debug(hdl,
+ "Fail to add error (%llx) to the train, rc = %d",
+ clcode, rc1);
+
+ return (rc); /* result of cmd_fb(); rc1 does not affect it */
+}
+
void
cmd_branch_close(fmd_hdl_t *hdl, void *arg)
{