summary | refs | log | tree | commit | diff
path: root/usr/src
diff options
context:
space:
mode:
author: jc25722 <none@none> 2008-08-01 13:23:00 -0700
committer: jc25722 <none@none> 2008-08-01 13:23:00 -0700
commit: d69c2551e89e9440043ac6ff5739b58746286f33 (patch)
tree: 79a0744bcc6a06c5d97ef5b8239938532da2f501 /usr/src
parent: 7c5714f667c21540147234b280036c21ff93bc69 (diff)
download: illumos-joyent-d69c2551e89e9440043ac6ff5739b58746286f33.tar.gz
6722145 Ultrasparc IV+:system panics due to send mondo timeout during tests of cacheline fault simulation.
Diffstat (limited to 'usr/src')
-rwxr-xr-x usr/src/cmd/fm/modules/common/cpumem-retire/cma_cache.c 5
-rw-r--r-- usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c 14
-rw-r--r-- usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_Lxcache.h 10
-rw-r--r-- usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcache.c 290
-rw-r--r-- usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcacheerr.c 94
5 files changed, 327 insertions, 86 deletions
diff --git a/usr/src/cmd/fm/modules/common/cpumem-retire/cma_cache.c b/usr/src/cmd/fm/modules/common/cpumem-retire/cma_cache.c
index f1fe84053d..c40c8a7b47 100755
--- a/usr/src/cmd/fm/modules/common/cpumem-retire/cma_cache.c
+++ b/usr/src/cmd/fm/modules/common/cpumem-retire/cma_cache.c
@@ -46,6 +46,7 @@ cma_cache_way_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru,
int ret, fd;
fmd_hdl_debug(hdl, "cpu cache *line* fault processing\n");
+ fmd_hdl_debug(hdl, "asru %lx\n", asru);
/*
* This added expansion is needed to cover the situation where a
@@ -111,9 +112,9 @@ cma_cache_way_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru,
if (repair) {
fmd_hdl_debug(hdl,
- "cpu %d: UnRetiring index 0x%06x, way 0x%02x\n bit 0x%04x"
+ "cpu %d: UnRetire for index 0x%06x, way 0x%02x\n bit 0x%04x"
" type 0x%02x", cpuid, index, way, bit, type);
- ret = ioctl(fd, MEM_CACHE_UNRETIRE, &cache_info);
+ return (CMA_RA_SUCCESS);
} else {
fmd_hdl_debug(hdl,
"cpu %d: Retiring index 0x%06x, way 0x%02x\n bit 0x%04x"
diff --git a/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c b/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c
index de8ebad2a7..c110c7dd97 100644
--- a/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c
+++ b/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c
@@ -296,6 +296,8 @@ cma_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl, boolean_t repair)
uint_t nvc = 0;
uint_t keepopen;
int err = 0;
+ nvlist_t *asru;
+ uint32_t index;
err |= nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid);
err |= nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
@@ -309,7 +311,6 @@ cma_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl, boolean_t repair)
while (nvc-- != 0 && (repair || !fmd_case_uuclosed(hdl, uuid))) {
nvlist_t *nvl = *nva++;
const cma_subscriber_t *subr;
- nvlist_t *asru;
if ((subr = nvl2subr(hdl, nvl, &asru)) == NULL)
continue;
@@ -327,9 +328,14 @@ cma_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl, boolean_t repair)
keepopen--;
}
}
-
- if (!keepopen && !repair)
- fmd_case_uuclose(hdl, uuid);
+ /*
+ * Do not close the case if we are handling cache faults.
+ */
+ if (nvlist_lookup_uint32(asru, FM_FMRI_CPU_CACHE_INDEX, &index) != 0) {
+ if (!keepopen && !repair) {
+ fmd_case_uuclose(hdl, uuid);
+ }
+ }
}
static void
diff --git a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_Lxcache.h b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_Lxcache.h
index 42c8fed3c1..73aece64f1 100644
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_Lxcache.h
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_Lxcache.h
@@ -48,6 +48,10 @@ extern "C" {
#define CMD_ANON_WAY -1
#define MAX_WAYS 8
#define CMD_LxCACHE_F_FAULTING 1
+#define CMD_LxCACHE_F_RETIRED 0x2
+#define CMD_LxCACHE_F_UNRETIRED 0x4
+#define CMD_LxCACHE_F_RERETIRED 0x8
+
#define LxCACHE_MKVERSION(version) ((version) << 4 | 1)
#define CMD_LxCACHE_VERSION_1 LxCACHE_MKVERSION(1) /* 17 */
@@ -55,6 +59,7 @@ extern "C" {
#define CMD_LxCACHE_VERSIONED(Lxcache) ((Lxcache)->Lxcache_version & 1)
+#define MAX_FMRI_LEN 128
typedef struct cmd_Lxcache_pers {
cmd_header_t Lxcachep_header; /* Nodetype must be CMD_NT_LxCACHE */
/*
@@ -63,6 +68,7 @@ typedef struct cmd_Lxcache_pers {
char Lxcachep_cpu_hdr_bufname[CMD_BUFNMLEN];
uint_t Lxcachep_version;
cmd_fmri_t Lxcachep_asru; /* ASRU for this LxCACHE */
+ char Lxcachep_retired_fmri[MAX_FMRI_LEN];
cmd_ptrsubtype_t Lxcachep_type; /* L2 or L3 */
uint32_t Lxcachep_index; /* cache index Lxcache represents */
uint32_t Lxcachep_way; /* cache way this Lxcache represents */
@@ -93,6 +99,7 @@ typedef struct cmd_Lxcache {
#define Lxcache_index Lxcache_pers.Lxcachep_index
#define Lxcache_way Lxcache_pers.Lxcachep_way
#define Lxcache_bit Lxcache_pers.Lxcachep_bit
+#define Lxcache_retired_fmri Lxcache_pers.Lxcachep_retired_fmri
#define Lxcache_reason Lxcache_pers.Lxreason
#define Lxcache_list Lxcache_header.hdr_list
@@ -143,6 +150,9 @@ extern void cmd_fault_the_cpu(fmd_hdl_t *, cmd_cpu_t *, cmd_ptrsubtype_t,
const char *);
extern uint32_t cmd_Lx_index_count_type1_ways(cmd_cpu_t *);
extern uint32_t cmd_Lx_index_count_type2_ways(cmd_cpu_t *);
+extern char *cmd_type_to_str(cmd_ptrsubtype_t);
+extern boolean_t cmd_Lxcache_unretire(fmd_hdl_t *, cmd_cpu_t *,
+ cmd_Lxcache_t *, const char *);
extern int test_mode;
#ifdef __cplusplus
}
diff --git a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcache.c b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcache.c
index 5e9937345a..79b7d70864 100644
--- a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcache.c
+++ b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcache.c
@@ -42,6 +42,8 @@
#include <sys/fm/protocol.h>
#include <sys/cheetahregs.h>
#include <sys/mem_cache.h>
+#include <fmd_adm.h>
+
#define PN_ECSTATE_NA 5
/*
@@ -59,6 +61,27 @@ Lxcache_write(fmd_hdl_t *hdl, cmd_Lxcache_t *Lxcache)
sizeof (cmd_Lxcache_pers_t));
}
+/*
+ * Translate a cache pointer subtype into the short name used as the
+ * fault-name prefix of debug messages.  Any subtype other than the four
+ * L2/L3 data/tag values maps to "unknown".  The returned string is a
+ * literal and must not be modified or freed.
+ */
+char *
+cmd_type_to_str(cmd_ptrsubtype_t pstype)
+{
+	char *name;
+
+	switch (pstype) {
+	case CMD_PTR_CPU_L2DATA:
+		name = "l2data";
+		break;
+	case CMD_PTR_CPU_L3DATA:
+		name = "l3data";
+		break;
+	case CMD_PTR_CPU_L2TAG:
+		name = "l2tag";
+		break;
+	case CMD_PTR_CPU_L3TAG:
+		name = "l3tag";
+		break;
+	default:
+		name = "unknown";
+		break;
+	}
+
+	return (name);
+}
void
cmd_Lxcache_free(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache,
int destroy)
@@ -209,7 +232,60 @@ cmd_Lxcache_lookup(cmd_cpu_t *cpu, cmd_ptrsubtype_t pstype, uint32_t index,
return (Lxcache_lookup_by_type_index_way_bit(cpu, pstype, index, way,
bit));
}
+/*
+ * Append one member name to the comma-separated list of FMRI members that
+ * could not be found.  The list is truncated rather than overrun.
+ */
+static void
+fmri_note_missing(char *list, size_t listlen, const char *name)
+{
+	size_t used = strlen(list);
+
+	if (used + 1 >= listlen)
+		return;
+	(void) snprintf(list + used, listlen - used, "%s%s",
+	    used != 0 ? ", " : "", name);
+}
+
+/*
+ * Format the cpu cache FMRI held in nvl as a string:
+ *
+ *	cpu:///<id>=N/<serial>=S/<index>=N/<way>=N/<type>=N
+ *
+ * where the member names are the FM_FMRI_CPU_* constants looked up below.
+ *
+ * Returns the length reported by snprintf().  A call with buflen == 0 only
+ * measures the string.  Returns -1 when:
+ *   - any of the five members is missing from nvl (the missing names are
+ *     written to the debug log), or
+ *   - snprintf() fails, or
+ *   - a non-empty buffer is too small for the whole string.  In that case
+ *     buf is reset to the empty string so that callers which only test
+ *     for -1 never keep a truncated FMRI.
+ */
+ssize_t
+cmd_fmri_nvl2str(fmd_hdl_t *hdl, nvlist_t *nvl, char *buf, size_t buflen)
+{
+	uint8_t type;
+	uint32_t cpuid, index, way;
+	char *serstr = NULL;
+	char missing_list[128];
+	int len;
+
+	missing_list[0] = '\0';
+	if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, &cpuid) != 0)
+		fmri_note_missing(missing_list, sizeof (missing_list),
+		    FM_FMRI_CPU_ID);
+	if (nvlist_lookup_string(nvl, FM_FMRI_CPU_SERIAL_ID, &serstr) != 0)
+		fmri_note_missing(missing_list, sizeof (missing_list),
+		    FM_FMRI_CPU_SERIAL_ID);
+	if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_CACHE_INDEX, &index) != 0)
+		fmri_note_missing(missing_list, sizeof (missing_list),
+		    FM_FMRI_CPU_CACHE_INDEX);
+	if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_CACHE_WAY, &way) != 0)
+		fmri_note_missing(missing_list, sizeof (missing_list),
+		    FM_FMRI_CPU_CACHE_WAY);
+	if (nvlist_lookup_uint8(nvl, FM_FMRI_CPU_CACHE_TYPE, &type) != 0)
+		fmri_note_missing(missing_list, sizeof (missing_list),
+		    FM_FMRI_CPU_CACHE_TYPE);
+
+	if (missing_list[0] != '\0') {
+		fmd_hdl_debug(hdl,
+		    "\ncmd_fmri_nvl2str: missing %s in fmri\n",
+		    missing_list);
+		return (-1);
+	}
+
+	len = snprintf(buf, buflen,
+	    "cpu:///%s=%u/%s=%s/%s=%u/%s=%u/%s=%d",
+	    FM_FMRI_CPU_ID, cpuid,
+	    FM_FMRI_CPU_SERIAL_ID, serstr,
+	    FM_FMRI_CPU_CACHE_INDEX, index,
+	    FM_FMRI_CPU_CACHE_WAY, way,
+	    FM_FMRI_CPU_CACHE_TYPE, type);
+	if (len < 0)
+		return (-1);
+
+	/* buflen == 0 is the "measure only" idiom and is not an error. */
+	if (buflen != 0 && (size_t)len >= buflen) {
+		fmd_hdl_debug(hdl,
+		    "\ncmd_fmri_nvl2str: fmri needs %d bytes,"
+		    " buffer has %lu\n",
+		    len + 1, (unsigned long)buflen);
+		buf[0] = '\0';
+		return (-1);
+	}
+
+	return (len);
+}
+
+/*
+ * Ask fmd, through an fmd_adm handle, to mark the resource named by the
+ * FMRI string buf as repaired.
+ *
+ * Returns 0 on success, or -1 if the fmd_adm handle could not be opened
+ * or the repair request reported an error.
+ */
+static int
+cmd_repair_fmri(fmd_hdl_t *hdl, char *buf)
+{
+	fmd_adm_t *adm;
+	int rv;
+
+	adm = fmd_adm_open(NULL, FMD_ADM_PROGRAM, FMD_ADM_VERSION);
+	if (adm == NULL) {
+		fmd_hdl_debug(hdl, "Could not contact fmadm to unretire\n");
+		return (-1);
+	}
+
+	/* Collapse every failure code from the adm call to -1. */
+	rv = (fmd_adm_rsrc_repair(adm, buf) != 0) ? -1 : 0;
+	fmd_adm_close(adm);
+
+	return (rv);
+}
static cmd_Lxcache_t *
Lxcache_wrapv1(fmd_hdl_t *hdl, cmd_Lxcache_pers_t *pers, size_t psz)
@@ -465,6 +541,20 @@ cmd_Lxcache_fault(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache,
fmd_case_add_suspect(hdl, Lxcache->Lxcache_case.cc_cp, flt);
fmd_case_solve(hdl, Lxcache->Lxcache_case.cc_cp);
+ if (Lxcache->Lxcache_retired_fmri[0] == 0) {
+ if (cmd_fmri_nvl2str(hdl, Lxcache->Lxcache_asru.fmri_nvl,
+ Lxcache->Lxcache_retired_fmri,
+ sizeof (Lxcache->Lxcache_retired_fmri)) == -1)
+ fmd_hdl_debug(hdl,
+ "\n%s:cpu_id %d: Failed to save the"
+ " retired fmri string\n",
+ fltnm, cpu->cpu_cpuid);
+ else
+ fmd_hdl_debug(hdl,
+ "\n%s:cpu_id %d:Saved the retired fmri string %s\n",
+ fltnm, cpu->cpu_cpuid,
+ Lxcache->Lxcache_retired_fmri);
+ }
/* Retrieve the number of retired ways for each category */
cpu_retired_1 = cmd_Lx_index_count_type1_ways(cpu);
@@ -666,3 +756,203 @@ is_index_way_retired(cmd_cpu_t *cpu, cmd_ptrsubtype_t pstype, uint32_t index,
return (1);
return (0);
}
+/*
+ * Retire the cache line described by Lxcache (index, way, bit and cache
+ * type) on the given cpu by issuing MEM_CACHE_RETIRE to the device named
+ * by mem_cache_device.
+ *
+ * Returns B_TRUE when the ioctl succeeds.  Returns B_FALSE when the cache
+ * type is not one of the L2/L3 data/tag types, the device cannot be
+ * opened, or the ioctl fails; each failure is written to the debug log.
+ */
+int
+cmd_cache_way_retire(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache)
+{
+	char *fltnm;
+	cache_info_t cache_info;
+	int ret, fd;
+
+	fltnm = cmd_type_to_str(Lxcache->Lxcache_type);
+
+	cache_info.cpu_id = cpu->cpu_cpuid;
+	cache_info.way = Lxcache->Lxcache_way;
+	cache_info.bit = Lxcache->Lxcache_bit;
+	cache_info.index = Lxcache->Lxcache_index;
+
+	switch (Lxcache->Lxcache_type) {
+	case CMD_PTR_CPU_L2TAG:
+		cache_info.cache = L2_CACHE_TAG;
+		break;
+	case CMD_PTR_CPU_L2DATA:
+		cache_info.cache = L2_CACHE_DATA;
+		break;
+	case CMD_PTR_CPU_L3TAG:
+		cache_info.cache = L3_CACHE_TAG;
+		break;
+	case CMD_PTR_CPU_L3DATA:
+		cache_info.cache = L3_CACHE_DATA;
+		break;
+	default:
+		/* Never hand the driver an unset cache selector. */
+		fmd_hdl_debug(hdl,
+		    "%s:cpu_id %d unsupported cache type %d\n",
+		    fltnm, cpu->cpu_cpuid, (int)Lxcache->Lxcache_type);
+		return (B_FALSE);
+	}
+
+	fd = open(mem_cache_device, O_RDWR);
+	if (fd == -1) {
+		/* The prefix must be %s: fltnm is the first argument. */
+		fmd_hdl_debug(hdl,
+		    "%s:cpu_id %d open of %s failed\n",
+		    fltnm, cpu->cpu_cpuid, mem_cache_device);
+		return (B_FALSE);
+	}
+
+	fmd_hdl_debug(hdl,
+	    "\n%s:cpu %d: Retiring index %d, way %d bit %d\n",
+	    fltnm, cpu->cpu_cpuid, cache_info.index, cache_info.way,
+	    (int16_t)cache_info.bit);
+	ret = ioctl(fd, MEM_CACHE_RETIRE, &cache_info);
+	(void) close(fd);
+	if (ret == -1) {
+		fmd_hdl_debug(hdl,
+		    "%s:cpu_id %d MEM_CACHE_RETIRE ioctl failed\n",
+		    fltnm, cpu->cpu_cpuid);
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Unretire the cache line described by Lxcache (index, way, bit and cache
+ * type) on the given cpu by issuing MEM_CACHE_UNRETIRE to the device
+ * named by mem_cache_device.
+ *
+ * Returns B_TRUE when the ioctl succeeds.  Returns B_FALSE when the cache
+ * type is not one of the L2/L3 data/tag types, the device cannot be
+ * opened, or the ioctl fails; each failure is written to the debug log.
+ */
+boolean_t
+cmd_cache_way_unretire(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache)
+{
+	char *fltnm;
+	cache_info_t cache_info;
+	int ret, fd;
+
+	fltnm = cmd_type_to_str(Lxcache->Lxcache_type);
+
+	cache_info.cpu_id = cpu->cpu_cpuid;
+	cache_info.way = Lxcache->Lxcache_way;
+	cache_info.bit = Lxcache->Lxcache_bit;
+	cache_info.index = Lxcache->Lxcache_index;
+
+	switch (Lxcache->Lxcache_type) {
+	case CMD_PTR_CPU_L2TAG:
+		cache_info.cache = L2_CACHE_TAG;
+		break;
+	case CMD_PTR_CPU_L2DATA:
+		cache_info.cache = L2_CACHE_DATA;
+		break;
+	case CMD_PTR_CPU_L3TAG:
+		cache_info.cache = L3_CACHE_TAG;
+		break;
+	case CMD_PTR_CPU_L3DATA:
+		cache_info.cache = L3_CACHE_DATA;
+		break;
+	default:
+		/* Never hand the driver an unset cache selector. */
+		fmd_hdl_debug(hdl,
+		    "%s:cpu_id %d unsupported cache type %d\n",
+		    fltnm, cpu->cpu_cpuid, (int)Lxcache->Lxcache_type);
+		return (B_FALSE);
+	}
+
+	fd = open(mem_cache_device, O_RDWR);
+	if (fd == -1) {
+		/* The prefix must be %s: fltnm is the first argument. */
+		fmd_hdl_debug(hdl,
+		    "%s:cpu_id %d open of %s failed\n",
+		    fltnm, cpu->cpu_cpuid, mem_cache_device);
+		return (B_FALSE);
+	}
+
+	fmd_hdl_debug(hdl,
+	    "\n%s:cpu %d: Unretiring index %d, way %d bit %d\n",
+	    fltnm, cpu->cpu_cpuid, cache_info.index, cache_info.way,
+	    (int16_t)cache_info.bit);
+	ret = ioctl(fd, MEM_CACHE_UNRETIRE, &cache_info);
+	(void) close(fd);
+	if (ret == -1) {
+		fmd_hdl_debug(hdl,
+		    "%s:cpu_id %d MEM_CACHE_UNRETIRE ioctl failed\n",
+		    fltnm, cpu->cpu_cpuid);
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Walk the cpu's Lxcache list and return the first entry whose index, way
+ * and type equal the arguments and which has at least one of the bits in
+ * flags set.  Returns NULL when no entry matches.
+ */
+static cmd_Lxcache_t *
+cmd_Lxcache_lookup_by_type_index_way_flags(cmd_cpu_t *cpu,
+    cmd_ptrsubtype_t type, uint32_t index, int8_t way, int32_t flags)
+{
+	cmd_Lxcache_t *cur = cmd_list_next(&cpu->cpu_Lxcaches);
+
+	while (cur != NULL) {
+		if (cur->Lxcache_index == index &&
+		    cur->Lxcache_way == way &&
+		    cur->Lxcache_type == type &&
+		    (cur->Lxcache_flags & flags) != 0)
+			return (cur);
+		cur = cmd_list_next(cur);
+	}
+
+	return (NULL);
+}
+/*
+ * Unretire the cache line described by cmd_Lxcache and, if an FMRI string
+ * was saved for it when it was faulted, tell fmd that the FMRI has been
+ * repaired.  fltnm is only used as a prefix for debug messages.
+ *
+ * Returns B_TRUE when the line is considered unretired.  This includes
+ * the case where a TAG line is left physically retired because a DATA
+ * entry at the same index and way is flagged RERETIRED.
+ * Returns B_FALSE when the unretire ioctl fails, or when fmd could not be
+ * told about the repair.  In the latter case the line is retired again,
+ * and Lxcache_flags and the saved FMRI are left as they were on entry.
+ * If even the re-retire fails, fmd_hdl_abort() is called.
+ */
+boolean_t
+cmd_Lxcache_unretire(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *cmd_Lxcache,
+    const char *fltnm)
+{
+	cmd_ptrsubtype_t data_type;
+	cmd_Lxcache_t *retired_Lxcache;
+
+	/*
+	 * If we are unretiring a cacheline retired due to suspected TAG
+	 * fault, then we must first check if we are using a cacheline
+	 * that was retired earlier for DATA fault.
+	 * If so we will not unretire the cacheline.
+	 * We will change the flags to reflect the current condition.
+	 * We will return success, though.
+	 */
+	if ((cmd_Lxcache->Lxcache_type == CMD_PTR_CPU_L2TAG) ||
+	    (cmd_Lxcache->Lxcache_type == CMD_PTR_CPU_L3TAG)) {
+		if (cmd_Lxcache->Lxcache_type == CMD_PTR_CPU_L2TAG)
+			data_type = CMD_PTR_CPU_L2DATA;
+		else
+			data_type = CMD_PTR_CPU_L3DATA;
+		fmd_hdl_debug(hdl,
+		    "\n%s:cpuid %d checking if there is a %s"
+		    " cacheline re-retired at this index %d and way %d\n",
+		    fltnm, cpu->cpu_cpuid, cmd_type_to_str(data_type),
+		    cmd_Lxcache->Lxcache_index, cmd_Lxcache->Lxcache_way);
+		retired_Lxcache = cmd_Lxcache_lookup_by_type_index_way_flags(
+		    cpu, data_type, cmd_Lxcache->Lxcache_index,
+		    cmd_Lxcache->Lxcache_way, CMD_LxCACHE_F_RERETIRED);
+		if (retired_Lxcache) {
+			retired_Lxcache->Lxcache_flags = CMD_LxCACHE_F_RETIRED;
+			cmd_Lxcache->Lxcache_flags = CMD_LxCACHE_F_UNRETIRED;
+			return (B_TRUE);
+		}
+	}
+
+	if (cmd_cache_way_unretire(hdl, cpu, cmd_Lxcache) == B_FALSE)
+		return (B_FALSE);
+
+	/*
+	 * We have unretired the cacheline. We need to inform the fmd
+	 * that we have repaired the faulty fmri that we retired earlier.
+	 * The cpumem agent will not unretire cacheline in response to
+	 * the list.repair events it receives.
+	 */
+	if (cmd_Lxcache->Lxcache_retired_fmri[0] != 0) {
+		fmd_hdl_debug(hdl,
+		    "\n%s:cpuid %d Repairing the retired fmri %s",
+		    fltnm, cpu->cpu_cpuid,
+		    cmd_Lxcache->Lxcache_retired_fmri);
+		if (cmd_repair_fmri(hdl,
+		    cmd_Lxcache->Lxcache_retired_fmri) != 0) {
+			fmd_hdl_debug(hdl,
+			    "\n%s:cpuid %d Failed to repair"
+			    " retired fmri.",
+			    fltnm, cpu->cpu_cpuid);
+			/*
+			 * We need to retire the cacheline that we just
+			 * unretired.  Lxcache_flags has not been touched
+			 * yet, so after a successful re-retire the entry
+			 * still describes a retired line.
+			 */
+			if (cmd_cache_way_retire(hdl, cpu, cmd_Lxcache)
+			    == B_FALSE) {
+				/*
+				 * A hopeless situation.
+				 * Cannot maintain consistency of cacheline
+				 * state between fmd and DE.
+				 * Aborting the DE.
+				 */
+				fmd_hdl_abort(hdl,
+				    "\n%s:cpuid %d We are unable to repair"
+				    " the fmri we just unretired and are"
+				    " unable to restore the DE and fmd to"
+				    " a sane state.\n",
+				    fltnm, cpu->cpu_cpuid);
+			}
+			return (B_FALSE);
+		}
+		cmd_Lxcache->Lxcache_retired_fmri[0] = 0;
+	}
+
+	/* Only now are the hardware and fmd both in the unretired state. */
+	cmd_Lxcache->Lxcache_flags = CMD_LxCACHE_F_UNRETIRED;
+	return (B_TRUE);
+}
diff --git a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcacheerr.c b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcacheerr.c
index e1ad243d9f..30847d7f17 100644
--- a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcacheerr.c
+++ b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcacheerr.c
@@ -834,66 +834,6 @@ cmd_cache_valid_way_check(fmd_hdl_t *hdl, uint64_t ec_tag, uint64_t afar,
return (ret_val);
}
-
-ssize_t
-cmd_fmri_nvl2str(nvlist_t *nvl, char *buf, size_t buflen)
-{
- int err = 0;
- uint8_t type;
- uint32_t cpuid, index, way;
- char *serstr = NULL;
-
- err = nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, &cpuid);
- err |= nvlist_lookup_string(nvl, FM_FMRI_CPU_SERIAL_ID, &serstr);
- err |= nvlist_lookup_uint32(nvl, FM_FMRI_CPU_CACHE_INDEX, &index);
- err |= nvlist_lookup_uint32(nvl, FM_FMRI_CPU_CACHE_WAY, &way);
- err |= nvlist_lookup_uint8(nvl, FM_FMRI_CPU_CACHE_TYPE, &type);
-
- if (err)
- return (-1);
-
- return (snprintf(buf, buflen,
- "cpu:///%s=%u/%s=%s/%s=%u/%s=%u/%s=%u/%s=%d",
- FM_FMRI_CPU_ID, cpuid,
- FM_FMRI_CPU_SERIAL_ID, serstr,
- FM_FMRI_CPU_CACHE_INDEX, index,
- FM_FMRI_CPU_CACHE_WAY, way,
- FM_FMRI_CPU_CACHE_TYPE, type));
-}
-
-
-int
-cmd_Lx_repair_rsrc(fmd_hdl_t *hdl, nvlist_t *nvl)
-{
- fmd_adm_t *ap;
- char *buf = NULL;
- ssize_t buflen;
- int err;
-
- if ((ap = fmd_adm_open(NULL, FMD_ADM_PROGRAM,
- FMD_ADM_VERSION)) == NULL) {
- fmd_hdl_debug(hdl, "Could not contact fmadm to unretire\n");
- return (-1);
- }
- if ((buflen = cmd_fmri_nvl2str(nvl, NULL, 0)) == -1 ||
- (buf = fmd_hdl_zalloc(hdl, buflen + 1, FMD_NOSLEEP)) == NULL ||
- cmd_fmri_nvl2str(nvl, buf, buflen + 1) == -1) {
- fmd_hdl_debug(hdl, "Failed to reload asru for repair");
- if (buf != NULL)
- fmd_hdl_free(hdl, buf, buflen + 1);
- err = -1;
- goto out;
- }
-
- err = fmd_adm_rsrc_repair(ap, buf);
- if (err)
- err = -1;
-out:
- fmd_adm_close(ap);
- return (err);
-}
-
-
/* Find the lowest way SERD engine not faulted for the given index */
uint32_t
@@ -946,10 +886,10 @@ cmd_Lx_lookup_lowest_suspicous_way(cmd_Lxcache_t **other_cache, cmd_cpu_t *cpu,
int32_t index, cmd_ptrsubtype_t pstype)
{
cmd_Lxcache_t *cache = NULL;
- uint32_t way, way1;
+ int32_t way, way1 = -1;
*other_cache = NULL;
- for (way = 0; way < LX_NWAYS - 1; way++) {
+ for (way = 0; way < LX_NWAYS; way++) {
cache = cmd_Lxcache_lookup_by_index_way(cpu, pstype,
index, way);
if (cache != NULL &&
@@ -964,11 +904,13 @@ cmd_Lx_lookup_lowest_suspicous_way(cmd_Lxcache_t **other_cache, cmd_cpu_t *cpu,
} else {
pstype = CMD_PTR_CPU_L3TAG;
}
- for (way = 0; way < LX_NWAYS - 1; way++) {
+ for (way = 0; way < LX_NWAYS; way++) {
cache = cmd_Lxcache_lookup_by_index_way(cpu, pstype,
index, way);
if (cache != NULL &&
(cache->Lxcache_reason == CMD_LXSUSPICOUS)) {
+ if (way1 == -1)
+ return (way);
/* Return the smaller of the two */
if (way < way1) {
*other_cache = cache;
@@ -978,7 +920,8 @@ cmd_Lx_lookup_lowest_suspicous_way(cmd_Lxcache_t **other_cache, cmd_cpu_t *cpu,
}
}
}
- return ((uint32_t)-1);
+ /* if there are no suspicious tag ways, we fall through */
+ return (way1);
}
/* Count the number of ways convicted for a given index */
@@ -1300,22 +1243,13 @@ cmd_cache_ce_panther(fmd_hdl_t *hdl, fmd_event_t *ep, cmd_xr_t *xr)
cmd_Lxcache_destroy(hdl, xr->xr_cpu,
other_cache);
-
- /* Repair the cache line */
- if (nvlist_add_uint32(repair_nvl,
- FM_FMRI_CPU_CACHE_WAY, unretire_way) == 0)
- if (cmd_Lx_repair_rsrc(hdl,
- repair_nvl)) {
- fmd_hdl_debug(hdl, "failed"
- " to repair index %d"
- " way %d\n",
- xr->xr_error_index,
- unretire_way);
- }
- else
- fmd_hdl_debug(hdl, "failed to add"
- "way to nvl to repair resource",
- "with way %d\n", unretire_way);
+ /*
+ * Unretire the cacheline from DE.
+ */
+ if (cmd_Lxcache_unretire(hdl, cpu,
+ other_cache,
+ cache_ed->ed_fltnm) == B_FALSE)
+ return (CMD_EVD_BAD);
}
/* Indicate our reason for retiring */
cache->Lxcache_reason = CMD_LXSUSPICOUS;