summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/fm/dicts/FMD.dict2
-rw-r--r--usr/src/cmd/fm/dicts/FMD.po38
-rw-r--r--usr/src/cmd/fm/eversholt/common/check.c15
-rw-r--r--usr/src/cmd/fm/eversholt/common/literals.h1
-rw-r--r--usr/src/cmd/fm/fmadm/common/faulty.c126
-rw-r--r--usr/src/cmd/fm/fmadm/common/fmadm.c10
-rw-r--r--usr/src/cmd/fm/fmadm/common/fmadm.h10
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd.c19
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_api.c113
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_api.h10
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_api.map5
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_asru.c329
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_asru.h18
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_case.c327
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_case.h5
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_dispq.c16
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_fmri.c36
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_fmri.h17
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_protocol.c6
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_protocol.h2
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c94
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_rpc_adm.x7
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_scheme.c45
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_scheme.h9
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_self.c4
-rw-r--r--usr/src/cmd/fm/fmdump/common/fault.c33
-rw-r--r--usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c38
-rw-r--r--usr/src/cmd/fm/modules/common/disk-monitor/disk-monitor.conf3
-rw-r--r--usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c18
-rw-r--r--usr/src/cmd/fm/modules/common/eversholt/eval.c23
-rw-r--r--usr/src/cmd/fm/modules/common/eversholt/fme.c5
-rw-r--r--usr/src/cmd/fm/modules/common/io-retire/io-retire.conf3
-rw-r--r--usr/src/cmd/fm/modules/common/io-retire/rio_main.c80
-rw-r--r--usr/src/cmd/fm/modules/common/snmp-trapgen/snmp.c1
-rw-r--r--usr/src/cmd/fm/modules/common/syslog-msgs/syslog.c4
-rw-r--r--usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf3
-rw-r--r--usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c9
-rw-r--r--usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcache.c2
-rw-r--r--usr/src/cmd/fm/schemes/cpu/cpu.c59
-rw-r--r--usr/src/cmd/fm/schemes/dev/scheme.c45
-rw-r--r--usr/src/cmd/fm/schemes/hc/scheme.c29
-rw-r--r--usr/src/cmd/fm/schemes/mem/mem.c117
-rw-r--r--usr/src/lib/fm/libfmd_adm/common/fmd_adm.c68
-rw-r--r--usr/src/lib/fm/libfmd_adm/common/fmd_adm.h7
-rw-r--r--usr/src/lib/fm/libfmd_adm/common/mapfile-vers7
-rw-r--r--usr/src/lib/fm/libfmd_snmp/common/fmd_snmp.h13
-rw-r--r--usr/src/lib/fm/libfmd_snmp/common/problem.c212
-rw-r--r--usr/src/lib/fm/libfmd_snmp/common/problem.h4
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/dev.c145
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/hc.c84
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/libtopo.h2
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/mapfile-vers2
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/topo_fmri.c59
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/topo_mod.h12
-rw-r--r--usr/src/lib/fm/topo/modules/i86pc/chip/chip.h2
-rw-r--r--usr/src/lib/fm/topo/modules/i86pc/chip/chip_amd.c3
-rw-r--r--usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c67
-rw-r--r--usr/src/lib/fm/topo/modules/sun4v/platform-cpu/cpu.c55
-rw-r--r--usr/src/lib/fm/topo/modules/sun4v/platform-mem/mem.c46
-rw-r--r--usr/src/lib/libdevinfo/devinfo.c2
-rw-r--r--usr/src/lib/libdevinfo/libdevinfo.h1
-rw-r--r--usr/src/uts/common/os/devcfg.c3
-rw-r--r--usr/src/uts/common/sys/fm/protocol.h14
63 files changed, 2235 insertions, 309 deletions
diff --git a/usr/src/cmd/fm/dicts/FMD.dict b/usr/src/cmd/fm/dicts/FMD.dict
index 762e90e1b1..547f9581c5 100644
--- a/usr/src/cmd/fm/dicts/FMD.dict
+++ b/usr/src/cmd/fm/dicts/FMD.dict
@@ -31,3 +31,5 @@ defect.sunos.fmd.nodiagcode=1
defect.sunos.fmd.module=2
defect.sunos.fmd.config=3
list.repaired=4
+list.updated=5
+list.resolved=6
diff --git a/usr/src/cmd/fm/dicts/FMD.po b/usr/src/cmd/fm/dicts/FMD.po
index 399c66a990..f678ba0ad7 100644
--- a/usr/src/cmd/fm/dicts/FMD.po
+++ b/usr/src/cmd/fm/dicts/FMD.po
@@ -110,10 +110,42 @@ msgstr "Repair"
msgid "FMD-8000-4M.severity"
msgstr "Minor"
msgid "FMD-8000-4M.description"
-msgstr "All faults associated with an event id have been addressed. Refer to %s for more information."
+msgstr "All faults associated with an event id have been addressed.\n Refer to %s for more information."
msgid "FMD-8000-4M.response"
-msgstr "Any system components offlined becase of the original fault have been brought back online."
+msgstr "Some system components offlined because of the original fault may have been brought back online.\n"
msgid "FMD-8000-4M.impact"
-msgstr "Performance degradation of the system due to the original fault has been recovered."
+msgstr "Performance degradation of the system due to the original fault may have been recovered.\n"
msgid "FMD-8000-4M.action"
msgstr "Use fmdump -v -u <EVENT-ID> to identify the repaired components."
+#
+# code: FMD-8000-58
+# keys: list.updated
+#
+msgid "FMD-8000-58.type"
+msgstr "Update"
+msgid "FMD-8000-58.severity"
+msgstr "Minor"
+msgid "FMD-8000-58.description"
+msgstr "Some faults associated with an event id have been addressed.\n Refer to %s for more information."
+msgid "FMD-8000-58.response"
+msgstr "Some system components offlined because of the original fault may have been brought back online.\n"
+msgid "FMD-8000-58.impact"
+msgstr "Performance degradation of the system due to the original fault may have been recovered.\n"
+msgid "FMD-8000-58.action"
+msgstr "Use fmadm faulty to identify the repaired components, and any suspects that still need to be repaired.\n"
+#
+# code: FMD-8000-6U
+# keys: list.resolved
+#
+msgid "FMD-8000-6U.type"
+msgstr "Resolved"
+msgid "FMD-8000-6U.severity"
+msgstr "Minor"
+msgid "FMD-8000-6U.description"
+msgstr "All faults associated with an event id have been addressed.\n Refer to %s for more information."
+msgid "FMD-8000-6U.response"
+msgstr "All system components offlined because of the original fault have been brought back online.\n"
+msgid "FMD-8000-6U.impact"
+msgstr "Performance degradation of the system due to the original fault has been recovered.\n"
+msgid "FMD-8000-6U.action"
+msgstr "Use fmdump -v -u <EVENT-ID> to identify the repaired components.\n"
diff --git a/usr/src/cmd/fm/eversholt/common/check.c b/usr/src/cmd/fm/eversholt/common/check.c
index e297a3feaa..668cbcc3b1 100644
--- a/usr/src/cmd/fm/eversholt/common/check.c
+++ b/usr/src/cmd/fm/eversholt/common/check.c
@@ -1119,6 +1119,21 @@ check_func(struct node *np)
"argument to is_present() must be a path or a call "
"to fru() or asru()");
}
+ } else if (np->u.func.s == L_has_fault) {
+ if (arglist->t == T_LIST &&
+ (arglist->u.expr.left->t == T_NAME ||
+ (arglist->u.expr.left->t == T_FUNC &&
+ (arglist->u.expr.left->u.func.s == L_fru ||
+ arglist->u.expr.left->u.func.s == L_asru))) &&
+ arglist->u.expr.right->t == T_QUOTE) {
+ if (arglist->u.expr.left->t == T_FUNC)
+ check_func(arglist->u.expr.left);
+ } else {
+ outfl(O_ERR, arglist->file, arglist->line,
+ "%s() must have path or call to "
+ "fru() and/or asru() as first argument; "
+ "second argument must be a string", np->u.func.s);
+ }
} else if (np->u.func.s == L_is_type) {
if (arglist->t == T_NAME ||
(arglist->t == T_FUNC &&
diff --git a/usr/src/cmd/fm/eversholt/common/literals.h b/usr/src/cmd/fm/eversholt/common/literals.h
index 9dd790d68b..1f0fd3ef22 100644
--- a/usr/src/cmd/fm/eversholt/common/literals.h
+++ b/usr/src/cmd/fm/eversholt/common/literals.h
@@ -169,6 +169,7 @@ L_DECL(is_connected);
L_DECL(is_under);
L_DECL(is_on);
L_DECL(is_present);
+L_DECL(has_fault);
L_DECL(is_type);
L_DECL(count);
diff --git a/usr/src/cmd/fm/fmadm/common/faulty.c b/usr/src/cmd/fm/fmadm/common/faulty.c
index 61141b7775..87f7dcaa2b 100644
--- a/usr/src/cmd/fm/fmadm/common/faulty.c
+++ b/usr/src/cmd/fm/fmadm/common/faulty.c
@@ -97,7 +97,7 @@
*
* Fault class : fault.memory.dimm_sb
* Affects : mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0
- * degraded but still in service
+ * faulted but still in service
* FRU : "CPU 0 DIMM 0" (hc://.../memory-controller=0/dimm=0)
* faulty
*
@@ -1062,7 +1062,8 @@ extract_record_info(nvlist_t *nvl, name_list_t **class_p,
name = get_nvl2str_topo(lfru);
if (name != NULL) {
nlp = alloc_name_list(name, lpct);
- nlp->status = status & ~FM_SUSPECT_UNUSABLE;
+ nlp->status = status & ~(FM_SUSPECT_UNUSABLE |
+ FM_SUSPECT_DEGRADED);
free(name);
if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION,
&label) == 0)
@@ -1075,7 +1076,9 @@ extract_record_info(nvlist_t *nvl, name_list_t **class_p,
name = get_nvl2str_topo(lasru);
if (name != NULL) {
nlp = alloc_name_list(name, lpct);
- nlp->status = status & ~FM_SUSPECT_NOT_PRESENT;
+ nlp->status = status & ~(FM_SUSPECT_NOT_PRESENT |
+ FM_SUSPECT_REPAIRED | FM_SUSPECT_REPLACED |
+ FM_SUSPECT_ACQUITTED);
free(name);
(void) merge_name_list(asru_p, nlp, 1);
}
@@ -1315,11 +1318,20 @@ print_asru_status(int status, char *label)
case 0:
msg = dgettext("FMD", "ok and in service");
break;
+ case FM_SUSPECT_DEGRADED:
+ msg = dgettext("FMD", "service degraded, "
+ "but associated components no longer faulty");
+ break;
+ case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED:
+ msg = dgettext("FMD", "faulted but still "
+ "providing degraded service");
+ break;
case FM_SUSPECT_FAULTY:
- msg = dgettext("FMD", "degraded but still in service");
+ msg = dgettext("FMD", "faulted but still in service");
break;
case FM_SUSPECT_UNUSABLE:
- msg = dgettext("FMD", "unknown, not present or disabled");
+ msg = dgettext("FMD", "out of service, "
+ "but associated components no longer faulty");
break;
case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE:
msg = dgettext("FMD", "faulted and taken out of service");
@@ -1341,8 +1353,14 @@ print_fru_status(int status, char *label)
msg = dgettext("FMD", "not present");
else if (status & FM_SUSPECT_FAULTY)
msg = dgettext("FMD", "faulty");
+ else if (status & FM_SUSPECT_REPLACED)
+ msg = dgettext("FMD", "replaced");
+ else if (status & FM_SUSPECT_REPAIRED)
+ msg = dgettext("FMD", "repair attempted");
+ else if (status & FM_SUSPECT_ACQUITTED)
+ msg = dgettext("FMD", "acquitted");
else
- msg = dgettext("FMD", "repaired");
+ msg = dgettext("FMD", "removed");
(void) printf("%s %s\n", label, msg);
}
@@ -1727,8 +1745,15 @@ print_fru(int summary, int opt_a, int opt_i, int page_feed)
(void) printf(dgettext("FMD", "not present\n"));
else if (status & FM_SUSPECT_FAULTY)
(void) printf(dgettext("FMD", "faulty\n"));
+ else if (status & FM_SUSPECT_REPLACED)
+ (void) printf(dgettext("FMD", "replaced\n"));
+ else if (status & FM_SUSPECT_REPAIRED)
+ (void) printf(dgettext("FMD",
+ "repair attempted\n"));
+ else if (status & FM_SUSPECT_ACQUITTED)
+ (void) printf(dgettext("FMD", "acquitted\n"));
else
- (void) printf(dgettext("FMD", "repaired\n"));
+ (void) printf(dgettext("FMD", "removed\n"));
slp = tp->status_rec_list;
end = slp;
@@ -1811,6 +1836,12 @@ print_asru(int opt_a)
case 0:
msg = dgettext("FMD", "ok");
break;
+ case FM_SUSPECT_DEGRADED:
+ msg = dgettext("FMD", "degraded");
+ break;
+ case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED:
+ msg = dgettext("FMD", "degraded");
+ break;
case FM_SUSPECT_FAULTY:
msg = dgettext("FMD", "degraded");
break;
@@ -2017,12 +2048,12 @@ cmd_repair(fmd_adm_t *adm, int argc, char *argv[])
return (FMADM_EXIT_USAGE);
/*
- * argument could be a uuid, and fmri (asru, fru or resource)
+ * argument could be a uuid, an fmri (asru, fru or resource)
* or a label. Try uuid first, If that fails try the others.
*/
err = fmd_adm_case_repair(adm, argv[optind]);
if (err != 0)
- err = fmd_adm_rsrc_repair(adm, argv[optind]);
+ err = fmd_adm_rsrc_repaired(adm, argv[optind]);
if (err != 0)
die("failed to record repair to %s", argv[optind]);
@@ -2030,3 +2061,80 @@ cmd_repair(fmd_adm_t *adm, int argc, char *argv[])
note("recorded repair to %s\n", argv[optind]);
return (FMADM_EXIT_SUCCESS);
}
+
+/*
+ * Handler for the "repaired" subcommand.
+ *
+ * Takes exactly one operand, which may be an fmri (asru, fru or resource)
+ * or a label, and passes it to fmd_adm_rsrc_repaired().
+ *
+ * Returns FMADM_EXIT_USAGE if any option is supplied (the option string is
+ * empty, so getopt() only returns EOF when there are none) or if the number
+ * of operands is not one; returns FMADM_EXIT_SUCCESS once the repair has
+ * been recorded.  A failure from fmd_adm_rsrc_repaired() is reported through
+ * die() -- presumably fatal; confirm against its definition.
+ */
+int
+cmd_repaired(fmd_adm_t *adm, int argc, char *argv[])
+{
+	int err;
+
+	if (getopt(argc, argv, "") != EOF)
+		return (FMADM_EXIT_USAGE);
+
+	if (argc - optind != 1)
+		return (FMADM_EXIT_USAGE);
+
+	/*
+	 * argument could be an fmri (asru, fru or resource) or a label.
+	 */
+	err = fmd_adm_rsrc_repaired(adm, argv[optind]);
+	if (err != 0)
+		die("failed to record repair to %s", argv[optind]);
+
+	/* Same wording as the success message printed by cmd_repair(). */
+	note("recorded repair to %s\n", argv[optind]);
+	return (FMADM_EXIT_SUCCESS);
+}
+
+/*
+ * Handler for the "replaced" subcommand.
+ *
+ * Expects no options and a single operand naming the resource, either as
+ * an fmri (asru, fru or resource) or as a label.  The operand is handed to
+ * fmd_adm_rsrc_replaced(); anything other than a zero return is reported
+ * through die().
+ *
+ * Returns FMADM_EXIT_USAGE on a malformed command line, otherwise
+ * FMADM_EXIT_SUCCESS after noting the recorded replacement.
+ */
+int
+cmd_replaced(fmd_adm_t *adm, int argc, char *argv[])
+{
+	/* getopt() is evaluated first, so optind is settled before use. */
+	if (getopt(argc, argv, "") != EOF || argc - optind != 1)
+		return (FMADM_EXIT_USAGE);
+
+	if (fmd_adm_rsrc_replaced(adm, argv[optind]) != 0)
+		die("failed to record replacement of %s", argv[optind]);
+
+	note("recorded replacement of %s\n", argv[optind]);
+	return (FMADM_EXIT_SUCCESS);
+}
+
+/*
+ * Handler for the "acquit" subcommand.
+ *
+ * Accepts one or two operands and no options:
+ *
+ *   two operands - a uuid together with an fmri or label, in either order.
+ *	fmd_adm_rsrc_acquit() is tried with the operands as given and, if
+ *	that fails, again with them swapped.
+ *   one operand  - a uuid, an fmri (asru, fru or resource) or a label.
+ *	fmd_adm_case_acquit() is tried first; if that fails the operand is
+ *	passed to fmd_adm_rsrc_acquit() with an empty second argument.
+ *
+ * Returns FMADM_EXIT_USAGE for any option or a wrong operand count, and
+ * FMADM_EXIT_SUCCESS once an acquittal has been recorded.  If every attempt
+ * fails the error is reported through die().  Both messages name only the
+ * first operand.
+ */
+int
+cmd_acquit(fmd_adm_t *adm, int argc, char *argv[])
+{
+	int err;
+
+	if (getopt(argc, argv, "") != EOF)
+		return (FMADM_EXIT_USAGE);
+
+	if (argc - optind != 1 && argc - optind != 2)
+		return (FMADM_EXIT_USAGE);
+
+	/*
+	 * argument could be a uuid, an fmri (asru, fru or resource)
+	 * or a label. Or it could be a uuid and an fmri or label.
+	 */
+	if (argc - optind == 2) {
+		err = fmd_adm_rsrc_acquit(adm, argv[optind], argv[optind + 1]);
+		if (err != 0)
+			err = fmd_adm_rsrc_acquit(adm, argv[optind + 1],
+			    argv[optind]);
+	} else {
+		err = fmd_adm_case_acquit(adm, argv[optind]);
+		if (err != 0)
+			err = fmd_adm_rsrc_acquit(adm, argv[optind], "");
+	}
+
+	if (err != 0)
+		die("failed to record acquittal of %s", argv[optind]);
+
+	note("recorded acquittal of %s\n", argv[optind]);
+	return (FMADM_EXIT_SUCCESS);
+}
diff --git a/usr/src/cmd/fm/fmadm/common/fmadm.c b/usr/src/cmd/fm/fmadm/common/fmadm.c
index 5466590590..79c23b419d 100644
--- a/usr/src/cmd/fm/fmadm/common/fmadm.c
+++ b/usr/src/cmd/fm/fmadm/common/fmadm.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -108,7 +108,13 @@ static const struct cmd {
{ cmd_flush, "flush", "<fmri> ...", "flush cached state for resource" },
{ cmd_gc, "gc", "<module>", NULL },
{ cmd_load, "load", "<path>", "load specified fault manager module" },
-{ cmd_repair, "repair", "<fmri>|<uuid>", "record repair to resource(s)" },
+{ cmd_repair, "repair", "<fmri>|label|<uuid>", NULL },
+{ cmd_repaired, "repaired", "<fmri>|label>",
+ "notify fault manager that resource has been repaired" },
+{ cmd_acquit, "acquit", "<fmri> [<uuid>] | label [<uuid>] | <uuid>",
+ "acquit resource or acquit case" },
+{ cmd_replaced, "replaced", "<fmri>|label",
+ "notify fault manager that resource has been replaced" },
{ cmd_reset, "reset", "[-s serd] <module>", "reset module or sub-component" },
{ cmd_rotate, "rotate", "<logname>", "rotate log file" },
{ cmd_unload, "unload", "<module>", "unload specified fault manager module" },
diff --git a/usr/src/cmd/fm/fmadm/common/fmadm.h b/usr/src/cmd/fm/fmadm/common/fmadm.h
index 4c5b2760cd..e94eeacb39 100644
--- a/usr/src/cmd/fm/fmadm/common/fmadm.h
+++ b/usr/src/cmd/fm/fmadm/common/fmadm.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -50,6 +49,9 @@ extern int cmd_flush(fmd_adm_t *, int, char *[]);
extern int cmd_gc(fmd_adm_t *, int, char *[]);
extern int cmd_load(fmd_adm_t *, int, char *[]);
extern int cmd_repair(fmd_adm_t *, int, char *[]);
+extern int cmd_repaired(fmd_adm_t *, int, char *[]);
+extern int cmd_replaced(fmd_adm_t *, int, char *[]);
+extern int cmd_acquit(fmd_adm_t *, int, char *[]);
extern int cmd_reset(fmd_adm_t *, int, char *[]);
extern int cmd_rotate(fmd_adm_t *, int, char *[]);
extern int cmd_unload(fmd_adm_t *, int, char *[]);
diff --git a/usr/src/cmd/fm/fmd/common/fmd.c b/usr/src/cmd/fm/fmd/common/fmd.c
index 04e5adb64a..96b3d7d933 100644
--- a/usr/src/cmd/fm/fmd/common/fmd.c
+++ b/usr/src/cmd/fm/fmd/common/fmd.c
@@ -255,7 +255,7 @@ static const fmd_conf_formal_t _fmd_conf[] = {
{ "debug", &fmd_debug_ops, NULL }, /* daemon debugging flags */
{ "dictdir", &fmd_conf_string, "usr/lib/fm/dict" }, /* default diagcode dir */
{ "domain", &fmd_conf_string, NULL }, /* domain id for de auth */
-{ "fakenotpresent", &fmd_conf_bool, "false" }, /* simulate rsrc not present */
+{ "fakenotpresent", &fmd_conf_uint32, "0" }, /* simulate rsrc not present */
{ "fg", &fmd_conf_bool, "false" }, /* run daemon in foreground */
{ "gc_interval", &fmd_conf_time, "1d" }, /* garbage collection intvl */
{ "ids.avg", &fmd_conf_uint32, "4" }, /* desired idspace chain len */
@@ -272,6 +272,8 @@ static const fmd_conf_formal_t _fmd_conf[] = {
{ "machine", &fmd_conf_string, _fmd_uts.machine }, /* machine name (uname -m) */
{ "nodiagcode", &fmd_conf_string, "-" }, /* diagcode to use if error */
{ "repaircode", &fmd_conf_string, "-" }, /* diagcode for list.repaired */
+{ "resolvecode", &fmd_conf_string, "-" }, /* diagcode for list.resolved */
+{ "updatecode", &fmd_conf_string, "-" }, /* diagcode for list.updated */
{ "osrelease", &fmd_conf_string, _fmd_uts.release }, /* release (uname -r) */
{ "osversion", &fmd_conf_string, _fmd_uts.version }, /* version (uname -v) */
{ "platform", &fmd_conf_string, _fmd_plat }, /* platform string (uname -i) */
@@ -747,6 +749,8 @@ fmd_run(fmd_t *dp, int pfd)
{
char *nodc_key[] = { FMD_FLT_NODC, NULL };
char *repair_key[] = { FM_LIST_REPAIRED_CLASS, NULL };
+ char *resolve_key[] = { FM_LIST_RESOLVED_CLASS, NULL };
+ char *update_key[] = { FM_LIST_UPDATED_CLASS, NULL };
char code_str[128];
struct sigaction act;
@@ -896,6 +900,14 @@ fmd_run(fmd_t *dp, int pfd)
sizeof (code_str)) == 0)
(void) fmd_conf_setprop(dp->d_conf, "repaircode",
code_str);
+ if (fmd_module_dc_key2code(dp->d_self, resolve_key, code_str,
+ sizeof (code_str)) == 0)
+ (void) fmd_conf_setprop(dp->d_conf, "resolvecode",
+ code_str);
+ if (fmd_module_dc_key2code(dp->d_self, update_key, code_str,
+ sizeof (code_str)) == 0)
+ (void) fmd_conf_setprop(dp->d_conf, "updatecode",
+ code_str);
}
fmd_rpc_init();
@@ -939,6 +951,11 @@ fmd_run(fmd_t *dp, int pfd)
fmd_event_rele(e);
/*
+ * Now replay list.updated and list.repaired events
+ */
+ fmd_case_repair_replay();
+
+ /*
* Finally, awaken any threads associated with receiving events from
* open transports and tell them to proceed with fmd_xprt_recv().
*/
diff --git a/usr/src/cmd/fm/fmd/common/fmd_api.c b/usr/src/cmd/fm/fmd/common/fmd_api.c
index 5237fa3c44..8e1e6d717c 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_api.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_api.c
@@ -1137,6 +1137,20 @@ fmd_case_uuclosed(fmd_hdl_t *hdl, const char *uuid)
return (rv);
}
+/*
+ * Look up the case with the given uuid in fmd.d_cases and, if one is found,
+ * transition it to FMD_CASE_RESOLVED and drop the reference returned by the
+ * lookup.  An unknown uuid is silently ignored.  The module lock obtained
+ * from fmd_api_module_lock() is held for the duration of the call.
+ */
+void
+fmd_case_uuresolved(fmd_hdl_t *hdl, const char *uuid)
+{
+	fmd_module_t *mod = fmd_api_module_lock(hdl);
+	fmd_case_t *found;
+
+	found = fmd_case_hash_lookup(fmd.d_cases, uuid);
+	if (found == NULL) {
+		fmd_module_unlock(mod);
+		return;
+	}
+
+	fmd_case_transition(found, FMD_CASE_RESOLVED, 0);
+	fmd_case_rele(found);
+	fmd_module_unlock(mod);
+}
+
static int
fmd_case_instate(fmd_hdl_t *hdl, fmd_case_t *cp, uint_t state)
{
@@ -1846,6 +1860,23 @@ fmd_nvl_fmri_present(fmd_hdl_t *hdl, nvlist_t *nvl)
}
int
+fmd_nvl_fmri_replaced(fmd_hdl_t *hdl, nvlist_t *nvl)
+{
+ fmd_module_t *mp = fmd_api_module_lock(hdl);
+ int rv;
+
+ if (nvl == NULL) {
+ fmd_api_error(mp, EFMD_NVL_INVAL,
+ "invalid nvlist %p\n", (void *)nvl);
+ }
+
+ rv = fmd_fmri_replaced(nvl);
+ fmd_module_unlock(mp);
+
+ return (rv);
+}
+
+int
fmd_nvl_fmri_unusable(fmd_hdl_t *hdl, nvlist_t *nvl)
{
fmd_module_t *mp = fmd_api_module_lock(hdl);
@@ -1868,23 +1899,91 @@ fmd_nvl_fmri_unusable(fmd_hdl_t *hdl, nvlist_t *nvl)
}
int
-fmd_nvl_fmri_faulty(fmd_hdl_t *hdl, nvlist_t *nvl)
+fmd_nvl_fmri_service_state(fmd_hdl_t *hdl, nvlist_t *nvl)
{
fmd_module_t *mp = fmd_api_module_lock(hdl);
- fmd_asru_hash_t *ahp = fmd.d_asrus;
- fmd_asru_t *ap;
- int rv = 0;
+ int rv;
if (nvl == NULL) {
fmd_api_error(mp, EFMD_NVL_INVAL,
"invalid nvlist %p\n", (void *)nvl);
}
- if ((ap = fmd_asru_hash_lookup_nvl(ahp, nvl)) != NULL) {
- rv = (ap->asru_flags & FMD_ASRU_FAULTY) != 0;
- fmd_asru_hash_release(ahp, ap);
+ rv = fmd_fmri_service_state(nvl);
+ if (rv < 0)
+ rv = fmd_fmri_unusable(nvl) ? FMD_SERVICE_STATE_UNUSABLE :
+ FMD_SERVICE_STATE_OK;
+ fmd_module_unlock(mp);
+
+ if (rv < 0) {
+ fmd_api_error(mp, EFMD_FMRI_OP, "invalid fmri for "
+ "fmd_nvl_fmri_service_state\n");
}
+ return (rv);
+}
+
+/*
+ * Argument bundle handed to fmd_rsrc_has_fault() through its void * parameter.
+ *
+ * class - fault class pattern compared against each link's event class with
+ *	   fmd_strmatch(); NULL means any link flagged faulty counts.
+ * rvp	 - points at the caller's result, which is set to 1 when a link
+ *	   qualifies and is otherwise left as the caller initialised it.
+ */
+typedef struct {
+ const char *class;
+ int *rvp;
+} fmd_has_fault_arg_t;
+
+/*
+ * Per-link callback: store 1 through the rvp pointer carried in arg when the
+ * link is flagged FMD_ASRU_FAULTY and, if a class pattern was supplied, the
+ * FM_CLASS string of the link's event matches that pattern.  The result is
+ * never cleared here, so a single qualifying link is enough.
+ */
+static void
+fmd_rsrc_has_fault(fmd_asru_link_t *alp, void *arg)
+{
+	fmd_has_fault_arg_t *want = arg;
+	char *evclass;
+
+	/* A link that is not faulty can never qualify. */
+	if (!(alp->al_flags & FMD_ASRU_FAULTY))
+		return;
+
+	if (want->class != NULL) {
+		/* A pattern was given: the event must exist and match it. */
+		if (alp->al_event == NULL)
+			return;
+		if (nvlist_lookup_string(alp->al_event, FM_CLASS,
+		    &evclass) != 0)
+			return;
+		if (!fmd_strmatch(evclass, want->class))
+			return;
+	}
+
+	*want->rvp = 1;
+}
+
+int
+fmd_nvl_fmri_has_fault(fmd_hdl_t *hdl, nvlist_t *nvl, int type, char *class)
+{
+ fmd_module_t *mp = fmd_api_module_lock(hdl);
+ fmd_asru_hash_t *ahp = fmd.d_asrus;
+ int rv = 0;
+ char *name;
+ int namelen;
+ fmd_has_fault_arg_t fhf;
+
+ if (nvl == NULL) {
+ fmd_api_error(mp, EFMD_NVL_INVAL,
+ "invalid nvlist %p\n", (void *)nvl);
+ }
+ if ((namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1)
+ fmd_api_error(mp, EFMD_NVL_INVAL,
+ "invalid nvlist: %p\n", (void *)nvl);
+ name = fmd_alloc(namelen + 1, FMD_SLEEP);
+ if (fmd_fmri_nvl2str(nvl, name, namelen + 1) == -1) {
+ if (name != NULL)
+ fmd_free(name, namelen + 1);
+ fmd_api_error(mp, EFMD_NVL_INVAL,
+ "invalid nvlist: %p\n", (void *)nvl);
+ }
+
+ fhf.class = class;
+ fhf.rvp = &rv;
+ if (type == FMD_HAS_FAULT_RESOURCE)
+ fmd_asru_hash_apply_by_rsrc(ahp, name, fmd_rsrc_has_fault,
+ &fhf);
+ else if (type == FMD_HAS_FAULT_ASRU)
+ fmd_asru_hash_apply_by_asru(ahp, name, fmd_rsrc_has_fault,
+ &fhf);
+ else if (type == FMD_HAS_FAULT_FRU)
+ fmd_asru_hash_apply_by_fru(ahp, name, fmd_rsrc_has_fault,
+ &fhf);
+
+ if (name != NULL)
+ fmd_free(name, namelen + 1);
fmd_module_unlock(mp);
return (rv);
}
diff --git a/usr/src/cmd/fm/fmd/common/fmd_api.h b/usr/src/cmd/fm/fmd/common/fmd_api.h
index 3e30867eb0..9a6564c8bc 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_api.h
+++ b/usr/src/cmd/fm/fmd/common/fmd_api.h
@@ -178,6 +178,7 @@ extern const char *fmd_case_uuid(fmd_hdl_t *, fmd_case_t *);
extern fmd_case_t *fmd_case_uulookup(fmd_hdl_t *, const char *);
extern void fmd_case_uuclose(fmd_hdl_t *, const char *);
extern int fmd_case_uuclosed(fmd_hdl_t *, const char *);
+extern void fmd_case_uuresolved(fmd_hdl_t *, const char *);
extern int fmd_case_solved(fmd_hdl_t *, fmd_case_t *);
extern int fmd_case_closed(fmd_hdl_t *, fmd_case_t *);
@@ -225,7 +226,14 @@ extern int fmd_nvl_class_match(fmd_hdl_t *, nvlist_t *, const char *);
extern int fmd_nvl_fmri_expand(fmd_hdl_t *, nvlist_t *);
extern int fmd_nvl_fmri_present(fmd_hdl_t *, nvlist_t *);
extern int fmd_nvl_fmri_unusable(fmd_hdl_t *, nvlist_t *);
-extern int fmd_nvl_fmri_faulty(fmd_hdl_t *, nvlist_t *);
+extern int fmd_nvl_fmri_replaced(fmd_hdl_t *, nvlist_t *);
+extern int fmd_nvl_fmri_service_state(fmd_hdl_t *, nvlist_t *);
+extern int fmd_nvl_fmri_has_fault(fmd_hdl_t *, nvlist_t *, int, char *);
+
+#define FMD_HAS_FAULT_FRU 0
+#define FMD_HAS_FAULT_ASRU 1
+#define FMD_HAS_FAULT_RESOURCE 2
+
extern int fmd_nvl_fmri_contains(fmd_hdl_t *, nvlist_t *, nvlist_t *);
extern nvlist_t *fmd_nvl_fmri_translate(fmd_hdl_t *, nvlist_t *, nvlist_t *);
diff --git a/usr/src/cmd/fm/fmd/common/fmd_api.map b/usr/src/cmd/fm/fmd/common/fmd_api.map
index dd1535d543..0e064dafde 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_api.map
+++ b/usr/src/cmd/fm/fmd/common/fmd_api.map
@@ -49,6 +49,7 @@
fmd_case_uuclosed = FUNCTION extern;
fmd_case_uuid = FUNCTION extern;
fmd_case_uulookup = FUNCTION extern;
+ fmd_case_uuresolved = FUNCTION extern;
fmd_event_local = FUNCTION extern;
fmd_event_ena_create = FUNCTION extern;
@@ -80,8 +81,10 @@
fmd_nvl_dup = FUNCTION extern;
fmd_nvl_fmri_expand = FUNCTION extern;
fmd_nvl_fmri_present = FUNCTION extern;
+ fmd_nvl_fmri_replaced = FUNCTION extern;
fmd_nvl_fmri_unusable = FUNCTION extern;
- fmd_nvl_fmri_faulty = FUNCTION extern;
+ fmd_nvl_fmri_service_state = FUNCTION extern;
+ fmd_nvl_fmri_has_fault = FUNCTION extern;
fmd_nvl_fmri_contains = FUNCTION extern;
fmd_nvl_fmri_translate = FUNCTION extern;
diff --git a/usr/src/cmd/fm/fmd/common/fmd_asru.c b/usr/src/cmd/fm/fmd/common/fmd_asru.c
index ff3c6ba367..058a3ef384 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_asru.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_asru.c
@@ -164,7 +164,7 @@ fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name)
}
static int
-fmd_asru_is_present(nvlist_t *event)
+fmd_asru_replacement_state(nvlist_t *event)
{
int ps = -1;
nvlist_t *asru, *fru, *rsrc;
@@ -181,16 +181,36 @@ fmd_asru_is_present(nvlist_t *event)
* as still present.
*/
if (fmd_asru_fake_not_present)
- ps = 0;
- if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0)
- ps = fmd_fmri_present(asru);
- if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE,
- &rsrc) == 0)
- ps = fmd_fmri_present(rsrc);
- if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0)
- ps = fmd_fmri_present(fru);
+ return (fmd_asru_fake_not_present);
+ if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0)
+ ps = fmd_fmri_replaced(asru);
+ if (ps == -1) {
+ if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, &rsrc) == 0)
+ ps = fmd_fmri_replaced(rsrc);
+ } else if (ps == FMD_OBJ_STATE_UNKNOWN) {
+ /* see if we can improve on UNKNOWN */
+ if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE,
+ &rsrc) == 0) {
+ int ps2 = fmd_fmri_replaced(rsrc);
+ if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
+ ps2 == FMD_OBJ_STATE_REPLACED)
+ ps = ps2;
+ }
+ }
+ if (ps == -1) {
+ if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0)
+ ps = fmd_fmri_replaced(fru);
+ } else if (ps == FMD_OBJ_STATE_UNKNOWN) {
+ /* see if we can improve on UNKNOWN */
+ if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) {
+ int ps2 = fmd_fmri_replaced(fru);
+ if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
+ ps2 == FMD_OBJ_STATE_REPLACED)
+ ps = ps2;
+ }
+ }
if (ps == -1)
- ps = 1;
+ ps = FMD_OBJ_STATE_UNKNOWN;
return (ps);
}
@@ -404,7 +424,10 @@ static void
fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
{
nvlist_t *nvl = FMD_EVENT_NVL(ep);
- boolean_t f, u, ps, us;
+ boolean_t faulty = FMD_B_FALSE, unusable = FMD_B_FALSE;
+ int ps;
+ boolean_t repaired = FMD_B_FALSE, replaced = FMD_B_FALSE;
+ boolean_t acquitted = FMD_B_FALSE;
nvlist_t *flt, *flt_copy, *asru;
char *case_uuid = NULL, *case_code = NULL;
fmd_asru_t *ap;
@@ -420,7 +443,8 @@ fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
/*
* Extract the most recent values of 'faulty' from the event log.
*/
- if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, &f) != 0) {
+ if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY,
+ &faulty) != 0) {
fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: "
"invalid event log record\n", lp->log_name);
ahp->ah_error = EFMD_ASRU_EVENT;
@@ -434,16 +458,25 @@ fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
}
(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid);
(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code);
+ (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE,
+ &unusable);
+ (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED,
+ &repaired);
+ (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPLACED,
+ &replaced);
+ (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED,
+ &acquitted);
/*
- * Attempt to recreate the case in the CLOSED state.
+ * Attempt to recreate the case in either the CLOSED or REPAIRED state
+ * (depending on whether the faulty bit is still set).
* If the case is already present, fmd_case_recreate() will return it.
* If not, we'll create a new orphaned case. Either way, we use the
* ASRU event to insert a suspect into the partially-restored case.
*/
fmd_module_lock(fmd.d_rmod);
- cp = fmd_case_recreate(fmd.d_rmod, NULL, FMD_CASE_CLOSED, case_uuid,
- case_code);
+ cp = fmd_case_recreate(fmd.d_rmod, NULL, faulty ? FMD_CASE_CLOSED :
+ FMD_CASE_REPAIRED, case_uuid, case_code);
fmd_case_hold(cp);
fmd_module_unlock(fmd.d_rmod);
if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time,
@@ -478,37 +511,31 @@ fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
ap = alp->al_asru;
/*
- * Check to see if the resource is still present in the system. If
- * so, then update the value of the unusable bit based on the current
- * system configuration. If not, then consider unusable.
+ * Check to see if the resource is still present in the system.
*/
- ps = fmd_asru_is_present(flt);
- if (ps) {
- if (nvlist_lookup_nvlist(flt, FM_FAULT_ASRU, &asru) != 0)
- u = FMD_B_FALSE;
- else if ((us = fmd_fmri_unusable(asru)) == -1) {
- fmd_error(EFMD_ASRU_FMRI, "failed to update "
- "status of asru %s", lp->log_name);
- u = FMD_B_FALSE;
- } else
- u = us != 0;
-
- } else
- u = FMD_B_TRUE; /* not present; set unusable */
+ ps = fmd_asru_replacement_state(flt);
+ if (ps == FMD_OBJ_STATE_STILL_PRESENT || ps == FMD_OBJ_STATE_UNKNOWN)
+ ap->asru_flags |= FMD_ASRU_PRESENT;
+ else if (ps == FMD_OBJ_STATE_REPLACED)
+ replaced = FMD_B_TRUE;
nvlist_free(flt);
ap->asru_flags |= FMD_ASRU_RECREATED;
- if (ps)
- ap->asru_flags |= FMD_ASRU_PRESENT;
- if (f) {
+ if (faulty) {
alp->al_flags |= FMD_ASRU_FAULTY;
ap->asru_flags |= FMD_ASRU_FAULTY;
}
- if (u) {
+ if (unusable) {
alp->al_flags |= FMD_ASRU_UNUSABLE;
ap->asru_flags |= FMD_ASRU_UNUSABLE;
}
+ if (replaced)
+ alp->al_reason = FMD_ASRU_REPLACED;
+ else if (repaired)
+ alp->al_reason = FMD_ASRU_REPAIRED;
+ else if (acquitted)
+ alp->al_reason = FMD_ASRU_ACQUITTED;
TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid,
(void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE]));
@@ -629,29 +656,34 @@ fmd_asru_hash_replay(fmd_asru_hash_t *ahp)
* Check if the resource is still present. If not, and if the rsrc.age time
* has expired, then do an implicit repair on the resource.
*/
+/*ARGSUSED*/
static void
-fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *er)
+fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *arg)
{
struct timeval tv;
fmd_log_t *lp;
hrtime_t hrt;
+ int ps;
+ int err;
- if (fmd_asru_is_present(alp->al_event))
- return;
- fmd_time_gettimeofday(&tv);
- lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU);
- hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime);
- fmd_log_rele(lp);
- if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime)
- fmd_asru_repair(alp, er);
+ ps = fmd_asru_replacement_state(alp->al_event);
+ if (ps == FMD_OBJ_STATE_REPLACED) {
+ fmd_asru_replaced(alp, &err);
+ } else if (ps == FMD_OBJ_STATE_NOT_PRESENT) {
+ fmd_time_gettimeofday(&tv);
+ lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid,
+ FMD_LOG_ASRU);
+ hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime);
+ fmd_log_rele(lp);
+ if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime)
+ fmd_asru_removed(alp);
+ }
}
void
fmd_asru_clear_aged_rsrcs()
{
- int err;
-
- fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, &err);
+ fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, NULL);
}
fmd_asru_hash_t *
@@ -881,25 +913,6 @@ fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name)
}
/*
- * Lookup an asru in the hash and place a hold on it.
- */
-fmd_asru_t *
-fmd_asru_hash_lookup_nvl(fmd_asru_hash_t *ahp, nvlist_t *fmri)
-{
- fmd_asru_t *ap;
- char *name = NULL;
- ssize_t namelen;
-
- if (fmd_asru_get_namestr(fmri, &name, &namelen) != 0)
- return (NULL);
- (void) pthread_rwlock_rdlock(&ahp->ah_lock);
- ap = fmd_asru_hash_lookup(ahp, name);
- (void) pthread_rwlock_unlock(&ahp->ah_lock);
- fmd_free(name, namelen + 1);
- return (ap);
-}
-
-/*
* Create a resource cache entry using the fault event "nvl" for one of the
* suspects from the case "cp".
*
@@ -1109,12 +1122,13 @@ static void
fmd_asru_repair_containee(fmd_asru_link_t *alp, void *er)
{
if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
- alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY))
+ alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
+ FMD_ASRU_REPAIRED))
fmd_case_update(alp->al_case);
}
void
-fmd_asru_repair(fmd_asru_link_t *alp, void *er)
+fmd_asru_repaired(fmd_asru_link_t *alp, void *er)
{
int flags;
int rval;
@@ -1122,7 +1136,7 @@ fmd_asru_repair(fmd_asru_link_t *alp, void *er)
/*
* repair this asru cache entry
*/
- rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY);
+ rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPAIRED);
/*
* now check if all entries associated with this asru are repaired and
@@ -1149,12 +1163,134 @@ fmd_asru_repair(fmd_asru_link_t *alp, void *er)
}
+/*
+ * Per-link callback used by fmd_asru_acquit(), which passes the acquitted
+ * entry's al_asru_fmri as "er". If "er" is non-NULL, this link has an asru
+ * fmri, fmd_fmri_contains(er, alp->al_asru_fmri) is positive, and clearing
+ * FMD_ASRU_FAULTY with reason FMD_ASRU_ACQUITTED returns non-zero, then the
+ * case owning this link is updated.
+ */
static void
+fmd_asru_acquit_containee(fmd_asru_link_t *alp, void *er)
+{
+ if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
+ alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
+ FMD_ASRU_ACQUITTED))
+ fmd_case_update(alp->al_case);
+}
+
+/*
+ * Acquit the resource cache entry "alp": clear FMD_ASRU_FAULTY on it with
+ * reason FMD_ASRU_ACQUITTED, and, if the owning asru is then no longer
+ * flagged faulty, apply fmd_asru_acquit_containee() to every entry in
+ * fmd.d_asrus with this entry's asru fmri as the argument.
+ *
+ * "er" is either NULL or a pointer to an int. When it is non-NULL the int
+ * is set to 0 and, if the flag really was cleared, the owning case is
+ * updated. When it is NULL no case update is done here.
+ */
+void
+fmd_asru_acquit(fmd_asru_link_t *alp, void *er)
+{
+ int flags;
+ int rval;
+
+ /*
+ * acquit this asru cache entry
+ */
+ rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_ACQUITTED);
+
+ /*
+ * now check if all entries associated with this asru are acquitted and
+ * if so acquit containees
+ */
+ /* snapshot the asru-wide flags under asru_lock; the test is made after */
+ (void) pthread_mutex_lock(&alp->al_asru->asru_lock);
+ flags = alp->al_asru->asru_flags;
+ (void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
+ if (!(flags & FMD_ASRU_FAULTY))
+ fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_acquit_containee,
+ alp->al_asru_fmri);
+
+ /*
+ * if called from fmd_adm_acquit() and we really did clear the bit then
+ * we need to do a case update to see if the associated case can be
+ * repaired. No need to do this if called from fmd_case_acquit() (ie
+ * when er is NULL) as the case will be explicitly repaired anyway.
+ */
+ if (er) {
+ *(int *)er = 0;
+ if (rval)
+ fmd_case_update(alp->al_case);
+ }
+}
+
+/*
+ * Per-link callback used by fmd_asru_replaced(), which passes the replaced
+ * entry's al_asru_fmri as "er". If "er" is non-NULL, this link has an asru
+ * fmri, fmd_fmri_contains(er, alp->al_asru_fmri) is positive, and clearing
+ * FMD_ASRU_FAULTY with reason FMD_ASRU_REPLACED returns non-zero, then the
+ * case owning this link is updated.
+ */
+static void
+fmd_asru_replaced_containee(fmd_asru_link_t *alp, void *er)
+{
+ if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
+ alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
+ FMD_ASRU_REPLACED))
+ fmd_case_update(alp->al_case);
+}
+
+/*
+ * Mark the resource cache entry "alp" as replaced. Nothing is done if the
+ * replacement state of the resource in alp->al_event is
+ * FMD_OBJ_STATE_STILL_PRESENT. Otherwise FMD_ASRU_FAULTY is cleared with
+ * reason FMD_ASRU_REPLACED and, if the owning asru is then no longer
+ * flagged faulty, fmd_asru_replaced_containee() is applied to every entry
+ * in fmd.d_asrus with this entry's asru fmri as the argument.
+ *
+ * "er" is either NULL or a pointer to an int. When it is non-NULL the int
+ * is set to 0 and, if the flag really was cleared, the owning case is
+ * updated. A NULL "er" is tolerated and, as in fmd_asru_acquit(), skips
+ * the case update. Note the int is not written on the early return.
+ */
+void
+fmd_asru_replaced(fmd_asru_link_t *alp, void *er)
+{
+ int flags;
+ int rval;
+ int ps;
+
+ ps = fmd_asru_replacement_state(alp->al_event);
+ if (ps == FMD_OBJ_STATE_STILL_PRESENT)
+ return;
+
+ /*
+ * mark this cache entry as replaced
+ */
+ rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPLACED);
+
+ /*
+ * now check if all entries associated with this asru are replaced and
+ * if so replace containees
+ */
+ (void) pthread_mutex_lock(&alp->al_asru->asru_lock);
+ flags = alp->al_asru->asru_flags;
+ (void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
+ if (!(flags & FMD_ASRU_FAULTY))
+ fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_replaced_containee,
+ alp->al_asru_fmri);
+
+ /*
+ * Only report status and update the case when the caller supplied
+ * somewhere to store the status; never dereference a NULL er.
+ */
+ if (er) {
+ *(int *)er = 0;
+ if (rval)
+ fmd_case_update(alp->al_case);
+ }
+}
+
+/*
+ * Per-link callback used by fmd_asru_removed(), which passes the removed
+ * entry's al_asru_fmri as "er". If "er" is non-NULL, this link has an asru
+ * fmri, fmd_fmri_contains(er, alp->al_asru_fmri) is positive, and clearing
+ * FMD_ASRU_FAULTY (with a reason of 0) returns non-zero, then the case
+ * owning this link is updated.
+ */
+static void
+fmd_asru_removed_containee(fmd_asru_link_t *alp, void *er)
+{
+ if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
+ alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
+ 0))
+ fmd_case_update(alp->al_case);
+}
+
+/*
+ * Handle a resource that has been removed: clear FMD_ASRU_FAULTY on the
+ * cache entry "alp" with a reason of 0 and, if the owning asru is then no
+ * longer flagged faulty, apply fmd_asru_removed_containee() to every entry
+ * in fmd.d_asrus with this entry's asru fmri as the argument. The owning
+ * case is updated if the flag really was cleared.
+ */
+void
+fmd_asru_removed(fmd_asru_link_t *alp)
+{
+ int flags;
+ int rval;
+
+ /*
+ * clear the faulty flag on this cache entry; a reason of 0 is passed,
+ * so none of ACQUITTED/REPAIRED/REPLACED is requested
+ */
+ rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 0);
+
+ /*
+ * now check if all entries associated with this asru are removed and
+ * if so do the same for containees
+ */
+ (void) pthread_mutex_lock(&alp->al_asru->asru_lock);
+ flags = alp->al_asru->asru_flags;
+ (void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
+ if (!(flags & FMD_ASRU_FAULTY))
+ fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_removed_containee,
+ alp->al_asru_fmri);
+ if (rval)
+ fmd_case_update(alp->al_case);
+}
+
+static void
fmd_asru_logevent(fmd_asru_link_t *alp)
{
fmd_asru_t *ap = alp->al_asru;
- boolean_t f = (ap->asru_flags & FMD_ASRU_FAULTY) != 0;
- boolean_t u = (ap->asru_flags & FMD_ASRU_UNUSABLE) != 0;
- boolean_t m = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0;
+ boolean_t faulty = (alp->al_flags & FMD_ASRU_FAULTY) != 0;
+ boolean_t unusable = (alp->al_flags & FMD_ASRU_UNUSABLE) != 0;
+ boolean_t message = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0;
+ boolean_t repaired = (alp->al_reason == FMD_ASRU_REPAIRED);
+ boolean_t replaced = (alp->al_reason == FMD_ASRU_REPLACED);
+ boolean_t acquitted = (alp->al_reason == FMD_ASRU_ACQUITTED);
fmd_case_impl_t *cip;
fmd_event_t *e;
@@ -1172,9 +1308,9 @@ fmd_asru_logevent(fmd_asru_link_t *alp)
if (lp == NULL)
return; /* can't log events if we can't open the log */
- nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[f | (u << 1)],
- alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, f, u, m,
- alp->al_event, &cip->ci_tv);
+ nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)],
+ alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable,
+ message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted);
(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
@@ -1224,7 +1360,7 @@ fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag)
}
int
-fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag)
+fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag, uint8_t reason)
{
fmd_asru_t *ap = alp->al_asru;
fmd_asru_link_t *nalp;
@@ -1240,9 +1376,16 @@ fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag)
nstate = alp->al_flags & FMD_ASRU_STATE;
if (nstate == ostate) {
+ if (reason > alp->al_reason) {
+ alp->al_reason = reason;
+ fmd_asru_logevent(alp);
+ (void) pthread_cond_broadcast(&ap->asru_cv);
+ }
(void) pthread_mutex_unlock(&ap->asru_lock);
return (0);
}
+ if (reason > alp->al_reason)
+ alp->al_reason = reason;
if (sflag == FMD_ASRU_UNUSABLE)
ap->asru_flags &= ~sflag;
@@ -1277,15 +1420,36 @@ fmd_asru_al_getstate(fmd_asru_link_t *alp)
{
int us, st;
nvlist_t *asru;
+ int ps;
- if (fmd_asru_is_present(alp->al_event) == 0)
+ ps = fmd_asru_replacement_state(alp->al_event);
+ if (ps == FMD_OBJ_STATE_NOT_PRESENT)
return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE);
+ if (ps == FMD_OBJ_STATE_REPLACED) {
+ if (alp->al_reason < FMD_ASRU_REPLACED)
+ alp->al_reason = FMD_ASRU_REPLACED;
+ return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE);
+ }
- if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0)
- us = fmd_fmri_unusable(asru);
- else
- us = (alp->al_flags & FMD_ASRU_UNUSABLE);
st = (alp->al_flags & FMD_ASRU_STATE) | FMD_ASRU_PRESENT;
+ if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) {
+ us = fmd_fmri_service_state(asru);
+ if (us == -1 || us == FMD_SERVICE_STATE_UNKNOWN) {
+ /* not supported by scheme - try fmd_fmri_unusable */
+ us = fmd_fmri_unusable(asru);
+ } else if (us == FMD_SERVICE_STATE_UNUSABLE) {
+ st |= FMD_ASRU_UNUSABLE;
+ return (st);
+ } else if (us == FMD_SERVICE_STATE_OK) {
+ st &= ~FMD_ASRU_UNUSABLE;
+ return (st);
+ } else if (us == FMD_SERVICE_STATE_DEGRADED) {
+ st &= ~FMD_ASRU_UNUSABLE;
+ st |= FMD_ASRU_DEGRADED;
+ return (st);
+ }
+ } else
+ us = (alp->al_flags & FMD_ASRU_UNUSABLE);
if (us > 0)
st |= FMD_ASRU_UNUSABLE;
else if (us == 0)
@@ -1307,7 +1471,8 @@ fmd_asru_getstate(fmd_asru_t *ap)
int us, st;
if (!(ap->asru_flags & FMD_ASRU_INTERNAL) &&
- (fmd_asru_fake_not_present || fmd_fmri_present(ap->asru_fmri) <= 0))
+ (fmd_asru_fake_not_present >= FMD_OBJ_STATE_REPLACED ||
+ fmd_fmri_present(ap->asru_fmri) <= 0))
return (0); /* do not report non-fmd non-present resources */
us = fmd_fmri_unusable(ap->asru_fmri);
diff --git a/usr/src/cmd/fm/fmd/common/fmd_asru.h b/usr/src/cmd/fm/fmd/common/fmd_asru.h
index 9304b87cf3..f0a5738f3e 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_asru.h
+++ b/usr/src/cmd/fm/fmd/common/fmd_asru.h
@@ -89,6 +89,7 @@ typedef struct fmd_asru_link {
nvlist_t *al_event; /* event associated with last change */
uint_t al_refs; /* reference count */
uint_t al_flags; /* flags (see below) */
+ uint8_t al_reason; /* repair reason (see below) */
} fmd_asru_link_t;
#define FMD_ASRU_FAULTY 0x01 /* asru has been diagnosed as faulty */
@@ -98,6 +99,15 @@ typedef struct fmd_asru_link {
#define FMD_ASRU_INVISIBLE 0x10 /* asru is not visibly administered */
#define FMD_ASRU_RECREATED 0x20 /* asru recreated by cache replay */
#define FMD_ASRU_PRESENT 0x40 /* asru present at last R$ update */
+#define FMD_ASRU_DEGRADED 0x80 /* asru service is degraded */
+
+/*
+ * Note the following are defined in order of increasing precedence and
+ * this should not be changed
+ */
+#define FMD_ASRU_ACQUITTED 1 /* asru acquitted */
+#define FMD_ASRU_REPAIRED 2 /* asru repaired */
+#define FMD_ASRU_REPLACED 3 /* asru replaced */
#define FMD_ASRU_STATE (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE)
@@ -146,16 +156,18 @@ extern void fmd_asru_hash_apply_by_case(fmd_asru_hash_t *, fmd_case_t *,
void (*)(fmd_asru_link_t *, void *), void *);
extern fmd_asru_t *fmd_asru_hash_lookup_name(fmd_asru_hash_t *, const char *);
-extern fmd_asru_t *fmd_asru_hash_lookup_nvl(fmd_asru_hash_t *, nvlist_t *);
extern fmd_asru_link_t *fmd_asru_hash_create_entry(fmd_asru_hash_t *,
fmd_case_t *, nvlist_t *);
extern void fmd_asru_hash_release(fmd_asru_hash_t *, fmd_asru_t *);
extern void fmd_asru_hash_delete_case(fmd_asru_hash_t *, fmd_case_t *);
extern void fmd_asru_clear_aged_rsrcs();
-extern void fmd_asru_repair(fmd_asru_link_t *, void *);
+extern void fmd_asru_repaired(fmd_asru_link_t *, void *);
+extern void fmd_asru_acquit(fmd_asru_link_t *, void *);
+extern void fmd_asru_replaced(fmd_asru_link_t *, void *);
+extern void fmd_asru_removed(fmd_asru_link_t *);
extern int fmd_asru_setflags(fmd_asru_link_t *, uint_t);
-extern int fmd_asru_clrflags(fmd_asru_link_t *, uint_t);
+extern int fmd_asru_clrflags(fmd_asru_link_t *, uint_t, uint8_t);
extern int fmd_asru_al_getstate(fmd_asru_link_t *);
extern int fmd_asru_getstate(fmd_asru_t *);
diff --git a/usr/src/cmd/fm/fmd/common/fmd_case.c b/usr/src/cmd/fm/fmd/common/fmd_case.c
index 702f57799c..2958e5ad68 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_case.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_case.c
@@ -50,16 +50,37 @@
* +------------+
* +----------| UNSOLVED |
* | +------------+
- * 1 | 4 |
- * | |
- * +----v---+ /-2->+------v-----+ 3 +--------+
- * | SOLVED |< | CLOSE_WAIT |--------->| CLOSED |
- * +--------+ \-5->+------------+ +--------+
- * | |
- * 6 | | 7
- * +------v-----+ |
- * | REPAIRED |<-------------+
+ * | 1 |
+ * | |
+ * | +-------v----+
+ * 2 | | SOLVED |
+ * | +------------+
+ * | 3 | 5 |
+ * +------------+ | |
+ * | | |
+ * +-v---v----v-+
+ * | CLOSE_WAIT |
+ * +------------+
+ * | | |
+ * +-----------+ | +------------+
+ * | 4 | |
+ * v +-----v------+ |
+ * discard | CLOSED | 6 |
+ * +------------+ |
+ * | |
+ * | +------------+
+ * 7 | |
+ * +-----v----v-+
+ * | REPAIRED |
+ * +------------+
+ * |
+ * 8 |
+ * +-----v------+
+ * | RESOLVED |
* +------------+
+ * |
+ * v
+ * discard
*
* The state machine changes are triggered by calls to fmd_case_transition()
* from various locations inside of fmd, as described below:
@@ -70,34 +91,37 @@
* suspects convicted are marked faulty (F) in R$
* list.suspect event logged and dispatched
*
- * [2] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose()
+ * [2] Called by: fmd_case_close(), fmd_case_uuclose()
+ * Actions: diagnosis engine fmdo_close() entry point scheduled
+ * case discarded upon exit from CLOSE_WAIT
+ *
+ * [3] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose()
* Actions: FMD_CF_ISOLATED flag is set in ci_flags
* suspects convicted (F) are marked unusable (U) in R$
* diagnosis engine fmdo_close() entry point scheduled
- * case transitions to CLOSED [3] upon exit from CLOSE_WAIT
+ * case transitions to CLOSED [4] upon exit from CLOSE_WAIT
*
- * [3] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
+ * [4] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
* Actions: list.isolated event dispatched
* case deleted from module's list of open cases
*
- * [4] Called by: fmd_case_close(), fmd_case_uuclose()
- * Actions: diagnosis engine fmdo_close() entry point scheduled
- * case is subsequently discarded by fmd_case_delete()
- *
* [5] Called by: fmd_case_repair(), fmd_case_update()
* Actions: FMD_CF_REPAIR flag is set in ci_flags
* diagnosis engine fmdo_close() entry point scheduled
* case transitions to REPAIRED [6] upon exit from CLOSE_WAIT
*
- * [6] Called by: fmd_case_repair(), fmd_case_update()
- * Actions: FMD_CF_REPAIR flag is set in ci_flags
- * suspects convicted are marked non faulty (!F) in R$
- * list.repaired event dispatched
+ * [6] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
+ * Actions: suspects convicted are marked non faulty (!F) in R$
+ * list.repaired or list.updated event dispatched
*
* [7] Called by: fmd_case_repair(), fmd_case_update()
* Actions: FMD_CF_REPAIR flag is set in ci_flags
* suspects convicted are marked non faulty (!F) in R$
- * list.repaired event dispatched
+ * list.repaired or list.updated event dispatched
+ *
+ * [8] Called by: fmd_case_uuresolve()
+ * Actions: list.resolved event dispatched
+ * case is discarded
*/
#include <sys/fm/protocol.h>
@@ -128,11 +152,10 @@ static const char *const _fmd_case_snames[] = {
"SOLVED", /* FMD_CASE_SOLVED */
"CLOSE_WAIT", /* FMD_CASE_CLOSE_WAIT */
"CLOSED", /* FMD_CASE_CLOSED */
- "REPAIRED" /* FMD_CASE_REPAIRED */
+ "REPAIRED", /* FMD_CASE_REPAIRED */
+ "RESOLVED" /* FMD_CASE_RESOLVED */
};
-extern volatile uint32_t fmd_asru_fake_not_present;
-
static fmd_case_impl_t *fmd_case_tryhold(fmd_case_impl_t *);
fmd_case_hash_t *
@@ -300,12 +323,20 @@ fmd_case_set_lst(fmd_asru_link_t *alp, void *arg)
*entryp->fcl_msgp = B_FALSE;
entryp->fcl_ba[*entryp->fcl_countp] = 0;
state = fmd_asru_al_getstate(alp);
+ if (state & FMD_ASRU_DEGRADED)
+ entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_DEGRADED;
if (state & FMD_ASRU_UNUSABLE)
entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_UNUSABLE;
if (state & FMD_ASRU_FAULTY)
entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_FAULTY;
if (!(state & FMD_ASRU_PRESENT))
entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_NOT_PRESENT;
+ if (alp->al_reason == FMD_ASRU_REPAIRED)
+ entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPAIRED;
+ else if (alp->al_reason == FMD_ASRU_REPLACED)
+ entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPLACED;
+ else if (alp->al_reason == FMD_ASRU_ACQUITTED)
+ entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_ACQUITTED;
entryp->fcl_nva[*entryp->fcl_countp] = alp->al_event;
(*entryp->fcl_countp)++;
}
@@ -326,6 +357,29 @@ fmd_case_usable(fmd_asru_link_t *alp, void *arg)
*usablep |= !(fmd_asru_al_getstate(alp) & FMD_ASRU_UNUSABLE);
}
+/*
+ * Per-suspect callback: set the int that "arg" points to when this
+ * suspect's link is not flagged FMD_ASRU_FAULTY. The int is never cleared
+ * here, so it accumulates across all suspects visited.
+ */
+static void
+fmd_case_not_faulty(fmd_asru_link_t *alp, void *arg)
+{
+ if ((alp->al_flags & FMD_ASRU_FAULTY) == 0)
+ *(int *)arg |= 1;
+}
+
+/*
+ * Have we got any suspects with an asru that are still unusable and present?
+ *
+ * Per-suspect callback: "arg" points to an int that is set (never cleared)
+ * when this suspect's event carries an FM_FAULT_ASRU member and the state
+ * returned by fmd_asru_al_getstate() has both FMD_ASRU_UNUSABLE and
+ * FMD_ASRU_PRESENT set. Suspects without an asru are ignored.
+ */
+static void
+fmd_case_unusable_and_present(fmd_asru_link_t *alp, void *arg)
+{
+ int *rvalp = (int *)arg;
+ /* evaluated even when the suspect turns out to have no asru */
+ int state = fmd_asru_al_getstate(alp);
+ nvlist_t *asru;
+
+ if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) != 0)
+ return;
+ *rvalp |= ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_PRESENT));
+}
+
nvlist_t *
fmd_case_mkevent(fmd_case_t *cp, const char *class)
{
@@ -359,11 +413,15 @@ fmd_case_mkevent(fmd_case_t *cp, const char *class)
if (cip->ci_code == NULL)
(void) fmd_case_mkcode(cp);
/*
- * For repair event, we lookup diagcode from dict using key
- * "list.repaired".
+ * For repaired, updated and resolved events, we look up the diagcode from
+ * the dict using key "list.repaired", "list.updated" or "list.resolved".
*/
if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
(void) fmd_conf_getprop(fmd.d_conf, "repaircode", &code);
+ else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0)
+ (void) fmd_conf_getprop(fmd.d_conf, "resolvecode", &code);
+ else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0)
+ (void) fmd_conf_getprop(fmd.d_conf, "updatecode", &code);
else
code = cip->ci_code;
@@ -555,7 +613,7 @@ fmd_case_convict(fmd_case_t *cp)
"%s: %s\n", cip->ci_uuid, fmd_strerror(errno));
continue;
}
- (void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE);
+ (void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0);
(void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
}
@@ -630,6 +688,16 @@ fmd_case_publish(fmd_case_t *cp, uint_t state)
(void) pthread_rwlock_unlock(&fmd.d_log_lock);
fmd_dispq_dispatch(fmd.d_disp, e, class);
break;
+
+ case FMD_CASE_RESOLVED:
+ nvl = fmd_case_mkevent(cp, FM_LIST_RESOLVED_CLASS);
+ (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
+ e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
+ (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
+ fmd_log_append(fmd.d_fltlog, e, cp);
+ (void) pthread_rwlock_unlock(&fmd.d_log_lock);
+ fmd_dispq_dispatch(fmd.d_disp, e, class);
+ break;
}
}
@@ -805,7 +873,7 @@ fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp,
fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
fmd_case_impl_t *eip;
- ASSERT(state < FMD_CASE_REPAIRED);
+ ASSERT(state < FMD_CASE_RESOLVED);
(void) pthread_mutex_init(&cip->ci_lock, NULL);
fmd_buf_hash_create(&cip->ci_bufs);
@@ -841,6 +909,14 @@ fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp,
* return the existing case that we found without changing it.
*/
if (mp == fmd.d_rmod) {
+ /*
+ * When recreating an orphan case, state passed in may
+ * either be CLOSED (faulty) or REPAIRED (!faulty). If
+ * any suspects are still CLOSED (faulty) then the
+ * overall state needs to be CLOSED.
+ */
+ if (state == FMD_CASE_CLOSED)
+ cip->ci_state = FMD_CASE_CLOSED;
(void) pthread_mutex_unlock(&cip->ci_lock);
fmd_case_rele((fmd_case_t *)cip);
return ((fmd_case_t *)cip);
@@ -1107,7 +1183,8 @@ fmd_case_recreate_suspect(fmd_case_t *cp, nvlist_t *nvl)
boolean_t b;
(void) pthread_mutex_lock(&cip->ci_lock);
- ASSERT(cip->ci_state == FMD_CASE_CLOSED);
+ ASSERT(cip->ci_state == FMD_CASE_CLOSED ||
+ cip->ci_state == FMD_CASE_REPAIRED);
ASSERT(cip->ci_mod == fmd.d_rmod);
cis->cis_next = cip->ci_suspects;
@@ -1156,8 +1233,10 @@ fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
fmd_case_item_t *cit;
fmd_event_t *e;
+ int resolved = 0;
+ int any_unusable_and_present = 0;
- ASSERT(state <= FMD_CASE_REPAIRED);
+ ASSERT(state <= FMD_CASE_RESOLVED);
(void) pthread_mutex_lock(&cip->ci_lock);
if (!(cip->ci_flags & FMD_CF_SOLVED) && !(flags & FMD_CF_SOLVED))
@@ -1211,9 +1290,49 @@ fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
case FMD_CASE_REPAIRED:
ASSERT(fmd_case_orphaned(cp));
+
+ /*
+ * If all suspects are already either usable or not present then
+ * transition straight to RESOLVED state, publishing both the
+ * list.repaired and list.resolved.
+ */
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
+ fmd_case_unusable_and_present, &any_unusable_and_present);
+ if (any_unusable_and_present)
+ break;
+
+ fmd_module_lock(cip->ci_mod);
+ fmd_list_delete(&cip->ci_mod->mod_cases, cip);
+ fmd_module_unlock(cip->ci_mod);
+ cip->ci_state = FMD_CASE_RESOLVED;
+ (void) pthread_mutex_unlock(&cip->ci_lock);
+ fmd_case_publish(cp, state);
+ TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
+ _fmd_case_snames[FMD_CASE_REPAIRED],
+ _fmd_case_snames[FMD_CASE_RESOLVED]));
+ state = FMD_CASE_RESOLVED;
+ resolved = 1;
+ (void) pthread_mutex_lock(&cip->ci_lock);
+ break;
+
+ case FMD_CASE_RESOLVED:
+ ASSERT(fmd_case_orphaned(cp));
+
+ /*
+ * If all suspects are already either usable or not present then
+ * carry on, publish list.resolved and discard the case.
+ */
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
+ fmd_case_unusable_and_present, &any_unusable_and_present);
+ if (any_unusable_and_present) {
+ (void) pthread_mutex_unlock(&cip->ci_lock);
+ return;
+ }
+
fmd_module_lock(cip->ci_mod);
fmd_list_delete(&cip->ci_mod->mod_cases, cip);
fmd_module_unlock(cip->ci_mod);
+ resolved = 1;
break;
}
@@ -1236,12 +1355,13 @@ fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
}
- /*
- * If we transitioned to REPAIRED, adjust the reference count to
- * reflect our removal from fmd.d_rmod->mod_cases. If the caller has
- * not placed an additional hold on the case, it will now be freed.
- */
- if (state == FMD_CASE_REPAIRED) {
+ if (resolved) {
+ /*
+ * If we transitioned to RESOLVED, adjust the reference count to
+ * reflect our removal from fmd.d_rmod->mod_cases above. If the
+ * caller has not placed an additional hold on the case, it
+ * will now be freed.
+ */
(void) pthread_mutex_lock(&cip->ci_lock);
fmd_asru_hash_delete_case(fmd.d_asrus, cp);
(void) pthread_mutex_unlock(&cip->ci_lock);
@@ -1254,34 +1374,23 @@ fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
* re-validating the suspect list using the resource cache. This function is
* employed by the checkpoint code when restoring a saved, solved case to see
* if the state of the case has effectively changed while fmd was not running
- * or the module was not loaded. If none of the suspects are present anymore,
- * advance the state to REPAIRED. If none are usable, advance to CLOSE_WAIT.
+ * or the module was not loaded.
*/
void
fmd_case_transition_update(fmd_case_t *cp, uint_t state, uint_t flags)
{
fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
- int faulty = 0; /* are any suspects faulty? */
int usable = 0; /* are any suspects usable? */
ASSERT(state >= FMD_CASE_SOLVED);
(void) pthread_mutex_lock(&cip->ci_lock);
- fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_usable, &usable);
(void) pthread_mutex_unlock(&cip->ci_lock);
- /*
- * If none of the suspects were faulty, it implies they were either
- * repaired already or not present and the rsrc.age time has expired.
- * We can move the state on to repaired.
- */
- if (!faulty) {
- state = MAX(state, FMD_CASE_CLOSE_WAIT);
- flags |= FMD_CF_REPAIRED;
- } else if (!usable) {
+ if (!usable) {
state = MAX(state, FMD_CASE_CLOSE_WAIT);
flags |= FMD_CF_ISOLATED;
}
@@ -1361,8 +1470,20 @@ fmd_case_update(fmd_case_t *cp)
fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
(void) pthread_mutex_unlock(&cip->ci_lock);
- if (faulty)
+ if (faulty) {
+ nvlist_t *nvl;
+ fmd_event_t *e;
+ char *class;
+
+ nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
+ (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
+ e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
+ (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
+ fmd_log_append(fmd.d_fltlog, e, cp);
+ (void) pthread_rwlock_unlock(&fmd.d_log_lock);
+ fmd_dispq_dispatch(fmd.d_disp, e, class);
return; /* one or more suspects are still marked faulty */
+ }
if (cstate == FMD_CASE_CLOSED)
fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
@@ -1480,7 +1601,42 @@ fmd_case_repair(fmd_case_t *cp)
return (0); /* already repaired */
}
- fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repair, NULL);
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, NULL);
+ (void) pthread_mutex_unlock(&cip->ci_lock);
+
+ if (cstate == FMD_CASE_CLOSED)
+ fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
+ else
+ fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
+
+ return (0);
+}
+
+int
+fmd_case_acquit(fmd_case_t *cp)
+{
+ fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
+ uint_t cstate;
+
+ (void) pthread_mutex_lock(&cip->ci_lock);
+ cstate = cip->ci_state;
+
+ if (cip->ci_xprt != NULL) {
+ (void) pthread_mutex_unlock(&cip->ci_lock);
+ return (fmd_set_errno(EFMD_CASE_OWNER));
+ }
+
+ if (cstate < FMD_CASE_SOLVED) {
+ (void) pthread_mutex_unlock(&cip->ci_lock);
+ return (fmd_set_errno(EFMD_CASE_STATE));
+ }
+
+ if (cip->ci_flags & FMD_CF_REPAIRED) {
+ (void) pthread_mutex_unlock(&cip->ci_lock);
+ return (0); /* already repaired */
+ }
+
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_acquit, NULL);
(void) pthread_mutex_unlock(&cip->ci_lock);
if (cstate == FMD_CASE_CLOSED)
@@ -1535,3 +1691,72 @@ fmd_case_settime(fmd_case_t *cp, time_t tv_sec, suseconds_t tv_usec)
((fmd_case_impl_t *)cp)->ci_tv.tv_usec = tv_usec;
((fmd_case_impl_t *)cp)->ci_tv_valid = 1;
}
+
+/*
+ * Per-case callback used by fmd_case_repair_replay(). Cases that have not
+ * reached FMD_CASE_SOLVED are ignored. Otherwise the case's suspects are
+ * scanned to see whether any are faulty and whether any are not faulty:
+ *
+ * - no suspect faulty: a list.repaired event is created and dispatched.
+ *   If in addition no suspect is both unusable and present, the case is
+ *   taken off its module's case list, moved to FMD_CASE_RESOLVED,
+ *   published as resolved, removed from the asru hash and released.
+ * - some faulty and some not: a list.updated event is created and
+ *   dispatched.
+ * - all faulty: nothing is done.
+ *
+ * The replayed list.repaired and list.updated events are dispatched only;
+ * this function does not append them to a log. "arg" is unused.
+ */
+/*ARGSUSED*/
+void
+fmd_case_repair_replay_case(fmd_case_t *cp, void *arg)
+{
+ int not_faulty = 0;
+ int faulty = 0;
+ nvlist_t *nvl;
+ fmd_event_t *e;
+ char *class;
+ int any_unusable_and_present = 0;
+ fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
+
+ if (cip->ci_state < FMD_CASE_SOLVED)
+ return;
+
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_not_faulty,
+ &not_faulty);
+
+ if (!faulty) {
+ /*
+ * If none of the suspects is faulty, replay the list.repaired.
+ * If all suspects are already either usable or not present then
+ * also transition straight to RESOLVED state.
+ */
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
+ fmd_case_unusable_and_present, &any_unusable_and_present);
+ if (!any_unusable_and_present) {
+ fmd_module_lock(cip->ci_mod);
+ fmd_list_delete(&cip->ci_mod->mod_cases, cip);
+ fmd_module_unlock(cip->ci_mod);
+ /*
+ * NOTE(review): ci_state is written here without
+ * ci_lock held - confirm no other thread can be using
+ * the case when this replay runs.
+ */
+ cip->ci_state = FMD_CASE_RESOLVED;
+
+ /*
+ * NOTE(review): the lookup result is ignored, so class
+ * is uninitialized if FM_CLASS is missing - presumably
+ * fmd_case_mkevent() always sets it; confirm.
+ */
+ nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
+ (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
+ e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
+ class);
+ fmd_dispq_dispatch(fmd.d_disp, e, class);
+
+ fmd_case_publish(cp, FMD_CASE_RESOLVED);
+ (void) pthread_mutex_lock(&cip->ci_lock);
+ fmd_asru_hash_delete_case(fmd.d_asrus, cp);
+ (void) pthread_mutex_unlock(&cip->ci_lock);
+ /* the case was taken off mod_cases above; drop a hold */
+ fmd_case_rele(cp);
+ } else {
+ nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
+ (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
+ e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
+ class);
+ fmd_dispq_dispatch(fmd.d_disp, e, class);
+ }
+ } else if (not_faulty) {
+ /*
+ * if some but not all of the suspects are not faulty, replay
+ * the list.updated.
+ */
+ nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
+ (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
+ e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
+ fmd_dispq_dispatch(fmd.d_disp, e, class);
+ }
+}
+
+/*
+ * Apply fmd_case_repair_replay_case() to every case in fmd.d_cases. The
+ * parameter list is spelled (void) so that the definition matches the
+ * prototype declared in fmd_case.h.
+ */
+void
+fmd_case_repair_replay(void)
+{
+ fmd_case_hash_apply(fmd.d_cases, fmd_case_repair_replay_case, NULL);
+}
diff --git a/usr/src/cmd/fm/fmd/common/fmd_case.h b/usr/src/cmd/fm/fmd/common/fmd_case.h
index 5995d825ed..a635173795 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_case.h
+++ b/usr/src/cmd/fm/fmd/common/fmd_case.h
@@ -84,7 +84,8 @@ typedef struct fmd_case_impl {
#define FMD_CASE_SOLVED 1 /* case is solved (suspects added) */
#define FMD_CASE_CLOSE_WAIT 2 /* case is executing fmdo_close() */
#define FMD_CASE_CLOSED 3 /* case is closed (reconfig done) */
-#define FMD_CASE_REPAIRED 4 /* case is repaired (can be freed) */
+#define FMD_CASE_REPAIRED 4 /* case is repaired */
+#define FMD_CASE_RESOLVED 5 /* case is resolved (can be freed) */
#define FMD_CF_DIRTY 0x01 /* case is in need of checkpoint */
#define FMD_CF_SOLVED 0x02 /* case has been solved */
@@ -138,8 +139,10 @@ extern void fmd_case_discard(fmd_case_t *);
extern void fmd_case_settime(fmd_case_t *, time_t, suseconds_t);
extern int fmd_case_repair(fmd_case_t *);
+extern int fmd_case_acquit(fmd_case_t *);
extern int fmd_case_contains(fmd_case_t *, fmd_event_t *);
extern int fmd_case_orphaned(fmd_case_t *);
+extern void fmd_case_repair_replay(void);
#ifdef __cplusplus
}
diff --git a/usr/src/cmd/fm/fmd/common/fmd_dispq.c b/usr/src/cmd/fm/fmd/common/fmd_dispq.c
index e0e32270e1..8519a4475a 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_dispq.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_dispq.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -21,7 +20,7 @@
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -328,9 +327,12 @@ fmd_dispq_dispatch_gid(fmd_dispq_t *dqp,
* events contained inside of it, determine the maximum length of all
* class strings that will be used in this dispatch operation.
*/
- if (FMD_EVENT_TYPE(ep) == FMD_EVT_PROTOCOL && strcmp(class,
- FM_LIST_SUSPECT_CLASS) == 0 && nvlist_lookup_nvlist_array(
- FMD_EVENT_NVL(ep), FM_SUSPECT_FAULT_LIST, &nva, &nvc) == 0) {
+ if (FMD_EVENT_TYPE(ep) == FMD_EVT_PROTOCOL &&
+ (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
+ strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 ||
+ strcmp(class, FM_LIST_UPDATED_CLASS) == 0) &&
+ nvlist_lookup_nvlist_array(FMD_EVENT_NVL(ep), FM_SUSPECT_FAULT_LIST,
+ &nva, &nvc) == 0) {
for (nvi = 0; nvi < nvc; nvi++) {
if (nvlist_lookup_string(nva[nvi], FM_CLASS, &c) == 0) {
size_t len = strlen(c) + 1;
diff --git a/usr/src/cmd/fm/fmd/common/fmd_fmri.c b/usr/src/cmd/fm/fmd/common/fmd_fmri.c
index e3630e1025..04a01ac2df 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_fmri.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_fmri.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -324,6 +324,40 @@ fmd_fmri_present(nvlist_t *nvl)
}
+/*
+ * Ask the scheme that owns the FMRI "nvl" for the resource's replacement
+ * state by calling the scheme's sop_replaced operation with sch_opslock
+ * held. Returns -1 if no scheme can be found for the FMRI; otherwise
+ * returns whatever the scheme operation returned (presumably one of the
+ * FMD_OBJ_STATE_* values - confirm against the scheme implementations).
+ */
int
+fmd_fmri_replaced(nvlist_t *nvl)
+{
+ fmd_scheme_t *sp;
+ int rv;
+
+ if ((sp = nvl2scheme(nvl)) == NULL)
+ return (-1); /* errno is set for us */
+
+ (void) pthread_mutex_lock(&sp->sch_opslock);
+ rv = sp->sch_ops.sop_replaced(nvl);
+ (void) pthread_mutex_unlock(&sp->sch_opslock);
+
+ /* drop the scheme reference obtained from nvl2scheme() */
+ fmd_scheme_hash_release(fmd.d_schemes, sp);
+ return (rv);
+}
+
+/*
+ * Ask the scheme that owns the FMRI "nvl" for the resource's service state
+ * by calling the scheme's sop_service_state operation with sch_opslock
+ * held. Returns -1 if no scheme can be found for the FMRI; otherwise
+ * returns whatever the scheme operation returned (presumably one of the
+ * FMD_SERVICE_STATE_* values - confirm against the scheme implementations).
+ */
+int
+fmd_fmri_service_state(nvlist_t *nvl)
+{
+ fmd_scheme_t *sp;
+ int rv;
+
+ if ((sp = nvl2scheme(nvl)) == NULL)
+ return (-1); /* errno is set for us */
+
+ (void) pthread_mutex_lock(&sp->sch_opslock);
+ rv = sp->sch_ops.sop_service_state(nvl);
+ (void) pthread_mutex_unlock(&sp->sch_opslock);
+
+ /* drop the scheme reference obtained from nvl2scheme() */
+ fmd_scheme_hash_release(fmd.d_schemes, sp);
+ return (rv);
+}
+
+int
fmd_fmri_unusable(nvlist_t *nvl)
{
fmd_scheme_t *sp;
diff --git a/usr/src/cmd/fm/fmd/common/fmd_fmri.h b/usr/src/cmd/fm/fmd/common/fmd_fmri.h
index b5bbc0fcc6..6bba321a7b 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_fmri.h
+++ b/usr/src/cmd/fm/fmd/common/fmd_fmri.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -91,10 +91,25 @@ extern void fmd_fmri_fini(void);
extern ssize_t fmd_fmri_nvl2str(nvlist_t *, char *, size_t);
extern int fmd_fmri_expand(nvlist_t *);
extern int fmd_fmri_present(nvlist_t *);
+extern int fmd_fmri_replaced(nvlist_t *);
+extern int fmd_fmri_service_state(nvlist_t *);
extern int fmd_fmri_unusable(nvlist_t *);
extern int fmd_fmri_contains(nvlist_t *, nvlist_t *);
extern nvlist_t *fmd_fmri_translate(nvlist_t *, nvlist_t *);
+/*
+ * Object states, as returned by the fmd_fmri_replaced() scheme entry points.
+ * Note that these start at 1, unlike the service states below.
+ */
+#define FMD_OBJ_STATE_UNKNOWN 1
+#define FMD_OBJ_STATE_STILL_PRESENT 2
+#define FMD_OBJ_STATE_REPLACED 3
+#define FMD_OBJ_STATE_NOT_PRESENT 4
+
+/*
+ * Service states, as returned by the fmd_fmri_service_state() scheme entry
+ * points.
+ */
+#define FMD_SERVICE_STATE_UNKNOWN 0
+#define FMD_SERVICE_STATE_OK 1
+#define FMD_SERVICE_STATE_DEGRADED 2
+#define FMD_SERVICE_STATE_UNUSABLE 3
+#define FMD_SERVICE_STATE_DEGRADED_PENDING_RESET 4
+#define FMD_SERVICE_STATE_UNUSABLE_PENDING_RESET 5
+#define FMD_SERVICE_STATE_UNUSABLE_UNTIL_REPLACED 6
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/cmd/fm/fmd/common/fmd_protocol.c b/usr/src/cmd/fm/fmd/common/fmd_protocol.c
index 5feaf64f10..7064af0164 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_protocol.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_protocol.c
@@ -182,7 +182,8 @@ nvlist_t *
fmd_protocol_rsrc_asru(const char *class,
nvlist_t *fmri, const char *uuid, const char *code,
boolean_t faulty, boolean_t unusable, boolean_t message, nvlist_t *event,
- struct timeval *tvp)
+ struct timeval *tvp, boolean_t repaired, boolean_t replaced,
+ boolean_t acquitted)
{
nvlist_t *nvl;
int64_t tod[2];
@@ -206,6 +207,9 @@ fmd_protocol_rsrc_asru(const char *class,
err |= nvlist_add_string(nvl, FM_RSRC_ASRU_CODE, code);
err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, faulty);
+ err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED, repaired);
+ err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_REPLACED, replaced);
+ err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED, acquitted);
err |= nvlist_add_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE, unusable);
err |= nvlist_add_boolean_value(nvl, FM_SUSPECT_MESSAGE, message);
err |= nvlist_add_int64_array(nvl, FM_SUSPECT_DIAG_TIME, tod, 2);
diff --git a/usr/src/cmd/fm/fmd/common/fmd_protocol.h b/usr/src/cmd/fm/fmd/common/fmd_protocol.h
index 1cd90fc885..68f2196b18 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_protocol.h
+++ b/usr/src/cmd/fm/fmd/common/fmd_protocol.h
@@ -76,7 +76,7 @@ extern nvlist_t *fmd_protocol_list(const char *, nvlist_t *,
struct timeval *);
+/*
+ * The three booleans that follow the timeval are, in order, the repaired,
+ * replaced and acquitted flags taken by the definition in fmd_protocol.c.
+ */
extern nvlist_t *fmd_protocol_rsrc_asru(const char *, nvlist_t *,
const char *, const char *, boolean_t, boolean_t, boolean_t, nvlist_t *,
- struct timeval *);
+ struct timeval *, boolean_t, boolean_t, boolean_t);
extern nvlist_t *fmd_protocol_fmderror(int, const char *, va_list);
extern nvlist_t *fmd_protocol_moderror(struct fmd_module *, int, const char *);
extern nvlist_t *fmd_protocol_xprt_ctl(struct fmd_module *,
diff --git a/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c b/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c
index 916a2be640..2987849868 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c
@@ -468,11 +468,11 @@ fmd_adm_rsrcinfo_1_svc(char *fmri,
bool_t
fmd_adm_rsrcflush_1_svc(char *name, int *rvp, struct svc_req *req)
{
- return (fmd_adm_rsrcrepair_1_svc(name, rvp, req));
+ return (fmd_adm_rsrcrepaired_1_svc(name, rvp, req));
}
bool_t
-fmd_adm_rsrcrepair_1_svc(char *name, int *rvp, struct svc_req *req)
+fmd_adm_rsrcrepaired_1_svc(char *name, int *rvp, struct svc_req *req)
{
int err = FMD_ADM_ERR_RSRCNOTF;
@@ -480,13 +480,73 @@ fmd_adm_rsrcrepair_1_svc(char *name, int *rvp, struct svc_req *req)
err = FMD_ADM_ERR_PERM;
else {
fmd_asru_hash_apply_by_asru(fmd.d_asrus, name,
- fmd_asru_repair, &err);
+ fmd_asru_repaired, &err);
fmd_asru_hash_apply_by_label(fmd.d_asrus, name,
- fmd_asru_repair, &err);
+ fmd_asru_repaired, &err);
fmd_asru_hash_apply_by_fru(fmd.d_asrus, name,
- fmd_asru_repair, &err);
+ fmd_asru_repaired, &err);
fmd_asru_hash_apply_by_rsrc(fmd.d_asrus, name,
- fmd_asru_repair, &err);
+ fmd_asru_repaired, &err);
+ }
+ *rvp = err;
+ return (TRUE);
+}
+
+/*
+ * RPC service routine: mark the resource identified by 'name' as replaced.
+ *
+ * 'name' is tried against the ASRU cache four ways (by ASRU, by label, by
+ * FRU and by resource), applying fmd_asru_replaced to each hit.  The status
+ * stored through 'rvp' is FMD_ADM_ERR_PERM when fmd_rpc_deny() rejects the
+ * request; otherwise it starts out as FMD_ADM_ERR_RSRCNOTF and its address
+ * is given to every walk (presumably fmd_asru_replaced overwrites it when a
+ * matching entry is found -- confirm in fmd_asru.c).
+ *
+ * Always returns TRUE.
+ */
+bool_t
+fmd_adm_rsrcreplaced_1_svc(char *name, int *rvp, struct svc_req *req)
+{
+	int status = FMD_ADM_ERR_RSRCNOTF;
+
+	if (fmd_rpc_deny(req)) {
+		*rvp = FMD_ADM_ERR_PERM;
+		return (TRUE);
+	}
+
+	fmd_asru_hash_apply_by_asru(fmd.d_asrus, name,
+	    fmd_asru_replaced, &status);
+	fmd_asru_hash_apply_by_label(fmd.d_asrus, name,
+	    fmd_asru_replaced, &status);
+	fmd_asru_hash_apply_by_fru(fmd.d_asrus, name,
+	    fmd_asru_replaced, &status);
+	fmd_asru_hash_apply_by_rsrc(fmd.d_asrus, name,
+	    fmd_asru_replaced, &status);
+
+	*rvp = status;
+	return (TRUE);
+}
+
+/*
+ * Argument bundle handed to fmd_asru_ra_cb(): errp is passed on to
+ * fmd_asru_acquit(), and uuid restricts the acquit to links whose case UUID
+ * matches it (an empty string matches every link).
+ */
+typedef struct {
+ int *errp;
+ char *uuid;
+} fmd_adm_ra_t;
+
+/*
+ * ASRU hash walker used by the resource-acquit RPC.  'arg' is a
+ * fmd_adm_ra_t; the link is acquitted when the requested UUID is empty or
+ * equals the link's case UUID, and left untouched otherwise.
+ */
+void
+fmd_asru_ra_cb(fmd_asru_link_t *alp, void *arg)
+{
+	fmd_adm_ra_t *ctx = (fmd_adm_ra_t *)arg;
+	const char *wanted = ctx->uuid;
+
+	/* A non-empty UUID that names some other case filters this link out. */
+	if (wanted[0] != '\0' && strcmp(wanted, alp->al_case_uuid) != 0)
+		return;
+
+	fmd_asru_acquit(alp, ctx->errp);
+}
+
+bool_t
+fmd_adm_rsrcacquit_1_svc(char *name, char *uuid, int *rvp, struct svc_req *req)
+{
+ int err = FMD_ADM_ERR_RSRCNOTF;
+ fmd_adm_ra_t fara;
+
+ if (fmd_rpc_deny(req))
+ err = FMD_ADM_ERR_PERM;
+ else {
+ fara.errp = &err;
+ fara.uuid = uuid;
+ fmd_asru_hash_apply_by_asru(fmd.d_asrus, name,
+ fmd_asru_ra_cb, &fara);
+ fmd_asru_hash_apply_by_label(fmd.d_asrus, name,
+ fmd_asru_ra_cb, &fara);
+ fmd_asru_hash_apply_by_fru(fmd.d_asrus, name,
+ fmd_asru_ra_cb, &fara);
+ fmd_asru_hash_apply_by_rsrc(fmd.d_asrus, name,
+ fmd_asru_ra_cb, &fara);
}
*rvp = err;
return (TRUE);
@@ -669,6 +729,28 @@ fmd_adm_caserepair_1_svc(char *uuid, int *rvp, struct svc_req *req)
return (TRUE);
}
+/*
+ * RPC service routine: acquit the case identified by 'uuid'.
+ *
+ * The status stored through 'rvp' is:
+ *   FMD_ADM_ERR_PERM      fmd_rpc_deny() rejected the request
+ *   FMD_ADM_ERR_CASESRCH  no case with that UUID is in fmd.d_cases
+ *   FMD_ADM_ERR_CASEXPRT  fmd_case_acquit() failed with EFMD_CASE_OWNER
+ *   FMD_ADM_ERR_CASEOPEN  fmd_case_acquit() failed for any other reason
+ *   0                     the case was acquitted
+ *
+ * The case found by the lookup is released again before returning.
+ * Always returns TRUE.
+ */
+bool_t
+fmd_adm_caseacquit_1_svc(char *uuid, int *rvp, struct svc_req *req)
+{
+	fmd_case_t *cp;
+	int status;
+
+	if (fmd_rpc_deny(req)) {
+		*rvp = FMD_ADM_ERR_PERM;
+		return (TRUE);
+	}
+
+	cp = fmd_case_hash_lookup(fmd.d_cases, uuid);
+	if (cp == NULL) {
+		*rvp = FMD_ADM_ERR_CASESRCH;
+		return (TRUE);
+	}
+
+	/* errno is inspected before the release below can disturb it. */
+	if (fmd_case_acquit(cp) == 0)
+		status = 0;
+	else if (errno == EFMD_CASE_OWNER)
+		status = FMD_ADM_ERR_CASEXPRT;
+	else
+		status = FMD_ADM_ERR_CASEOPEN;
+
+	fmd_case_rele(cp);
+
+	*rvp = status;
+	return (TRUE);
+}
+
void
fmd_adm_caselist_case(fmd_case_t *cp, void *arg)
{
diff --git a/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.x b/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.x
index c2b67031cd..1f9c8a3d04 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.x
+++ b/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.x
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -135,7 +135,7 @@ program FMD_ADM {
struct fmd_rpc_rsrclist FMD_ADM_RSRCLIST(bool) = 9;
struct fmd_rpc_rsrcinfo FMD_ADM_RSRCINFO(string) = 10;
int FMD_ADM_RSRCFLUSH(string) = 11;
- int FMD_ADM_RSRCREPAIR(string) = 12;
+ int FMD_ADM_RSRCREPAIRED(string) = 12;
struct fmd_rpc_serdlist FMD_ADM_SERDINFO(string) = 13;
int FMD_ADM_SERDRESET(string, string) = 14;
int FMD_ADM_LOGROTATE(string) = 15;
@@ -144,6 +144,9 @@ program FMD_ADM {
struct fmd_rpc_modstat FMD_ADM_XPRTSTAT(int32_t) = 18;
struct fmd_rpc_caselist FMD_ADM_CASELIST(void) = 19;
struct fmd_rpc_caseinfo FMD_ADM_CASEINFO(string) = 20;
+ int FMD_ADM_RSRCREPLACED(string) = 21;
+ int FMD_ADM_RSRCACQUIT(string, string) = 22;
+ int FMD_ADM_CASEACQUIT(string) = 23;
} = 1;
} = 100169;
diff --git a/usr/src/cmd/fm/fmd/common/fmd_scheme.c b/usr/src/cmd/fm/fmd/common/fmd_scheme.c
index de7580320d..7b97aa7ae4 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_scheme.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_scheme.c
@@ -78,6 +78,44 @@ fmd_scheme_fmd_present(nvlist_t *nvl)
}
+/*
+ * Built-in "fmd" scheme: decide whether the module named in the FMRI is
+ * still the one that is loaded.  The FMRI must carry both a module name and
+ * a version, otherwise EINVAL is set through fmd_fmri_set_errno().
+ *
+ * Returns FMD_OBJ_STATE_STILL_PRESENT when a module of that name is found in
+ * fmd.d_mod_hash and its mod_vers equals the FMRI's version, and
+ * FMD_OBJ_STATE_NOT_PRESENT in every other case (no such module, no
+ * recorded version, or a different version).
+ */
static int
+fmd_scheme_fmd_replaced(nvlist_t *nvl)
+{
+	char *name, *version;
+	fmd_module_t *mp;
+	int same_vers;
+
+	if (nvlist_lookup_string(nvl, FM_FMRI_FMD_NAME, &name) != 0 ||
+	    nvlist_lookup_string(nvl, FM_FMRI_FMD_VERSION, &version) != 0)
+		return (fmd_fmri_set_errno(EINVAL));
+
+	mp = fmd_modhash_lookup(fmd.d_mod_hash, name);
+	if (mp == NULL)
+		return (FMD_OBJ_STATE_NOT_PRESENT);
+
+	same_vers = (mp->mod_vers != NULL &&
+	    strcmp(mp->mod_vers, version) == 0);
+	fmd_module_rele(mp);
+
+	if (same_vers)
+		return (FMD_OBJ_STATE_STILL_PRESENT);
+
+	return (FMD_OBJ_STATE_NOT_PRESENT);
+}
+
+/*
+ * Built-in "fmd" scheme: report the service state of the module named in
+ * the FMRI.  A missing module name sets EINVAL through fmd_fmri_set_errno().
+ *
+ * Returns FMD_SERVICE_STATE_OK when the module is found in fmd.d_mod_hash
+ * and its mod_error is zero; FMD_SERVICE_STATE_UNUSABLE when mod_error is
+ * non-zero or when no module of that name is found.
+ */
+static int
+fmd_scheme_fmd_service_state(nvlist_t *nvl)
+{
+	char *name;
+	fmd_module_t *mp;
+	int failed;
+
+	if (nvlist_lookup_string(nvl, FM_FMRI_FMD_NAME, &name) != 0)
+		return (fmd_fmri_set_errno(EINVAL));
+
+	mp = fmd_modhash_lookup(fmd.d_mod_hash, name);
+	if (mp == NULL)
+		return (FMD_SERVICE_STATE_UNUSABLE);
+
+	failed = (mp->mod_error != 0);
+	fmd_module_rele(mp);
+
+	if (failed)
+		return (FMD_SERVICE_STATE_UNUSABLE);
+
+	return (FMD_SERVICE_STATE_OK);
+}
+
+static int
fmd_scheme_fmd_unusable(nvlist_t *nvl)
{
char *name;
@@ -125,6 +163,8 @@ static const fmd_scheme_ops_t _fmd_scheme_default_ops = {
(ssize_t (*)())fmd_scheme_notsup, /* sop_nvl2str */
(int (*)())fmd_scheme_nop, /* sop_expand */
(int (*)())fmd_scheme_notsup, /* sop_present */
+ (int (*)())fmd_scheme_notsup, /* sop_replaced */
+ (int (*)())fmd_scheme_notsup, /* sop_service_state */
(int (*)())fmd_scheme_notsup, /* sop_unusable */
(int (*)())fmd_scheme_notsup, /* sop_contains */
fmd_scheme_notranslate /* sop_translate */
@@ -136,6 +176,8 @@ static const fmd_scheme_ops_t _fmd_scheme_builtin_ops = {
fmd_scheme_fmd_nvl2str, /* sop_nvl2str */
(int (*)())fmd_scheme_nop, /* sop_expand */
fmd_scheme_fmd_present, /* sop_present */
+ fmd_scheme_fmd_replaced, /* sop_replaced */
+ fmd_scheme_fmd_service_state, /* sop_service_state */
fmd_scheme_fmd_unusable, /* sop_unusable */
(int (*)())fmd_scheme_notsup, /* sop_contains */
fmd_scheme_notranslate /* sop_translate */
@@ -151,6 +193,9 @@ static const fmd_scheme_opd_t _fmd_scheme_ops[] = {
{ "fmd_fmri_nvl2str", offsetof(fmd_scheme_ops_t, sop_nvl2str) },
{ "fmd_fmri_expand", offsetof(fmd_scheme_ops_t, sop_expand) },
{ "fmd_fmri_present", offsetof(fmd_scheme_ops_t, sop_present) },
+ { "fmd_fmri_replaced", offsetof(fmd_scheme_ops_t, sop_replaced) },
+ { "fmd_fmri_service_state", offsetof(fmd_scheme_ops_t,
+ sop_service_state) },
{ "fmd_fmri_unusable", offsetof(fmd_scheme_ops_t, sop_unusable) },
{ "fmd_fmri_contains", offsetof(fmd_scheme_ops_t, sop_contains) },
{ "fmd_fmri_translate", offsetof(fmd_scheme_ops_t, sop_translate) },
diff --git a/usr/src/cmd/fm/fmd/common/fmd_scheme.h b/usr/src/cmd/fm/fmd/common/fmd_scheme.h
index 4243d34327..356ae7d460 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_scheme.h
+++ b/usr/src/cmd/fm/fmd/common/fmd_scheme.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -21,7 +20,7 @@
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -48,6 +47,8 @@ typedef struct fmd_scheme_ops {
ssize_t (*sop_nvl2str)(nvlist_t *, char *, size_t);
int (*sop_expand)(nvlist_t *);
int (*sop_present)(nvlist_t *);
+ int (*sop_replaced)(nvlist_t *);
+ int (*sop_service_state)(nvlist_t *);
int (*sop_unusable)(nvlist_t *);
int (*sop_contains)(nvlist_t *, nvlist_t *);
nvlist_t *(*sop_translate)(nvlist_t *, nvlist_t *);
diff --git a/usr/src/cmd/fm/fmd/common/fmd_self.c b/usr/src/cmd/fm/fmd/common/fmd_self.c
index 81540e2065..3b0824fa4b 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_self.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_self.c
@@ -141,8 +141,10 @@ self_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
*/
if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
strcmp(class, FM_LIST_ISOLATED_CLASS) == 0 ||
+ strcmp(class, FM_LIST_UPDATED_CLASS) == 0 ||
+ strcmp(class, FM_LIST_RESOLVED_CLASS) == 0 ||
strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
- return; /* if no agents are present just drop list.suspect */
+ return; /* if no agents are present just drop list.* */
if (strncmp(class, FMD_ERR_CLASS, FMD_ERR_CLASS_LEN) == 0)
return; /* if fmd itself produced the error just drop it */
diff --git a/usr/src/cmd/fm/fmdump/common/fault.c b/usr/src/cmd/fm/fmdump/common/fault.c
index fcb61ff761..eaa1a10f52 100644
--- a/usr/src/cmd/fm/fmdump/common/fault.c
+++ b/usr/src/cmd/fm/fmdump/common/fault.c
@@ -44,6 +44,15 @@ flt_short(fmd_log_t *lp, const fmd_log_record_t *rp, FILE *fp)
(void) snprintf(str, sizeof (str), "%s %s", code, "Repaired");
code = str;
}
+ if (class != NULL && strcmp(class, FM_LIST_RESOLVED_CLASS) == 0) {
+ (void) snprintf(str, sizeof (str), "%s %s", code, "Resolved");
+ code = str;
+ }
+
+ if (class != NULL && strcmp(class, FM_LIST_UPDATED_CLASS) == 0) {
+ (void) snprintf(str, sizeof (str), "%s %s", code, "Updated");
+ code = str;
+ }
fmdump_printf(fp, "%-20s %-32s %s\n",
fmdump_date(buf, sizeof (buf), rp), uuid, code);
@@ -56,6 +65,7 @@ flt_verb1(fmd_log_t *lp, const fmd_log_record_t *rp, FILE *fp)
{
uint_t i, size = 0;
nvlist_t **nva;
+ uint8_t *ba;
(void) flt_short(lp, rp, fp);
(void) nvlist_lookup_uint32(rp->rec_nvl, FM_SUSPECT_FAULT_SZ, &size);
@@ -63,6 +73,8 @@ flt_verb1(fmd_log_t *lp, const fmd_log_record_t *rp, FILE *fp)
if (size != 0) {
(void) nvlist_lookup_nvlist_array(rp->rec_nvl,
FM_SUSPECT_FAULT_LIST, &nva, &size);
+ (void) nvlist_lookup_uint8_array(rp->rec_nvl,
+ FM_SUSPECT_FAULT_STATUS, &ba, &size);
}
for (i = 0; i < size; i++) {
@@ -91,15 +103,24 @@ flt_verb1(fmd_log_t *lp, const fmd_log_record_t *rp, FILE *fp)
}
- fmdump_printf(fp, " %3u%% %s\n\n",
+ fmdump_printf(fp, " %3u%% %s",
pct, class ? class : "-");
- /*
- * Originally we didn't require FM_FAULT_RESOURCE, so if it
- * isn't defined in the event, display the ASRU FMRI instead.
- */
+ if (ba[i] & FM_SUSPECT_FAULTY)
+ fmdump_printf(fp, "\n\n");
+ else if (ba[i] & FM_SUSPECT_NOT_PRESENT)
+ fmdump_printf(fp, "\tRemoved\n\n");
+ else if (ba[i] & FM_SUSPECT_REPLACED)
+ fmdump_printf(fp, "\tReplaced\n\n");
+ else if (ba[i] & FM_SUSPECT_REPAIRED)
+ fmdump_printf(fp, "\tRepair Attempted\n\n");
+ else if (ba[i] & FM_SUSPECT_ACQUITTED)
+ fmdump_printf(fp, "\tAcquitted\n\n");
+ else
+ fmdump_printf(fp, "\n\n");
+
fmdump_printf(fp, " Problem in: %s\n",
- rname ? rname : aname ? aname : "-");
+ rname ? rname : "-");
fmdump_printf(fp, " Affects: %s\n",
aname ? aname : "-");
diff --git a/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c b/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c
index c110c7dd97..eb9ac7c158 100644
--- a/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c
+++ b/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c
@@ -289,7 +289,7 @@ nvl2subr(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t **asrup)
}
static void
-cma_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl, boolean_t repair)
+cma_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl, const char *class)
{
char *uuid = NULL;
nvlist_t **nva;
@@ -308,9 +308,11 @@ cma_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl, boolean_t repair)
}
keepopen = nvc;
- while (nvc-- != 0 && (repair || !fmd_case_uuclosed(hdl, uuid))) {
+ while (nvc-- != 0 && (strcmp(class, FM_LIST_SUSPECT_CLASS) != 0 ||
+ !fmd_case_uuclosed(hdl, uuid))) {
nvlist_t *nvl = *nva++;
const cma_subscriber_t *subr;
+ int has_fault;
if ((subr = nvl2subr(hdl, nvl, &asru)) == NULL)
continue;
@@ -322,8 +324,17 @@ cma_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl, boolean_t repair)
* A handler must not close the case itself.
*/
if (subr->subr_func != NULL) {
- err = subr->subr_func(hdl, nvl, asru, uuid, repair);
-
+ has_fault = fmd_nvl_fmri_has_fault(hdl, asru,
+ FMD_HAS_FAULT_ASRU, NULL);
+ if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) {
+ if (has_fault == 1)
+ err = subr->subr_func(hdl, nvl, asru,
+ uuid, 0);
+ } else {
+ if (has_fault == 0)
+ err = subr->subr_func(hdl, nvl, asru,
+ uuid, 1);
+ }
if (err == CMA_RA_SUCCESS)
keepopen--;
}
@@ -332,10 +343,12 @@ cma_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl, boolean_t repair)
* Do not close the case if we are handling cache faults.
*/
if (nvlist_lookup_uint32(asru, FM_FMRI_CPU_CACHE_INDEX, &index) != 0) {
- if (!keepopen && !repair) {
+ if (!keepopen && strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) {
fmd_case_uuclose(hdl, uuid);
}
}
+ if (!keepopen && strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
+ fmd_case_uuresolved(hdl, uuid);
}
static void
@@ -347,21 +360,23 @@ cma_recv_one(fmd_hdl_t *hdl, nvlist_t *nvl)
if ((subr = nvl2subr(hdl, nvl, &asru)) == NULL)
return;
- if (subr->subr_func != NULL)
- (void) subr->subr_func(hdl, nvl, asru, NULL, 0);
+ if (subr->subr_func != NULL) {
+ if (fmd_nvl_fmri_has_fault(hdl, asru,
+ FMD_HAS_FAULT_ASRU, NULL) == 1)
+ (void) subr->subr_func(hdl, nvl, asru, NULL, 0);
+ }
}
/*ARGSUSED*/
static void
cma_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
{
- boolean_t repair = B_FALSE;
-
fmd_hdl_debug(hdl, "received %s\n", class);
if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
- (repair = (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)))
- cma_recv_list(hdl, nvl, repair);
+ strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 ||
+ strcmp(class, FM_LIST_UPDATED_CLASS) == 0)
+ cma_recv_list(hdl, nvl, class);
else
cma_recv_one(hdl, nvl);
}
@@ -448,7 +463,6 @@ _fmd_init(fmd_hdl_t *hdl)
if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
return; /* invalid data in configuration file */
- fmd_hdl_subscribe(hdl, "list.repaired");
fmd_hdl_subscribe(hdl, "fault.cpu.*");
fmd_hdl_subscribe(hdl, "fault.memory.*");
#ifdef opl
diff --git a/usr/src/cmd/fm/modules/common/disk-monitor/disk-monitor.conf b/usr/src/cmd/fm/modules/common/disk-monitor/disk-monitor.conf
index 374e3196ec..808a8bd408 100644
--- a/usr/src/cmd/fm/modules/common/disk-monitor/disk-monitor.conf
+++ b/usr/src/cmd/fm/modules/common/disk-monitor/disk-monitor.conf
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#ident "%Z%%M% %I% %E% SMI"
@@ -27,5 +27,4 @@
# fmd configuration file for the disk-monitor.so disk monitor.
#
subscribe fault.io.disk.*
-subscribe list.repaired
dictionary DISK
diff --git a/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c b/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c
index b47f55271d..7e12e7abf6 100644
--- a/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c
+++ b/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -174,7 +174,7 @@ dm_fault_execute_actions(fmd_hdl_t *hdl, diskmon_t *diskp, nvlist_t *nvl)
}
static void
-diskmon_agent_repair(fmd_hdl_t *hdl, nvlist_t *nvl)
+diskmon_agent_repair(fmd_hdl_t *hdl, nvlist_t *nvl, int repair)
{
char *uuid = NULL;
nvlist_t **nva;
@@ -209,6 +209,9 @@ diskmon_agent_repair(fmd_hdl_t *hdl, nvlist_t *nvl)
dm_state_change(diskp, HPS_REPAIRED);
}
+ if (repair)
+ fmd_case_uuresolved(hdl, uuid);
+
}
static void
@@ -267,12 +270,17 @@ diskmon_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
* Act on the fault suspect list or repaired list (embedded agent
* action).
*/
- if (fmd_nvl_class_match(hdl, nvl, "list.repaired")) {
+ if (fmd_nvl_class_match(hdl, nvl, FM_LIST_REPAIRED_CLASS)) {
+
+ diskmon_agent_repair(hdl, nvl, 1);
+ return;
+
+ } else if (fmd_nvl_class_match(hdl, nvl, FM_LIST_UPDATED_CLASS)) {
- diskmon_agent_repair(hdl, nvl);
+ diskmon_agent_repair(hdl, nvl, 0);
return;
- } else if (fmd_nvl_class_match(hdl, nvl, "list.suspect")) {
+ } else if (fmd_nvl_class_match(hdl, nvl, FM_LIST_SUSPECT_CLASS)) {
diskmon_agent_suspect(hdl, nvl);
return;
diff --git a/usr/src/cmd/fm/modules/common/eversholt/eval.c b/usr/src/cmd/fm/modules/common/eversholt/eval.c
index 1e6e99d8eb..a93f56df7f 100644
--- a/usr/src/cmd/fm/modules/common/eversholt/eval.c
+++ b/usr/src/cmd/fm/modules/common/eversholt/eval.c
@@ -56,6 +56,8 @@ static int check_expr_args(struct evalue *lp, struct evalue *rp,
static struct node *eval_fru(struct node *np);
static struct node *eval_asru(struct node *np);
+extern fmd_hdl_t *Hdl; /* handle from eft.c */
+
/*
* begins_with -- return true if rhs path begins with everything in lhs path
*/
@@ -409,6 +411,27 @@ eval_func(struct node *funcnp, struct lut *ex, struct node *events[],
if (cp != NULL)
valuep->v = 1;
return (1);
+	} else if (funcname == L_has_fault) {
+		nvlist_t *asru = NULL, *fru = NULL, *rsrc = NULL;
+
+		/*
+		 * has_fault(path, "class"): translate the path named by the
+		 * left argument into a resource FMRI and ask fmd whether that
+		 * resource has a fault.  An empty class string on the right
+		 * is passed on as NULL.
+		 */
+		nodep = eval_getname(funcnp, ex, events, np->u.expr.left,
+		    globals, croot, arrowp, try, &duped);
+		path = ipath2str(NULL, ipath(nodep));
+		platform_units_translate(0, croot, &asru, &fru, &rsrc, path);
+		FREE((void *)path);
+		if (duped)
+			tree_free(nodep);
+
+		if (rsrc == NULL)
+			valuep->v = 0;
+		else
+			valuep->v = fmd_nvl_fmri_has_fault(Hdl, rsrc,
+			    FMD_HAS_FAULT_RESOURCE,
+			    strcmp(np->u.expr.right->u.quote.s, "") == 0 ?
+			    NULL : (char *)np->u.expr.right->u.quote.s);
+
+		/*
+		 * Only the type is stamped here.  valuep->v must keep the
+		 * answer computed above; resetting it to 0 would make
+		 * has_fault() evaluate to false unconditionally.
+		 */
+		valuep->t = UINT64;
+		return (1);
} else if (funcname == L_count) {
struct stats *statp;
struct istat_entry ent;
diff --git a/usr/src/cmd/fm/modules/common/eversholt/fme.c b/usr/src/cmd/fm/modules/common/eversholt/fme.c
index e0c5252f78..8f56c53a5b 100644
--- a/usr/src/cmd/fm/modules/common/eversholt/fme.c
+++ b/usr/src/cmd/fm/modules/common/eversholt/fme.c
@@ -2885,7 +2885,7 @@ publish_suspects(struct fme *fmep, struct rsl *srl)
/*
* If "action" property exists, evaluate it; this must be done
* before the allfaulty check below since some actions may
- * modify the asru to be used in fmd_nvl_fmri_faulty. This
+ * modify the asru to be used in fmd_nvl_fmri_has_fault. This
* needs to be restructured if any new actions are introduced
* that have effects that we do not want to be visible if
* we decide not to publish in the dupclose check below.
@@ -2918,7 +2918,8 @@ publish_suspects(struct fme *fmep, struct rsl *srl)
FM_FAULT_ASRU, &asru) != 0) {
out(O_ALTFP|O_VERB, "NULL asru");
allfaulty = B_FALSE;
- } else if (fmd_nvl_fmri_faulty(fmep->hdl, asru)) {
+ } else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
+ FMD_HAS_FAULT_ASRU, NULL)) {
out(O_ALTFP|O_VERB, "faulty");
} else {
out(O_ALTFP|O_VERB, "not faulty");
diff --git a/usr/src/cmd/fm/modules/common/io-retire/io-retire.conf b/usr/src/cmd/fm/modules/common/io-retire/io-retire.conf
index cb89b6b72c..6b520e2c8b 100644
--- a/usr/src/cmd/fm/modules/common/io-retire/io-retire.conf
+++ b/usr/src/cmd/fm/modules/common/io-retire/io-retire.conf
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#pragma ident "%Z%%M% %I% %E% SMI"
@@ -28,6 +28,5 @@
#
setprop global-disable false
subscribe fault.io.*
-subscribe list.repaired
subscribe defect.io.*
subscribe defect.ultraSPARC-II.memory.nodiag
diff --git a/usr/src/cmd/fm/modules/common/io-retire/rio_main.c b/usr/src/cmd/fm/modules/common/io-retire/rio_main.c
index 072ed3e809..0dfd1415ba 100644
--- a/usr/src/cmd/fm/modules/common/io-retire/rio_main.c
+++ b/usr/src/cmd/fm/modules/common/io-retire/rio_main.c
@@ -122,17 +122,16 @@ free_exception_list(fmd_hdl_t *hdl)
static void
rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
{
- nvlist_t **faults;
+ nvlist_t **faults = NULL;
nvlist_t *asru;
- uint_t nfaults;
+ uint_t nfaults = 0;
int f;
- char devpath[PATH_MAX];
char *path;
char *uuid;
char *scheme;
di_retire_t drt = {0};
int retire;
- int rval;
+ int rval = 0;
int error;
char *snglfault = FM_FAULT_CLASS"."FM_ERROR_IO".";
boolean_t rtr;
@@ -154,24 +153,23 @@ rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
retire = 1;
} else if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) {
retire = 0;
+ } else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) {
+ retire = 0;
} else if (strncmp(class, snglfault, strlen(snglfault)) == 0) {
- fmd_hdl_debug(hdl, "rio_recv: single fault: %s\n", class);
- return;
+ retire = 1;
+ faults = &nvl;
+ nfaults = 1;
} else {
fmd_hdl_debug(hdl, "rio_recv: not list.* class: %s\n", class);
return;
}
- faults = NULL;
- nfaults = 0;
- if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
- &faults, &nfaults) != 0) {
+ if (nfaults == 0 && nvlist_lookup_nvlist_array(nvl,
+ FM_SUSPECT_FAULT_LIST, &faults, &nfaults) != 0) {
fmd_hdl_debug(hdl, "rio_recv: no fault list");
return;
}
- devpath[0] = '\0';
- rval = 0;
for (f = 0; f < nfaults; f++) {
if (nvlist_lookup_boolean_value(faults[f], FM_SUSPECT_RETIRE,
&rtr) == 0 && !rtr) {
@@ -193,7 +191,7 @@ rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
continue;
}
- if (retire && fault_exception(hdl, faults[f]))
+ if (fault_exception(hdl, faults[f]))
continue;
if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH,
@@ -202,52 +200,50 @@ rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
continue;
}
- /*
- * If retire, we retire only if a single ASRU is pinpointed.
- * We don't do automatic retires if a fault event pinpoints
- * more than one ASRU.
- */
if (retire) {
- if (devpath[0] != '\0' && strcmp(path, devpath) != 0) {
- fmd_hdl_debug(hdl,
- "rio_recv: Skipping: multiple ASRU");
- return;
- } else if (devpath[0] == '\0') {
- (void) strlcpy(devpath, path, sizeof (devpath));
+ if (fmd_nvl_fmri_has_fault(hdl, asru,
+ FMD_HAS_FAULT_ASRU, NULL) == 1) {
+ error = di_retire_device(path, &drt, 0);
+ if (error != 0) {
+ fmd_hdl_debug(hdl, "rio_recv:"
+ " di_retire_device failed:"
+ " error: %d %s", error, path);
+ rval = -1;
+ }
}
} else {
- error = di_unretire_device(path, &drt);
- if (error != 0) {
- fmd_hdl_debug(hdl, "rio_recv: "
- "di_unretire_device failed: error: %d %s",
- error, path);
- rval = -1;
+ if (fmd_nvl_fmri_has_fault(hdl, asru,
+ FMD_HAS_FAULT_ASRU, NULL) == 0) {
+ error = di_unretire_device(path, &drt);
+ if (error != 0) {
+ fmd_hdl_debug(hdl, "rio_recv:"
+ " di_unretire_device failed:"
+ " error: %d %s", error, path);
+ rval = -1;
+ }
}
}
}
- if (retire) {
- if (devpath[0] == '\0')
- return;
- error = di_retire_device(devpath, &drt, 0);
- if (error != 0) {
- fmd_hdl_debug(hdl, "rio_recv: di_retire_device "
- "failed: error: %d %s", error, devpath);
- rval = -1;
- }
- }
-
/*
* The fmd framework takes care of moving a case to the repaired
* state. To move the case to the closed state however, we (the
* retire agent) need to call fmd_case_uuclose()
*/
- if (retire && rval == 0) {
+ if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 && rval == 0) {
if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 &&
!fmd_case_uuclosed(hdl, uuid)) {
fmd_case_uuclose(hdl, uuid);
}
}
+
+ /*
+ * Similarly to move the case to the resolved state, we (the
+ * retire agent) need to call fmd_case_uuresolved()
+ */
+ if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && rval == 0 &&
+ nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0)
+ fmd_case_uuresolved(hdl, uuid);
}
static const fmd_hdl_ops_t fmd_ops = {
diff --git a/usr/src/cmd/fm/modules/common/snmp-trapgen/snmp.c b/usr/src/cmd/fm/modules/common/snmp-trapgen/snmp.c
index 723d82dc06..c67c177b40 100644
--- a/usr/src/cmd/fm/modules/common/snmp-trapgen/snmp.c
+++ b/usr/src/cmd/fm/modules/common/snmp-trapgen/snmp.c
@@ -399,6 +399,7 @@ _fmd_init(fmd_hdl_t *hdl)
fmd_prop_free_string(hdl, rootdir);
fmd_hdl_subscribe(hdl, FM_LIST_SUSPECT_CLASS);
fmd_hdl_subscribe(hdl, FM_LIST_REPAIRED_CLASS);
+ fmd_hdl_subscribe(hdl, FM_LIST_RESOLVED_CLASS);
}
void
diff --git a/usr/src/cmd/fm/modules/common/syslog-msgs/syslog.c b/usr/src/cmd/fm/modules/common/syslog-msgs/syslog.c
index 36025c14cb..08c421915c 100644
--- a/usr/src/cmd/fm/modules/common/syslog-msgs/syslog.c
+++ b/usr/src/cmd/fm/modules/common/syslog-msgs/syslog.c
@@ -390,7 +390,8 @@ syslog_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
syslog_pointer = dgettext(SYSLOG_DOMAIN, SYSLOG_POINTER);
syslog_ctl.pri &= LOG_FACMASK;
- if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
+ if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0 ||
+ strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
syslog_ctl.pri |= LOG_NOTICE;
else
syslog_ctl.pri |= LOG_ERR;
@@ -549,6 +550,7 @@ _fmd_init(fmd_hdl_t *hdl)
fmd_prop_free_string(hdl, rootdir);
fmd_hdl_subscribe(hdl, FM_LIST_SUSPECT_CLASS);
fmd_hdl_subscribe(hdl, FM_LIST_REPAIRED_CLASS);
+ fmd_hdl_subscribe(hdl, FM_LIST_RESOLVED_CLASS);
}
void
diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf b/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf
index 62fc163a7d..2730d81677 100644
--- a/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf
+++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#ident "%Z%%M% %I% %E% SMI"
@@ -28,4 +28,3 @@
#
subscribe fault.fs.zfs.*
subscribe resource.fs.zfs.removed
-subscribe list.repaired
diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c
index 9b5f778c03..72535443d9 100644
--- a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c
+++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c
@@ -209,6 +209,8 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
boolean_t is_repair;
char *scheme;
nvlist_t *vdev;
+ char *uuid;
+ int repair_done = 0;
/*
* If this is a resource notifying us of device removal, then simply
@@ -231,7 +233,7 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
return;
}
- if (strcmp(class, "list.repaired") == 0)
+ if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
is_repair = B_TRUE;
else
is_repair = B_FALSE;
@@ -288,6 +290,7 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
* continue.
*/
if (is_repair) {
+ repair_done = 1;
(void) zpool_vdev_clear(zhp, vdev_guid);
zpool_close(zhp);
continue;
@@ -307,6 +310,10 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
replace_with_spare(zhp, vdev);
zpool_close(zhp);
}
+
+ if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && repair_done &&
+ nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0)
+ fmd_case_uuresolved(hdl, uuid);
}
static const fmd_hdl_ops_t fmd_ops = {
diff --git a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcache.c b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcache.c
index 79b7d70864..28b1127549 100644
--- a/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcache.c
+++ b/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcache.c
@@ -280,7 +280,7 @@ cmd_repair_fmri(fmd_hdl_t *hdl, char *buf)
return (-1);
}
- err = fmd_adm_rsrc_repair(ap, buf);
+ err = fmd_adm_rsrc_repaired(ap, buf);
if (err)
err = -1;
fmd_adm_close(ap);
diff --git a/usr/src/cmd/fm/schemes/cpu/cpu.c b/usr/src/cmd/fm/schemes/cpu/cpu.c
index 7f88913d0d..6132874b71 100644
--- a/usr/src/cmd/fm/schemes/cpu/cpu.c
+++ b/usr/src/cmd/fm/schemes/cpu/cpu.c
@@ -254,6 +254,65 @@ fmd_fmri_present(nvlist_t *nvl)
}
+/*
+ * Determine whether the CPU named by 'nvl' has been replaced.
+ *
+ * The topology layer is asked first through topo_fmri_replaced(); its answer
+ * is returned unless it reports ETOPO_METHOD_NOTSUP, in which case the
+ * serial number recorded in the FMRI is compared with the serial number of
+ * the CPU that currently has the same cpuid.
+ *
+ * Returns one of the FMD_OBJ_STATE_* values; the result of
+ * fmd_fmri_set_errno(EINVAL) when the FMRI is malformed, has an unsupported
+ * version, or the topology handle cannot be held; or -1 when a version 0
+ * serial number cannot be read for a reason other than ENOENT.
+ */
int
+fmd_fmri_replaced(nvlist_t *nvl)
+{
+	int rc, err = 0;
+	uint8_t version;
+	uint32_t cpuid;
+	uint64_t nvlserid, curserid;
+	char *nvlserstr = NULL;
+	char curserbuf[21]; /* 20 decimal digits of UINT64_MAX + '\0' */
+	topo_hdl_t *thp;
+
+	if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 ||
+	    nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, &cpuid) != 0)
+		return (fmd_fmri_set_errno(EINVAL));
+
+	/*
+	 * If the cpu-scheme topology exports this method replaced(), invoke it.
+	 */
+	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
+		return (fmd_fmri_set_errno(EINVAL));
+	rc = topo_fmri_replaced(thp, nvl, &err);
+	fmd_fmri_topo_rele(thp);
+	if (err != ETOPO_METHOD_NOTSUP)
+		return (rc);
+
+	if (version == CPU_SCHEME_VERSION0) {
+		if (nvlist_lookup_uint64(nvl, FM_FMRI_CPU_SERIAL_ID,
+		    &nvlserid) != 0)
+			return (fmd_fmri_set_errno(EINVAL));
+		if (cpu_get_serialid_V0(cpuid, &curserid) != 0)
+			return (errno == ENOENT ?
+			    FMD_OBJ_STATE_NOT_PRESENT : -1);
+
+		return (curserid == nvlserid ? FMD_OBJ_STATE_STILL_PRESENT :
+		    FMD_OBJ_STATE_REPLACED);
+	}
+
+	if (version != CPU_SCHEME_VERSION1)
+		return (fmd_fmri_set_errno(EINVAL));
+
+	/*
+	 * The serial id is optional in a version 1 FMRI: ENOENT is tolerated,
+	 * any other lookup failure means the FMRI is malformed.  On ENOENT
+	 * the output pointer is not filled in, so it is pinned to NULL here
+	 * rather than being handed to strcmp() below.
+	 */
+	rc = nvlist_lookup_string(nvl, FM_FMRI_CPU_SERIAL_ID, &nvlserstr);
+	if (rc != 0) {
+		if (rc != ENOENT)
+			return (fmd_fmri_set_errno(EINVAL));
+		nvlserstr = NULL;
+	}
+
+	/*
+	 * If either serial id is not available there is nothing to compare,
+	 * so just check if the cpuid is present.
+	 */
+	if (nvlserstr == NULL ||
+	    cpu_get_serialid_V1(cpuid, curserbuf, sizeof (curserbuf)) != 0) {
+		return (cpu_cpuid_present(cpuid) ?
+		    FMD_OBJ_STATE_UNKNOWN : FMD_OBJ_STATE_NOT_PRESENT);
+	}
+
+	return (strcmp(curserbuf, nvlserstr) == 0 ?
+	    FMD_OBJ_STATE_STILL_PRESENT : FMD_OBJ_STATE_REPLACED);
+}
+
+int
fmd_fmri_unusable(nvlist_t *nvl)
{
int rc, err = 0;
diff --git a/usr/src/cmd/fm/schemes/dev/scheme.c b/usr/src/cmd/fm/schemes/dev/scheme.c
index be00fa5931..ffd67c6f6e 100644
--- a/usr/src/cmd/fm/schemes/dev/scheme.c
+++ b/usr/src/cmd/fm/schemes/dev/scheme.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -83,10 +83,22 @@ fmd_fmri_present(nvlist_t *nvl)
present = topo_fmri_present(thp, nvl, &err);
fmd_fmri_topo_rele(thp);
- if (err != 0)
- return (0);
- else
- return (present);
+ return (present);
+}
+
+/*
+ * Hand the dev-scheme FMRI to libtopo's topo_fmri_replaced() and pass its
+ * answer straight back.  Fails with fmd_fmri_set_errno(EINVAL) when no
+ * topology handle can be held.
+ */
+int
+fmd_fmri_replaced(nvlist_t *nvl)
+{
+	topo_hdl_t *hdl = fmd_fmri_topo_hold(TOPO_VERSION);
+	int topo_err = 0;
+	int state;
+
+	if (hdl == NULL)
+		return (fmd_fmri_set_errno(EINVAL));
+
+	state = topo_fmri_replaced(hdl, nvl, &topo_err);
+	fmd_fmri_topo_rele(hdl);
+
+	return (state);
}
int
@@ -111,3 +123,26 @@ fmd_fmri_unusable(nvlist_t *nvl)
else
return (unusable);
}
+
+/*
+ * Return the service state libtopo reports for a dev-scheme FMRI.  An FMRI
+ * with no version, or a version newer than FM_DEV_SCHEME_VERSION, is
+ * rejected with EINVAL; a libtopo error yields FMD_SERVICE_STATE_UNKNOWN.
+ */
+int
+fmd_fmri_service_state(nvlist_t *nvl)
+{
+	topo_hdl_t *hdl;
+	uint8_t vers;
+	int topo_err = 0;
+	int state;
+
+	if (nvlist_lookup_uint8(nvl, FM_VERSION, &vers) != 0)
+		return (fmd_fmri_set_errno(EINVAL));
+	if (vers > FM_DEV_SCHEME_VERSION)
+		return (fmd_fmri_set_errno(EINVAL));
+
+	hdl = fmd_fmri_topo_hold(TOPO_VERSION);
+	if (hdl == NULL)
+		return (fmd_fmri_set_errno(EINVAL));
+
+	state = topo_fmri_service_state(hdl, nvl, &topo_err);
+	fmd_fmri_topo_rele(hdl);
+
+	return (topo_err == 0 ? state : FMD_SERVICE_STATE_UNKNOWN);
+}
diff --git a/usr/src/cmd/fm/schemes/hc/scheme.c b/usr/src/cmd/fm/schemes/hc/scheme.c
index 0c0e6ac692..b53d18c908 100644
--- a/usr/src/cmd/fm/schemes/hc/scheme.c
+++ b/usr/src/cmd/fm/schemes/hc/scheme.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -84,17 +84,36 @@ fmd_fmri_present(nvlist_t *nvl)
err = nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hcprs, &hcnprs);
err |= nvlist_lookup_string(hcprs[0], FM_FMRI_HC_NAME, &nm);
if (err != 0)
- return (0);
+ return (fmd_fmri_set_errno(EINVAL));
if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
return (fmd_fmri_set_errno(EINVAL));
present = topo_fmri_present(thp, nvl, &err);
fmd_fmri_topo_rele(thp);
+ return (present);
+}
+
+int
+fmd_fmri_replaced(nvlist_t *nvl)
+{
+	int err, replaced;
+	topo_hdl_t *thp;
+	nvlist_t **hcprs = NULL;
+	char *nm;
+	uint_t hcnprs = 0;
+
+	/*
+	 * Validate the hc list before touching its first pair: hcprs is only
+	 * meaningful when the array lookup succeeded and returned at least
+	 * one element, so hcprs[0] must not be read otherwise.
+	 */
+	err = nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hcprs, &hcnprs);
+	if (err == 0 && (hcprs == NULL || hcnprs == 0))
+		err = EINVAL;
+	if (err == 0)
+		err = nvlist_lookup_string(hcprs[0], FM_FMRI_HC_NAME, &nm);
if (err != 0)
- return (present);
- else
- return (1);
+		return (fmd_fmri_set_errno(EINVAL));
+
+	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
+		return (fmd_fmri_set_errno(EINVAL));
+	/* err is 0 here; libtopo's answer is returned as-is */
+	replaced = topo_fmri_replaced(thp, nvl, &err);
+	fmd_fmri_topo_rele(thp);
+
+	return (replaced);
}
/*
diff --git a/usr/src/cmd/fm/schemes/mem/mem.c b/usr/src/cmd/fm/schemes/mem/mem.c
index 8f76bb455d..4b4bc5a837 100644
--- a/usr/src/cmd/fm/schemes/mem/mem.c
+++ b/usr/src/cmd/fm/schemes/mem/mem.c
@@ -204,6 +204,8 @@ fmd_fmri_present(nvlist_t *nvl)
size_t nserids;
#else
nvlist_t *unum_nvl;
+ nvlist_t *nvlcp = NULL;
+ uint64_t val;
#endif /* sparc */
if (mem_fmri_get_unum(nvl, &unum) < 0)
@@ -274,6 +276,121 @@ fmd_fmri_present(nvlist_t *nvl)
rc = fmd_fmri_set_errno(EINVAL);
fmd_fmri_topo_rele(thp);
+ /*
+ * Need to check if this is a valid page too. if "isretired" returns
+ * EINVAL, assume page invalid and return not_present.
+ */
+ if (rc == 1 && nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val) ==
+ 0 && nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val) == 0 &&
+ mem_unum_rewrite(nvl, &nvlcp) == 0 && nvlcp != NULL) {
+ int rval = mem_page_cmd(MEM_PAGE_FMRI_ISRETIRED, nvlcp);
+ if (rval == -1 && errno == EINVAL)
+ rc = 0;
+ nvlist_free(nvlcp);
+ }
+#endif /* sparc */
+ return (rc);
+}
+
+/*
+ * Report whether the memory module named by a mem-scheme FMRI has been
+ * replaced.  The result is one of the FMD_OBJ_STATE_* values, or the value
+ * of a failed mem_fmri_get_unum()/fmd_fmri_set_errno() call.  On sparc the
+ * topology's replaced() method is tried first and, if it is not supported,
+ * the FMRI's serial ids are compared with those currently read for the unum.
+ * Elsewhere the unum is converted to an FMRI and handed to
+ * topo_fmri_replaced(), followed by a page validity check.
+ */
+int
+fmd_fmri_replaced(nvlist_t *nvl)
+{
+ char *unum = NULL;
+ int rc, err = 0;
+ struct topo_hdl *thp;
+#ifdef sparc
+ char **nvlserids, **serids;
+ uint_t nnvlserids;
+ size_t nserids;
+#else
+ nvlist_t *unum_nvl;
+ nvlist_t *nvlcp = NULL;
+ uint64_t val;
+#endif /* sparc */
+
+ if (mem_fmri_get_unum(nvl, &unum) < 0)
+ return (-1); /* errno is set for us */
+
+#ifdef sparc
+ /*
+ * If the mem-scheme topology exports this method replaced(), invoke it.
+ */
+ if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
+ return (fmd_fmri_set_errno(EINVAL));
+ rc = topo_fmri_replaced(thp, nvl, &err);
+ fmd_fmri_topo_rele(thp);
+ if (err != ETOPO_METHOD_NOTSUP)
+ return (rc);
+
+ if (nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &nvlserids,
+ &nnvlserids) != 0) {
+ /*
+ * Some mem scheme FMRIs don't have serial ids because
+ * either the platform does not support them, or because
+ * the FMRI was created before support for serial ids was
+ * introduced. Without serial ids there is nothing to
+ * compare, so the state is reported as unknown when
+ * mem.mem_dm is NULL; otherwise the FMRI is rejected.
+ */
+ if (mem.mem_dm == NULL)
+ return (FMD_OBJ_STATE_UNKNOWN);
+ else
+ return (fmd_fmri_set_errno(EINVAL));
+ }
+
+ if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) {
+ if (errno == ENOTSUP)
+ return (FMD_OBJ_STATE_UNKNOWN);
+ if (errno != ENOENT) {
+ /*
+ * Errors are only signalled to the caller if they're
+ * the caller's fault. This isn't - it's a failure on
+ * our part to burst or read the serial numbers. We'll
+ * whine about it, and tell the caller the named
+ * module(s) isn't/aren't there.
+ */
+ fmd_fmri_warn("failed to retrieve serial number for "
+ "unum %s", unum);
+ }
+ return (FMD_OBJ_STATE_NOT_PRESENT);
+ }
+
+ /* Matching serial ids mean the same module(s) are still installed. */
+ rc = serids_eq(serids, nserids, nvlserids, nnvlserids) ?
+ FMD_OBJ_STATE_STILL_PRESENT : FMD_OBJ_STATE_REPLACED;
+
+ mem_strarray_free(serids, nserids);
+#else
+ /*
+ * On X86 we will invoke the topo is_replaced method passing in the
+ * unum, which is in hc scheme. The libtopo hc-scheme is_replaced
+ * method will invoke the node-specific is_replaced method, which is
+ * implemented by the chip enumerator for rank nodes. The rank node's
+ * is_replaced method will compare the serial number in the unum with
+ * the current serial to determine if the same DIMM is replaced.
+ */
+ if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL) {
+ fmd_fmri_warn("failed to get handle to topology");
+ return (-1);
+ }
+ if (topo_fmri_str2nvl(thp, unum, &unum_nvl, &err) == 0) {
+ rc = topo_fmri_replaced(thp, unum_nvl, &err);
+ nvlist_free(unum_nvl);
+ } else
+ rc = fmd_fmri_set_errno(EINVAL);
+ fmd_fmri_topo_rele(thp);
+
+ /*
+ * Need to check if this is a valid page too. if "isretired" returns
+ * EINVAL, assume page invalid and return not_present. The check is
+ * only made when the FMRI carries both an offset and a physical
+ * address and the module is so far still present or unknown.
+ */
+ if ((rc == FMD_OBJ_STATE_STILL_PRESENT ||
+ rc == FMD_OBJ_STATE_UNKNOWN) &&
+ nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val) == 0 &&
+ nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val) == 0 &&
+ mem_unum_rewrite(nvl, &nvlcp) == 0 && nvlcp != NULL) {
+ int rval = mem_page_cmd(MEM_PAGE_FMRI_ISRETIRED, nvlcp);
+ if (rval == -1 && errno == EINVAL)
+ rc = FMD_OBJ_STATE_NOT_PRESENT;
+ nvlist_free(nvlcp);
+ }
#endif /* sparc */
return (rc);
}
diff --git a/usr/src/lib/fm/libfmd_adm/common/fmd_adm.c b/usr/src/lib/fm/libfmd_adm/common/fmd_adm.c
index fc4a8a33b5..8089f8d670 100644
--- a/usr/src/lib/fm/libfmd_adm/common/fmd_adm.c
+++ b/usr/src/lib/fm/libfmd_adm/common/fmd_adm.c
@@ -616,7 +616,7 @@ fmd_adm_rsrc_flush(fmd_adm_t *ap, const char *fmri)
}
int
-fmd_adm_rsrc_repair(fmd_adm_t *ap, const char *fmri)
+fmd_adm_rsrc_repaired(fmd_adm_t *ap, const char *fmri)
{
char *str = (char *)fmri;
int err;
@@ -627,7 +627,50 @@ fmd_adm_rsrc_repair(fmd_adm_t *ap, const char *fmri)
return (fmd_adm_set_errno(ap, EINVAL));
do {
- cs = fmd_adm_rsrcrepair_1(str, &err, ap->adm_clnt);
+ cs = fmd_adm_rsrcrepaired_1(str, &err, ap->adm_clnt);
+ } while (fmd_adm_retry(ap, cs, &retries));
+
+ if (cs != RPC_SUCCESS)
+ return (fmd_adm_set_errno(ap, EPROTO));
+
+ return (fmd_adm_set_svcerr(ap, err));
+}
+
+/*
+ * Issue the "resource replaced" RPC for the given FMRI string, repeating the
+ * call for as long as fmd_adm_retry() asks for another attempt.  A NULL fmri
+ * fails with EINVAL, an RPC failure with EPROTO; otherwise the service's own
+ * error code is passed through fmd_adm_set_svcerr().
+ */
+int
+fmd_adm_rsrc_replaced(fmd_adm_t *ap, const char *fmri)
+{
+	char *arg = (char *)fmri;
+	enum clnt_stat status;
+	uint_t attempts = 0;
+	int svcerr;
+
+	if (fmri == NULL)
+		return (fmd_adm_set_errno(ap, EINVAL));
+
+	for (;;) {
+		status = fmd_adm_rsrcreplaced_1(arg, &svcerr, ap->adm_clnt);
+		if (!fmd_adm_retry(ap, status, &attempts))
+			break;
+	}
+
+	if (status != RPC_SUCCESS)
+		return (fmd_adm_set_errno(ap, EPROTO));
+
+	return (fmd_adm_set_svcerr(ap, svcerr));
+}
+
+int
+fmd_adm_rsrc_acquit(fmd_adm_t *ap, const char *fmri, const char *uuid)
+{
+ char *str = (char *)fmri;
+ char *str2 = (char *)uuid;
+ int err;
+ enum clnt_stat cs;
+ uint_t retries = 0;
+
+ if (fmri == NULL)
+ return (fmd_adm_set_errno(ap, EINVAL));
+
+ do {
+ cs = fmd_adm_rsrcacquit_1(str, str2, &err, ap->adm_clnt);
} while (fmd_adm_retry(ap, cs, &retries));
if (cs != RPC_SUCCESS)
@@ -657,6 +700,27 @@ fmd_adm_case_repair(fmd_adm_t *ap, const char *uuid)
return (fmd_adm_set_svcerr(ap, err));
}
+/*
+ * Issue the "case acquit" RPC for the given case uuid, repeating the call
+ * for as long as fmd_adm_retry() asks for another attempt.  A NULL uuid
+ * fails with EINVAL, an RPC failure with EPROTO; otherwise the service's own
+ * error code is passed through fmd_adm_set_svcerr().
+ */
+int
+fmd_adm_case_acquit(fmd_adm_t *ap, const char *uuid)
+{
+	char *arg = (char *)uuid;
+	enum clnt_stat status;
+	uint_t attempts = 0;
+	int svcerr;
+
+	if (uuid == NULL)
+		return (fmd_adm_set_errno(ap, EINVAL));
+
+	for (;;) {
+		status = fmd_adm_caseacquit_1(arg, &svcerr, ap->adm_clnt);
+		if (!fmd_adm_retry(ap, status, &attempts))
+			break;
+	}
+
+	if (status != RPC_SUCCESS)
+		return (fmd_adm_set_errno(ap, EPROTO));
+
+	return (fmd_adm_set_svcerr(ap, svcerr));
+}
+
static int
fmd_adm_case_cmp(const void *lp, const void *rp)
{
diff --git a/usr/src/lib/fm/libfmd_adm/common/fmd_adm.h b/usr/src/lib/fm/libfmd_adm/common/fmd_adm.h
index 955b7dcbc7..3dc3fce28f 100644
--- a/usr/src/lib/fm/libfmd_adm/common/fmd_adm.h
+++ b/usr/src/lib/fm/libfmd_adm/common/fmd_adm.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -104,8 +104,11 @@ typedef int fmd_adm_case_f(const fmd_adm_caseinfo_t *, void *);
extern int fmd_adm_rsrc_count(fmd_adm_t *, int, uint32_t *);
extern int fmd_adm_rsrc_iter(fmd_adm_t *, int, fmd_adm_rsrc_f *, void *);
extern int fmd_adm_rsrc_flush(fmd_adm_t *, const char *);
-extern int fmd_adm_rsrc_repair(fmd_adm_t *, const char *);
+extern int fmd_adm_rsrc_repaired(fmd_adm_t *, const char *);
+extern int fmd_adm_rsrc_replaced(fmd_adm_t *, const char *);
+extern int fmd_adm_rsrc_acquit(fmd_adm_t *, const char *, const char *);
extern int fmd_adm_case_repair(fmd_adm_t *, const char *);
+extern int fmd_adm_case_acquit(fmd_adm_t *, const char *);
extern int fmd_adm_case_iter(fmd_adm_t *, const char *, fmd_adm_case_f *,
void *);
diff --git a/usr/src/lib/fm/libfmd_adm/common/mapfile-vers b/usr/src/lib/fm/libfmd_adm/common/mapfile-vers
index 1760838562..1e736ad74e 100644
--- a/usr/src/lib/fm/libfmd_adm/common/mapfile-vers
+++ b/usr/src/lib/fm/libfmd_adm/common/mapfile-vers
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
@@ -29,6 +29,7 @@ SUNWprivate {
global:
fmd_adm_case_iter;
fmd_adm_case_repair;
+ fmd_adm_case_acquit;
fmd_adm_close;
fmd_adm_errmsg;
fmd_adm_log_rotate;
@@ -42,7 +43,9 @@ SUNWprivate {
fmd_adm_rsrc_count;
fmd_adm_rsrc_flush;
fmd_adm_rsrc_iter;
- fmd_adm_rsrc_repair;
+ fmd_adm_rsrc_repaired;
+ fmd_adm_rsrc_replaced;
+ fmd_adm_rsrc_acquit;
fmd_adm_serd_iter;
fmd_adm_serd_reset;
fmd_adm_stats_free;
diff --git a/usr/src/lib/fm/libfmd_snmp/common/fmd_snmp.h b/usr/src/lib/fm/libfmd_snmp/common/fmd_snmp.h
index 049ed6e3c6..5b15f664f3 100644
--- a/usr/src/lib/fm/libfmd_snmp/common/fmd_snmp.h
+++ b/usr/src/lib/fm/libfmd_snmp/common/fmd_snmp.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -63,9 +63,18 @@ extern "C" {
#define SUNFMFAULTEVENT_COL_ASRU 6
#define SUNFMFAULTEVENT_COL_FRU 7
#define SUNFMFAULTEVENT_COL_RESOURCE 8
+#define SUNFMFAULTEVENT_COL_STATUS 9
+#define SUNFMFAULTEVENT_COL_LOCATION 10
#define SUNFMFAULTEVENT_COLMIN SUNFMFAULTEVENT_COL_PROBLEMUUID
-#define SUNFMFAULTEVENT_COLMAX SUNFMFAULTEVENT_COL_RESOURCE
+#define SUNFMFAULTEVENT_COLMAX SUNFMFAULTEVENT_COL_LOCATION
+
+/* Values reported for the SUNFMFAULTEVENT_COL_STATUS column. */
+#define SUNFMFAULTEVENT_STATE_OTHER 1
+#define SUNFMFAULTEVENT_STATE_FAULTY 2
+#define SUNFMFAULTEVENT_STATE_REMOVED 3
+#define SUNFMFAULTEVENT_STATE_REPLACED 4
+#define SUNFMFAULTEVENT_STATE_REPAIRED 5
+#define SUNFMFAULTEVENT_STATE_ACQUITTED 6
#define SUNFMMODULETABLE_OID SUNFM_OID, 3
diff --git a/usr/src/lib/fm/libfmd_snmp/common/problem.c b/usr/src/lib/fm/libfmd_snmp/common/problem.c
index 4a495c5a0a..3410118da9 100644
--- a/usr/src/lib/fm/libfmd_snmp/common/problem.c
+++ b/usr/src/lib/fm/libfmd_snmp/common/problem.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -127,6 +127,18 @@ faultevent_lookup_index_exact(sunFmProblem_data_t *data, ulong_t index)
return (data->d_suspects[index - 1]);
}
+/*
+ * Return the status byte of the suspect at the given 1-based index, or 0
+ * when the index is out of range or the problem carries no status array.
+ * Callers compare the result against NULL (0) to detect "no such entry".
+ */
+static sunFmFaultStatus_data_t
+faultstatus_lookup_index_exact(sunFmProblem_data_t *data, ulong_t index)
+{
+	/* index 0 would wrap index - 1 and read far outside d_statuses */
+	if (index == 0 || index > data->d_nsuspects)
+		return (0);
+
+	if (data->d_statuses == NULL)
+		return (0);
+
+	return (data->d_statuses[index - 1]);
+}
+
/*ARGSUSED*/
static int
problem_update_one(const fmd_adm_caseinfo_t *acp, void *arg)
@@ -188,6 +200,11 @@ problem_update_one(const fmd_adm_caseinfo_t *acp, void *arg)
ASSERT(nelem == data->d_nsuspects);
+ (void) nvlist_lookup_uint8_array(data->d_aci_event,
+ FM_SUSPECT_FAULT_STATUS, &data->d_statuses, &nelem);
+
+ ASSERT(nelem == data->d_nsuspects);
+
uu_avl_node_init(data, &data->d_uuid_avl,
problem_uuid_avl_pool);
(void) uu_avl_find(problem_uuid_avl, data, NULL, &idx);
@@ -636,6 +653,96 @@ sunFmFaultEventTable_nextfe(netsnmp_handler_registration *reginfo,
}
}
+/*
+ * Returns the status of the ASN.1 lexicographically first fault event after
+ * the one identified by table_info. Indexes are updated to reflect the OID
+ * of the data returned. This allows us to implement GETNEXT.
+ *
+ * NULL (0) is returned when sunFmProblemTable_nextpr() finds no further
+ * problem, or when the fake index cannot be built.
+ * NOTE(review): a status byte of 0 is indistinguishable from "not found"
+ * in the two-index case, so such an entry is skipped in favour of the next
+ * problem - confirm that is intended.
+ */
+static sunFmFaultStatus_data_t
+sunFmFaultStatusTable_nextfe(netsnmp_handler_registration *reginfo,
+ netsnmp_table_request_info *table_info)
+{
+ sunFmProblem_data_t *data;
+ sunFmFaultStatus_data_t rv;
+ netsnmp_variable_list *var;
+ ulong_t index;
+
+ for (;;) {
+ switch (table_info->number_indexes) {
+ case 2:
+ default:
+ /* Both indexes given: try the next suspect of this problem. */
+ DEBUGMSGTL((MODNAME_STR, "nextfe: 2 indices:\n"));
+ DEBUGMSGVAR((MODNAME_STR, table_info->indexes));
+ DEBUGMSG((MODNAME_STR, "\n"));
+ DEBUGMSGVAR((MODNAME_STR,
+ table_info->indexes->next_variable));
+ DEBUGMSG((MODNAME_STR, "\n"));
+ index = *(ulong_t *)
+ table_info->indexes->next_variable->val.integer + 1;
+
+ if ((data = sunFmProblemTable_pr(reginfo,
+ table_info)) != NULL &&
+ (rv = faultstatus_lookup_index_exact(data,
+ index)) != NULL) {
+ snmp_set_var_typed_value(
+ table_info->indexes->next_variable,
+ ASN_UNSIGNED, (uchar_t *)&index,
+ sizeof (index));
+ return (rv);
+ }
+
+ /* No such suspect: move on to the next problem and loop. */
+ if (sunFmProblemTable_nextpr(reginfo, table_info) ==
+ NULL)
+ return (NULL);
+ break;
+ case 1:
+ /*
+ * Only the problem index given: install a second index of
+ * 0 so the next pass through case 2 looks up suspect 1.
+ */
+ if ((data = sunFmProblemTable_pr(reginfo,
+ table_info)) != NULL) {
+ oid tmpoid[MAX_OID_LEN];
+ index = 0;
+
+ DEBUGMSGTL((MODNAME_STR, "nextfe: 1 index:\n"));
+ DEBUGMSGVAR((MODNAME_STR, table_info->indexes));
+ DEBUGMSG((MODNAME_STR, "\n"));
+ /*
+ * NOTE(review): the allocation result is used
+ * without a NULL check - confirm whether
+ * SNMP_MALLOC_TYPEDEF can fail here.
+ */
+ var =
+ SNMP_MALLOC_TYPEDEF(netsnmp_variable_list);
+ snmp_set_var_typed_value(var, ASN_UNSIGNED,
+ (uchar_t *)&index, sizeof (index));
+ /*
+ * NOTE(review): tmpoid is filled in below but is
+ * not read again anywhere in this function.
+ */
+ (void) memcpy(tmpoid, reginfo->rootoid,
+ reginfo->rootoid_len * sizeof (oid));
+ tmpoid[reginfo->rootoid_len] = 1;
+ tmpoid[reginfo->rootoid_len + 1] =
+ table_info->colnum;
+ if (build_oid_segment(var) != SNMPERR_SUCCESS) {
+ snmp_free_varbind(var);
+ return (NULL);
+ }
+ snmp_free_varbind(
+ table_info->indexes->next_variable);
+ table_info->indexes->next_variable = var;
+ table_info->number_indexes = 2;
+ DEBUGMSGTL((MODNAME_STR, "nextfe: built fake "
+ "index:\n"));
+ DEBUGMSGVAR((MODNAME_STR, table_info->indexes));
+ DEBUGMSG((MODNAME_STR, "\n"));
+ DEBUGMSGVAR((MODNAME_STR,
+ table_info->indexes->next_variable));
+ DEBUGMSG((MODNAME_STR, "\n"));
+ } else {
+ if (sunFmProblemTable_nextpr(reginfo,
+ table_info) == NULL)
+ return (NULL);
+ }
+ break;
+ case 0:
+ /* No indexes given: start from the first problem. */
+ if (sunFmProblemTable_nextpr(reginfo, table_info) ==
+ NULL)
+ return (NULL);
+ break;
+ }
+ }
+}
+
static sunFmFaultEvent_data_t *
sunFmFaultEventTable_fe(netsnmp_handler_registration *reginfo,
netsnmp_table_request_info *table_info)
@@ -651,6 +758,21 @@ sunFmFaultEventTable_fe(netsnmp_handler_registration *reginfo,
*(ulong_t *)table_info->indexes->next_variable->val.integer));
}
+/*
+ * Exact-match lookup: return the status of the suspect addressed by the two
+ * indexes in table_info, or NULL when the problem cannot be found.
+ */
+static sunFmFaultStatus_data_t
+sunFmFaultStatusTable_fe(netsnmp_handler_registration *reginfo,
+    netsnmp_table_request_info *table_info)
+{
+	sunFmProblem_data_t *problem;
+	ulong_t suspect_idx;
+
+	ASSERT(table_info->number_indexes == 2);
+
+	problem = sunFmProblemTable_pr(reginfo, table_info);
+	if (problem == NULL)
+		return (NULL);
+
+	suspect_idx =
+	    *(ulong_t *)table_info->indexes->next_variable->val.integer;
+
+	return (faultstatus_lookup_index_exact(problem, suspect_idx));
+}
+
/*ARGSUSED*/
static void
sunFmProblemTable_return(unsigned int reg, void *arg)
@@ -828,6 +950,7 @@ sunFmFaultEventTable_return(unsigned int reg, void *arg)
netsnmp_table_request_info *table_info;
sunFmProblem_data_t *pdata;
sunFmFaultEvent_data_t *data;
+ sunFmFaultStatus_data_t status;
ASSERT(netsnmp_handler_check_cache(cache) != NULL);
@@ -869,30 +992,58 @@ sunFmFaultEventTable_return(unsigned int reg, void *arg)
* for GETNEXT requests.
*/
- switch (reqinfo->mode) {
- case MODE_GET:
- if ((data = sunFmFaultEventTable_fe(reginfo, table_info)) ==
- NULL) {
+ if (table_info->colnum == SUNFMFAULTEVENT_COL_STATUS) {
+ switch (reqinfo->mode) {
+ case MODE_GET:
+ if ((status = sunFmFaultStatusTable_fe(reginfo,
+ table_info)) == NULL) {
+ netsnmp_free_delegated_cache(cache);
+ (void) pthread_mutex_unlock(&update_lock);
+ return;
+ }
+ break;
+ case MODE_GETNEXT:
+ case MODE_GETBULK:
+ if ((status = sunFmFaultStatusTable_nextfe(reginfo,
+ table_info)) == NULL) {
+ netsnmp_free_delegated_cache(cache);
+ (void) pthread_mutex_unlock(&update_lock);
+ return;
+ }
+ break;
+ default:
+ snmp_log(LOG_ERR, MODNAME_STR
+ ": Unsupported request mode %d\n", reqinfo->mode);
netsnmp_free_delegated_cache(cache);
(void) pthread_mutex_unlock(&update_lock);
return;
}
- break;
- case MODE_GETNEXT:
- case MODE_GETBULK:
- if ((data = sunFmFaultEventTable_nextfe(reginfo, table_info)) ==
- NULL) {
+ } else {
+ switch (reqinfo->mode) {
+ case MODE_GET:
+ if ((data = sunFmFaultEventTable_fe(reginfo,
+ table_info)) == NULL) {
+ netsnmp_free_delegated_cache(cache);
+ (void) pthread_mutex_unlock(&update_lock);
+ return;
+ }
+ break;
+ case MODE_GETNEXT:
+ case MODE_GETBULK:
+ if ((data = sunFmFaultEventTable_nextfe(reginfo,
+ table_info)) == NULL) {
+ netsnmp_free_delegated_cache(cache);
+ (void) pthread_mutex_unlock(&update_lock);
+ return;
+ }
+ break;
+ default:
+ snmp_log(LOG_ERR, MODNAME_STR
+ ": Unsupported request mode %d\n", reqinfo->mode);
netsnmp_free_delegated_cache(cache);
(void) pthread_mutex_unlock(&update_lock);
return;
}
- break;
- default:
- snmp_log(LOG_ERR, MODNAME_STR ": Unsupported request mode %d\n",
- reqinfo->mode);
- netsnmp_free_delegated_cache(cache);
- (void) pthread_mutex_unlock(&update_lock);
- return;
}
switch (table_info->colnum) {
@@ -978,6 +1129,33 @@ sunFmFaultEventTable_return(unsigned int reg, void *arg)
free(str);
break;
}
+	case SUNFMFAULTEVENT_COL_STATUS:
+	{
+		/*
+		 * Start from "other" so that a status byte with none of the
+		 * bits tested below never sends an uninitialized value.
+		 */
+		ulong_t pl = SUNFMFAULTEVENT_STATE_OTHER;
+
+		/* The first matching bit, in this order, decides the state. */
+		if (status & FM_SUSPECT_FAULTY)
+			pl = SUNFMFAULTEVENT_STATE_FAULTY;
+		else if (status & FM_SUSPECT_NOT_PRESENT)
+			pl = SUNFMFAULTEVENT_STATE_REMOVED;
+		else if (status & FM_SUSPECT_REPLACED)
+			pl = SUNFMFAULTEVENT_STATE_REPLACED;
+		else if (status & FM_SUSPECT_REPAIRED)
+			pl = SUNFMFAULTEVENT_STATE_REPAIRED;
+		else if (status & FM_SUSPECT_ACQUITTED)
+			pl = SUNFMFAULTEVENT_STATE_ACQUITTED;
+		netsnmp_table_build_result(reginfo, request, table_info,
+		    ASN_UNSIGNED, (uchar_t *)&pl, sizeof (pl));
+		break;
+	}
+ case SUNFMFAULTEVENT_COL_LOCATION:
+ {
+ char *location = "-";
+
+ (void) nvlist_lookup_string(data, FM_FAULT_LOCATION, &location);
+ netsnmp_table_build_result(reginfo, request, table_info,
+ ASN_OCTET_STR, (uchar_t *)location, strlen(location));
+ break;
+ }
default:
break;
}
diff --git a/usr/src/lib/fm/libfmd_snmp/common/problem.h b/usr/src/lib/fm/libfmd_snmp/common/problem.h
index fb935e9ed5..d7e280c28a 100644
--- a/usr/src/lib/fm/libfmd_snmp/common/problem.h
+++ b/usr/src/lib/fm/libfmd_snmp/common/problem.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -48,6 +48,7 @@ typedef struct sunFmProblem_data {
ulong_t d_nsuspects;
nvlist_t **d_suspects;
nvlist_t *d_aci_event;
+ uint8_t *d_statuses;
} sunFmProblem_data_t;
typedef struct sunFmProblem_update_ctx {
@@ -59,6 +60,7 @@ typedef struct sunFmProblem_update_ctx {
} sunFmProblem_update_ctx_t;
typedef nvlist_t sunFmFaultEvent_data_t;
+typedef uint8_t sunFmFaultStatus_data_t;
int sunFmProblemTable_init(void);
int sunFmFaultEventTable_init(void);
diff --git a/usr/src/lib/fm/topo/libtopo/common/dev.c b/usr/src/lib/fm/topo/libtopo/common/dev.c
index c671fa5c94..0865b194c0 100644
--- a/usr/src/lib/fm/topo/libtopo/common/dev.c
+++ b/usr/src/lib/fm/topo/libtopo/common/dev.c
@@ -36,6 +36,7 @@
#include <sys/stat.h>
#include <libnvpair.h>
#include <fm/topo_mod.h>
+#include <fm/fmd_fmri.h>
#include <sys/fm/protocol.h>
#include <topo_method.h>
@@ -53,8 +54,12 @@ static int dev_fmri_create_meth(topo_mod_t *, tnode_t *, topo_version_t,
nvlist_t *, nvlist_t **);
static int dev_fmri_present(topo_mod_t *, tnode_t *, topo_version_t,
nvlist_t *, nvlist_t **);
+static int dev_fmri_replaced(topo_mod_t *, tnode_t *, topo_version_t,
+ nvlist_t *, nvlist_t **);
static int dev_fmri_unusable(topo_mod_t *, tnode_t *, topo_version_t,
nvlist_t *, nvlist_t **);
+static int dev_fmri_service_state(topo_mod_t *, tnode_t *, topo_version_t,
+ nvlist_t *, nvlist_t **);
static const topo_method_t dev_methods[] = {
{ TOPO_METH_NVL2STR, TOPO_METH_NVL2STR_DESC, TOPO_METH_NVL2STR_VERSION,
@@ -65,9 +70,15 @@ static const topo_method_t dev_methods[] = {
TOPO_STABILITY_INTERNAL, dev_fmri_create_meth },
{ TOPO_METH_PRESENT, TOPO_METH_PRESENT_DESC, TOPO_METH_PRESENT_VERSION,
TOPO_STABILITY_INTERNAL, dev_fmri_present },
+ { TOPO_METH_REPLACED, TOPO_METH_REPLACED_DESC,
+ TOPO_METH_REPLACED_VERSION, TOPO_STABILITY_INTERNAL,
+ dev_fmri_replaced },
{ TOPO_METH_UNUSABLE, TOPO_METH_UNUSABLE_DESC,
TOPO_METH_UNUSABLE_VERSION, TOPO_STABILITY_INTERNAL,
dev_fmri_unusable },
+ { TOPO_METH_SERVICE_STATE, TOPO_METH_SERVICE_STATE_DESC,
+ TOPO_METH_SERVICE_STATE_VERSION, TOPO_STABILITY_INTERNAL,
+ dev_fmri_service_state },
{ NULL }
};
@@ -335,7 +346,7 @@ dev_fmri_present(topo_mod_t *mod, tnode_t *node, topo_version_t version,
* If the device is present and there is a devid, it must also match.
* so di_init that one node. No need for DINFOFORCE.
*/
- len = strlen(devpath) + strlen("/devices") + 1;
+ len = strlen(devpath) + strlen("/devices") + 1;
path = topo_mod_alloc(mod, len);
(void) snprintf(path, len, "/devices%s", devpath);
if (devid == NULL) {
@@ -383,6 +394,86 @@ dev_fmri_present(topo_mod_t *mod, tnode_t *node, topo_version_t version,
+/*
+ * TOPO_METH_REPLACED for the dev scheme.  Stores an FMD_OBJ_STATE_* value
+ * under TOPO_METH_REPLACED_RET in a newly allocated *out and returns 0, or
+ * returns the result of topo_mod_seterrno() on a malformed FMRI, a too-new
+ * method version, or an allocation failure.
+ *
+ * Without a devid in the FMRI the answer is only "not present" or
+ * "unknown"; with one, the devid of the node currently at the path is
+ * compared with it to tell "still present" from "replaced".
+ */
/*ARGSUSED*/
static int
+dev_fmri_replaced(topo_mod_t *mod, tnode_t *node, topo_version_t version,
+    nvlist_t *in, nvlist_t **out)
+{
+	uint8_t fmversion;
+	char *devpath = NULL;
+	uint32_t rval;
+	char *devid = NULL, *path;
+	ddi_devid_t id;
+	ddi_devid_t matchid;
+	di_node_t dnode;
+	struct stat sb;
+	int len;
+
+	if (version > TOPO_METH_REPLACED_VERSION)
+		return (topo_mod_seterrno(mod, EMOD_VER_NEW));
+
+	if (nvlist_lookup_uint8(in, FM_VERSION, &fmversion) != 0 ||
+	    fmversion > FM_DEV_SCHEME_VERSION ||
+	    nvlist_lookup_string(in, FM_FMRI_DEV_PATH, &devpath) != 0)
+		return (topo_mod_seterrno(mod, EMOD_FMRI_MALFORM));
+
+	/* The devid is optional; devid stays NULL when it is absent. */
+	(void) nvlist_lookup_string(in, FM_FMRI_DEV_ID, &devid);
+
+	if (devpath == NULL || strlen(devpath) == 0)
+		return (topo_mod_seterrno(mod, EMOD_FMRI_MALFORM));
+
+	/*
+	 * stat() the device node in devfs. This will tell us if the device is
+	 * present or not. Don't stat the minor, just the whole device.
+	 * If the device is present and there is a devid, it must also match.
+	 * so di_init that one node. No need for DINFOFORCE.
+	 */
+	len = strlen(devpath) + strlen("/devices") + 1;
+	if ((path = topo_mod_alloc(mod, len)) == NULL)
+		return (topo_mod_seterrno(mod, EMOD_NOMEM));
+	(void) snprintf(path, len, "/devices%s", devpath);
+
+	if (devid == NULL) {
+		if (stat(path, &sb) != -1) {
+			rval = FMD_OBJ_STATE_UNKNOWN;
+		} else if ((dnode = di_init("/", DINFOCACHE)) == DI_NODE_NIL) {
+			rval = FMD_OBJ_STATE_NOT_PRESENT;
+		} else {
+			/* Not in devfs; fall back to the cached snapshot. */
+			if (di_lookup_node(dnode, devpath) == DI_NODE_NIL)
+				rval = FMD_OBJ_STATE_NOT_PRESENT;
+			else
+				rval = FMD_OBJ_STATE_UNKNOWN;
+			di_fini(dnode);
+		}
+	} else {
+		if (stat(path, &sb) == -1) {
+			rval = FMD_OBJ_STATE_NOT_PRESENT;
+		} else if ((dnode = di_init(devpath, DINFOCPYONE)) ==
+		    DI_NODE_NIL) {
+			rval = FMD_OBJ_STATE_NOT_PRESENT;
+		} else {
+			if ((id = di_devid(dnode)) == NULL ||
+			    devid_str_decode(devid, &matchid, NULL) != 0) {
+				/* Nothing to compare against. */
+				rval = FMD_OBJ_STATE_UNKNOWN;
+			} else {
+				if (devid_compare(id, matchid) != 0)
+					rval = FMD_OBJ_STATE_REPLACED;
+				else
+					rval = FMD_OBJ_STATE_STILL_PRESENT;
+				devid_free(matchid);
+			}
+			di_fini(dnode);
+		}
+	}
+	topo_mod_free(mod, path, len);
+
+	if (topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) != 0)
+		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
+	if (nvlist_add_uint32(*out, TOPO_METH_REPLACED_RET, rval) != 0) {
+		nvlist_free(*out);
+		*out = NULL;	/* do not hand back a freed list */
+		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
+	}
+
+	return (0);
+}
+
+/*ARGSUSED*/
+static int
dev_fmri_unusable(topo_mod_t *mod, tnode_t *node, topo_version_t version,
nvlist_t *in, nvlist_t **out)
{
@@ -392,7 +483,7 @@ dev_fmri_unusable(topo_mod_t *mod, tnode_t *node, topo_version_t version,
uint32_t unusable;
uint_t state;
- if (version > TOPO_METH_PRESENT_VERSION)
+ if (version > TOPO_METH_UNUSABLE_VERSION)
return (topo_mod_seterrno(mod, EMOD_VER_NEW));
if (nvlist_lookup_uint8(in, FM_VERSION, &fmversion) != 0 ||
@@ -428,6 +519,56 @@ dev_fmri_unusable(topo_mod_t *mod, tnode_t *node, topo_version_t version,
return (0);
}
+/*
+ * TOPO_METH_SERVICE_STATE for the dev scheme.  Maps the libdevinfo state of
+ * the node at the FMRI's device path to an FMD_SERVICE_STATE_* value and
+ * stores it under TOPO_METH_SERVICE_STATE_RET in a newly allocated *out.
+ * Returns 0 on success or the result of topo_mod_seterrno() on failure.
+ */
+/*ARGSUSED*/
+static int
+dev_fmri_service_state(topo_mod_t *mod, tnode_t *node, topo_version_t version,
+    nvlist_t *in, nvlist_t **out)
+{
+	uint8_t fmri_vers;
+	char *path = NULL;
+	uint32_t svc;
+	di_node_t di;
+
+	if (version > TOPO_METH_SERVICE_STATE_VERSION)
+		return (topo_mod_seterrno(mod, EMOD_VER_NEW));
+
+	if (nvlist_lookup_uint8(in, FM_VERSION, &fmri_vers) != 0 ||
+	    fmri_vers > FM_DEV_SCHEME_VERSION ||
+	    nvlist_lookup_string(in, FM_FMRI_DEV_PATH, &path) != 0 ||
+	    path == NULL)
+		return (topo_mod_seterrno(mod, EMOD_FMRI_MALFORM));
+
+	di = di_init(path, DINFOCPYONE);
+	if (di == DI_NODE_NIL) {
+		/* Only ENXIO is taken to mean the device is unusable. */
+		if (errno != ENXIO)
+			return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
+		svc = FMD_SERVICE_STATE_UNUSABLE;
+	} else {
+		uint_t retired = di_retired(di);
+		uint_t devstate = di_state(di);
+		uint_t down = devstate & (DI_DEVICE_OFFLINE | DI_DEVICE_DOWN |
+		    DI_BUS_QUIESCED | DI_BUS_DOWN);
+
+		if (retired != 0 || down != 0)
+			svc = FMD_SERVICE_STATE_UNUSABLE;
+		else if ((devstate & DI_DEVICE_DEGRADED) != 0)
+			svc = FMD_SERVICE_STATE_DEGRADED;
+		else
+			svc = FMD_SERVICE_STATE_OK;
+		di_fini(di);
+	}
+
+	if (topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) != 0)
+		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
+
+	if (nvlist_add_uint32(*out, TOPO_METH_SERVICE_STATE_RET, svc) != 0) {
+		nvlist_free(*out);
+		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
+	}
+
+	return (0);
+}
+
static nvlist_t *
dev_fmri_create(topo_mod_t *mp, const char *id, const char *path)
{
diff --git a/usr/src/lib/fm/topo/libtopo/common/hc.c b/usr/src/lib/fm/topo/libtopo/common/hc.c
index bb19967c21..8b9c8f08a8 100644
--- a/usr/src/lib/fm/topo/libtopo/common/hc.c
+++ b/usr/src/lib/fm/topo/libtopo/common/hc.c
@@ -36,6 +36,7 @@
#include <limits.h>
#include <fm/topo_mod.h>
#include <fm/topo_hc.h>
+#include <fm/fmd_fmri.h>
#include <sys/param.h>
#include <sys/systeminfo.h>
#include <sys/fm/protocol.h>
@@ -61,6 +62,8 @@ static int hc_compare(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
nvlist_t **);
static int hc_fmri_present(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
nvlist_t **);
+static int hc_fmri_replaced(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
+ nvlist_t **);
static int hc_fmri_unusable(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
nvlist_t **);
static int hc_fmri_create_meth(topo_mod_t *, tnode_t *, topo_version_t,
@@ -87,6 +90,9 @@ const topo_method_t hc_methods[] = {
TOPO_STABILITY_INTERNAL, hc_compare },
{ TOPO_METH_PRESENT, TOPO_METH_PRESENT_DESC, TOPO_METH_PRESENT_VERSION,
TOPO_STABILITY_INTERNAL, hc_fmri_present },
+ { TOPO_METH_REPLACED, TOPO_METH_REPLACED_DESC,
+ TOPO_METH_REPLACED_VERSION, TOPO_STABILITY_INTERNAL,
+ hc_fmri_replaced },
{ TOPO_METH_UNUSABLE, TOPO_METH_UNUSABLE_DESC,
TOPO_METH_UNUSABLE_VERSION, TOPO_STABILITY_INTERNAL,
hc_fmri_unusable },
@@ -1716,6 +1722,84 @@ hc_fmri_present(topo_mod_t *mod, tnode_t *node, topo_version_t version,
}
+/*
+ * Walker callback used by hc_fmri_replaced().  Asks the node's enumerator
+ * for the replaced state of hap->ha_fmri and leaves the result list in
+ * hap->ha_nvl.  When the enumerator has no "replaced" method the "present"
+ * method is consulted instead: a present resource is reported as
+ * FMD_OBJ_STATE_UNKNOWN and an absent one as FMD_OBJ_STATE_NOT_PRESENT.
+ * Returns 0 on success or ETOPO_PROP_NVL if the result cannot be built.
+ */
static int
+hc_is_replaced(topo_mod_t *mod, tnode_t *node, void *pdata)
+{
+	int err;
+	struct hc_args *hap = (struct hc_args *)pdata;
+	uint32_t present = 0;
+	uint32_t rval;
+
+	/*
+	 * check with the enumerator that created this FMRI
+	 * (topo node)
+	 */
+	if (topo_method_invoke(node, TOPO_METH_REPLACED,
+	    TOPO_METH_REPLACED_VERSION, hap->ha_fmri, &hap->ha_nvl,
+	    &err) >= 0)
+		return (0);
+
+	/*
+	 * enumerator didn't provide "replaced" method - so
+	 * try "present" method
+	 */
+	if (topo_method_invoke(node, TOPO_METH_PRESENT,
+	    TOPO_METH_PRESENT_VERSION, hap->ha_fmri, &hap->ha_nvl,
+	    &err) < 0) {
+		/* no present method either - assume present */
+		present = 1;
+	} else {
+		(void) nvlist_lookup_uint32(hap->ha_nvl,
+		    TOPO_METH_PRESENT_RET, &present);
+		/*
+		 * The present() result list is replaced below; free it here
+		 * so that it is not leaked when ha_nvl is overwritten.
+		 */
+		nvlist_free(hap->ha_nvl);
+		hap->ha_nvl = NULL;
+	}
+
+	/* Presence alone cannot distinguish "same part" from "new part". */
+	rval = present ? FMD_OBJ_STATE_UNKNOWN : FMD_OBJ_STATE_NOT_PRESENT;
+
+	if (topo_mod_nvalloc(mod, &hap->ha_nvl, NV_UNIQUE_NAME) != 0)
+		return (ETOPO_PROP_NVL);
+	if (nvlist_add_uint32(hap->ha_nvl, TOPO_METH_REPLACED_RET, rval) != 0)
+		return (ETOPO_PROP_NVL);
+
+	return (0);
+}
+
+/*
+ * TOPO_METH_REPLACED for the hc scheme.  Walks the topology for the FMRI in
+ * "in", running hc_is_replaced() through the walker, and hands any result
+ * list it produced back through *out.  Returns 0 when the walk step did not
+ * fail, -1 when it did or when the walker could not be set up.
+ */
+static int
+hc_fmri_replaced(topo_mod_t *mod, tnode_t *node, topo_version_t version,
+    nvlist_t *in, nvlist_t **out)
+{
+	struct hc_args *args;
+	struct hc_walk *walk;
+	int rc = -1;
+
+	if (version > TOPO_METH_REPLACED_VERSION)
+		return (topo_mod_seterrno(mod, ETOPO_METHOD_VERNEW));
+
+	args = topo_mod_alloc(mod, sizeof (struct hc_args));
+	if (args == NULL)
+		return (topo_mod_seterrno(mod, EMOD_NOMEM));
+
+	args->ha_fmri = in;
+	args->ha_nvl = NULL;
+
+	walk = hc_walk_init(mod, node, args->ha_fmri, hc_is_replaced,
+	    (void *)args);
+	if (walk != NULL) {
+		if (topo_walk_step(walk->hcw_wp, TOPO_WALK_CHILD) !=
+		    TOPO_WALK_ERR)
+			rc = 0;
+		topo_walk_fini(walk->hcw_wp);
+		topo_mod_free(mod, walk, sizeof (struct hc_walk));
+	}
+
+	/* Pass on whatever the callback produced, even after an error. */
+	if (args->ha_nvl != NULL)
+		*out = args->ha_nvl;
+
+	topo_mod_free(mod, args, sizeof (struct hc_args));
+
+	return (rc);
+}
+
+static int
hc_unusable(topo_mod_t *mod, tnode_t *node, void *pdata)
{
int err;
diff --git a/usr/src/lib/fm/topo/libtopo/common/libtopo.h b/usr/src/lib/fm/topo/libtopo/common/libtopo.h
index 5164508eaa..4d905b194a 100644
--- a/usr/src/lib/fm/topo/libtopo/common/libtopo.h
+++ b/usr/src/lib/fm/topo/libtopo/common/libtopo.h
@@ -90,9 +90,11 @@ extern void topo_walk_fini(topo_walk_t *);
* FMRI helper routines
*/
extern int topo_fmri_present(topo_hdl_t *, nvlist_t *, int *);
+extern int topo_fmri_replaced(topo_hdl_t *, nvlist_t *, int *);
extern int topo_fmri_contains(topo_hdl_t *, nvlist_t *, nvlist_t *, int *);
extern int topo_fmri_expand(topo_hdl_t *, nvlist_t *, int *);
extern int topo_fmri_unusable(topo_hdl_t *, nvlist_t *, int *);
+extern int topo_fmri_service_state(topo_hdl_t *, nvlist_t *, int *);
extern int topo_fmri_nvl2str(topo_hdl_t *, nvlist_t *, char **, int *);
extern int topo_fmri_str2nvl(topo_hdl_t *, const char *, nvlist_t **, int *);
extern int topo_fmri_asru(topo_hdl_t *, nvlist_t *, nvlist_t **, int *);
diff --git a/usr/src/lib/fm/topo/libtopo/common/mapfile-vers b/usr/src/lib/fm/topo/libtopo/common/mapfile-vers
index 95ac3e2bcd..c24261fd3b 100644
--- a/usr/src/lib/fm/topo/libtopo/common/mapfile-vers
+++ b/usr/src/lib/fm/topo/libtopo/common/mapfile-vers
@@ -41,7 +41,9 @@ SUNWprivate {
topo_fmri_label;
topo_fmri_nvl2str;
topo_fmri_present;
+ topo_fmri_replaced;
topo_fmri_serial;
+ topo_fmri_service_state;
topo_fmri_setprop;
topo_fmri_str2nvl;
topo_fmri_strcmp;
diff --git a/usr/src/lib/fm/topo/libtopo/common/topo_fmri.c b/usr/src/lib/fm/topo/libtopo/common/topo_fmri.c
index da82e8bc57..c91d01c4b5 100644
--- a/usr/src/lib/fm/topo/libtopo/common/topo_fmri.c
+++ b/usr/src/lib/fm/topo/libtopo/common/topo_fmri.c
@@ -30,6 +30,7 @@
#include <string.h>
#include <limits.h>
#include <fm/topo_mod.h>
+#include <fm/fmd_fmri.h>
#include <sys/fm/protocol.h>
#include <topo_alloc.h>
#include <topo_error.h>
@@ -50,8 +51,10 @@
*
* - expand
* - present
+ * - replaced
* - contains
* - unusable
+ * - service_state
* - nvl2str
*
* In addition, the following operations are supported per-FMRI:
@@ -199,6 +202,34 @@ topo_fmri_present(topo_hdl_t *thp, nvlist_t *fmri, int *err)
}
+/*
+ * Ask the root node of the FMRI's scheme whether the resource named by fmri
+ * has been replaced, by invoking its TOPO_METH_REPLACED method.
+ *
+ * On success the uint32 stored under TOPO_METH_REPLACED_RET in the method's
+ * output is returned; the state starts out as FMD_OBJ_STATE_NOT_PRESENT and
+ * the lookup's return code is ignored.  If the method invocation fails the
+ * error is recorded through set_error() and FMD_OBJ_STATE_UNKNOWN is
+ * returned.  If the FMRI carries no scheme, or the scheme has no root node,
+ * the result of set_error() is returned directly.
+ */
int
+topo_fmri_replaced(topo_hdl_t *thp, nvlist_t *fmri, int *err)
+{
+	tnode_t *root;
+	nvlist_t *result = NULL;
+	char *name;
+	uint32_t state = FMD_OBJ_STATE_NOT_PRESENT;
+
+	if (nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &name) != 0) {
+		return (set_error(thp, ETOPO_FMRI_MALFORM, err,
+		    TOPO_METH_REPLACED, result));
+	}
+
+	root = topo_hdl_root(thp, name);
+	if (root == NULL) {
+		return (set_error(thp, ETOPO_METHOD_NOTSUP, err,
+		    TOPO_METH_REPLACED, result));
+	}
+
+	if (topo_method_invoke(root, TOPO_METH_REPLACED,
+	    TOPO_METH_REPLACED_VERSION, fmri, &result, err) >= 0) {
+		/* Lookup failures are deliberately ignored. */
+		(void) nvlist_lookup_uint32(result, TOPO_METH_REPLACED_RET,
+		    &state);
+		nvlist_free(result);
+
+		return (state);
+	}
+
+	(void) set_error(thp, *err, err, TOPO_METH_REPLACED, result);
+	return (FMD_OBJ_STATE_UNKNOWN);
+}
+
+int
topo_fmri_contains(topo_hdl_t *thp, nvlist_t *fmri, nvlist_t *subfmri, int *err)
{
uint32_t contains;
@@ -267,6 +298,34 @@ topo_fmri_unusable(topo_hdl_t *thp, nvlist_t *fmri, int *err)
}
+/*
+ * Invoke the TOPO_METH_SERVICE_STATE method on the root node of the FMRI's
+ * scheme and return the uint32 stored under TOPO_METH_SERVICE_STATE_RET in
+ * the method's output.  The value starts out as FMD_SERVICE_STATE_UNKNOWN
+ * and the lookup's return code is ignored.
+ *
+ * Every failure (no scheme in the FMRI, no root node for the scheme, or a
+ * failed method invocation) returns the result of set_error().
+ */
int
+topo_fmri_service_state(topo_hdl_t *thp, nvlist_t *fmri, int *err)
+{
+	uint32_t state = FMD_SERVICE_STATE_UNKNOWN;
+	nvlist_t *result = NULL;
+	tnode_t *root;
+	char *name;
+	int rv;
+
+	if (nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &name) != 0) {
+		return (set_error(thp, ETOPO_FMRI_MALFORM, err,
+		    TOPO_METH_SERVICE_STATE, result));
+	}
+
+	root = topo_hdl_root(thp, name);
+	if (root == NULL) {
+		return (set_error(thp, ETOPO_METHOD_NOTSUP, err,
+		    TOPO_METH_SERVICE_STATE, result));
+	}
+
+	rv = topo_method_invoke(root, TOPO_METH_SERVICE_STATE,
+	    TOPO_METH_SERVICE_STATE_VERSION, fmri, &result, err);
+	if (rv < 0) {
+		return (set_error(thp, *err, err, TOPO_METH_SERVICE_STATE,
+		    result));
+	}
+
+	/* Lookup failures are deliberately ignored. */
+	(void) nvlist_lookup_uint32(result, TOPO_METH_SERVICE_STATE_RET,
+	    &state);
+	nvlist_free(result);
+
+	return (state);
+}
+
+int
topo_fmri_expand(topo_hdl_t *thp, nvlist_t *fmri, int *err)
{
char *scheme;
diff --git a/usr/src/lib/fm/topo/libtopo/common/topo_mod.h b/usr/src/lib/fm/topo/libtopo/common/topo_mod.h
index 4c404ca450..398b704287 100644
--- a/usr/src/lib/fm/topo/libtopo/common/topo_mod.h
+++ b/usr/src/lib/fm/topo/libtopo/common/topo_mod.h
@@ -137,12 +137,24 @@ extern nvlist_t *topo_mod_auth(topo_mod_t *, tnode_t *);
#define TOPO_METH_PRESENT_VERSION TOPO_METH_PRESENT_VERSION0
#define TOPO_METH_PRESENT_RET "present-ret"
+#define TOPO_METH_REPLACED "topo_replaced"
+#define TOPO_METH_REPLACED_DESC "replaced indicator"
+#define TOPO_METH_REPLACED_VERSION0 0
+#define TOPO_METH_REPLACED_VERSION TOPO_METH_REPLACED_VERSION0
+#define TOPO_METH_REPLACED_RET "replaced-ret"
+
#define TOPO_METH_UNUSABLE "topo_unusable"
#define TOPO_METH_UNUSABLE_DESC "unusable indicator"
#define TOPO_METH_UNUSABLE_VERSION0 0
#define TOPO_METH_UNUSABLE_VERSION TOPO_METH_UNUSABLE_VERSION0
#define TOPO_METH_UNUSABLE_RET "unusable-ret"
+#define TOPO_METH_SERVICE_STATE "topo_service_state"
+#define TOPO_METH_SERVICE_STATE_DESC "service_state indicator"
+#define TOPO_METH_SERVICE_STATE_VERSION0 0
+#define TOPO_METH_SERVICE_STATE_VERSION TOPO_METH_SERVICE_STATE_VERSION0
+#define TOPO_METH_SERVICE_STATE_RET "service_state-ret"
+
#define TOPO_METH_EXPAND "topo_expand"
#define TOPO_METH_EXPAND_DESC "expand FMRI"
#define TOPO_METH_EXPAND_VERSION0 0
diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/chip.h b/usr/src/lib/fm/topo/modules/i86pc/chip/chip.h
index caedcb2519..637b90ac4b 100644
--- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip.h
+++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip.h
@@ -138,6 +138,8 @@ extern int mem_asru_compute(topo_mod_t *, tnode_t *, topo_version_t,
nvlist_t *, nvlist_t **);
extern int rank_fmri_present(topo_mod_t *, tnode_t *, topo_version_t,
nvlist_t *, nvlist_t **);
+extern int rank_fmri_replaced(topo_mod_t *, tnode_t *, topo_version_t,
+ nvlist_t *, nvlist_t **);
extern int mem_asru_create(topo_mod_t *, nvlist_t *, nvlist_t **);
/*
diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_amd.c b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_amd.c
index 69a8804082..937a9cd97d 100644
--- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_amd.c
+++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_amd.c
@@ -81,6 +81,9 @@ static const topo_method_t rank_methods[] = {
{ TOPO_METH_PRESENT, TOPO_METH_PRESENT_DESC,
TOPO_METH_PRESENT_VERSION, TOPO_STABILITY_INTERNAL,
rank_fmri_present },
+ { TOPO_METH_REPLACED, TOPO_METH_REPLACED_DESC,
+ TOPO_METH_REPLACED_VERSION, TOPO_STABILITY_INTERNAL,
+ rank_fmri_replaced },
{ NULL }
};
diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c
index 04ea6be512..2de05e94da 100644
--- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c
+++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c
@@ -33,6 +33,7 @@
#include <sys/types.h>
#include <stdarg.h>
#include <strings.h>
+#include <fm/fmd_fmri.h>
#include <sys/fm/protocol.h>
#include "chip.h"
@@ -454,3 +455,69 @@ done:
return (0);
}
+
+/*
+ * If we're getting called then the question of whether this dimm is plugged
+ * in has already been answered.  What we don't know for sure is whether it's
+ * the same dimm or a different one plugged in the same slot.  To check, we
+ * try and compare the serial numbers on the dimm in the current topology with
+ * the serial num from the unum fmri that got passed into this function as the
+ * argument.
+ *
+ * If either serial number is unavailable no comparison is possible, so we
+ * err on the side of caution and report FMD_OBJ_STATE_UNKNOWN.  Otherwise
+ * the result is FMD_OBJ_STATE_STILL_PRESENT when the serials match and
+ * FMD_OBJ_STATE_REPLACED when they differ.  The state is handed back as the
+ * TOPO_METH_REPLACED_RET member of a newly allocated nvlist in *out.
+ *
+ * Returns 0 on success.  On failure the module errno is set and the result
+ * of topo_mod_seterrno() is returned; if the nvlist was already allocated it
+ * is freed and *out is cleared so the caller never sees a dangling pointer.
+ */
+/* ARGSUSED */
+int
+rank_fmri_replaced(topo_mod_t *mod, tnode_t *node, topo_version_t version,
+    nvlist_t *in, nvlist_t **out)
+{
+	tnode_t *dimmnode;
+	int err, rval = FMD_OBJ_STATE_UNKNOWN;
+	nvlist_t *unum;
+	char *curr_serial, *old_serial = NULL;
+
+	/*
+	 * If a serial number for the dimm was available at the time of the
+	 * fault, it will have been added as a string to the unum nvlist
+	 */
+	unum = in;
+	if (nvlist_lookup_string(unum, FM_FMRI_HC_SERIAL_ID, &old_serial) != 0)
+		goto done;
+
+	/*
+	 * If the current serial number is available for the DIMM that this rank
+	 * belongs to, it will be accessible as a property on the parent (dimm)
+	 * node.
+	 */
+	dimmnode = topo_node_parent(node);
+	if (topo_prop_get_string(dimmnode, TOPO_PGROUP_PROTOCOL,
+	    FM_FMRI_HC_SERIAL_ID, &curr_serial, &err) != 0) {
+		if (err != ETOPO_PROP_NOENT) {
+			whinge(mod, &err, "rank_fmri_replaced: Unexpected "
+			    "error retrieving serial from node");
+			return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
+		} else
+			goto done;
+	}
+
+	if (strcmp(old_serial, curr_serial) != 0)
+		rval = FMD_OBJ_STATE_REPLACED;
+	else
+		rval = FMD_OBJ_STATE_STILL_PRESENT;
+
+	topo_mod_strfree(mod, curr_serial);
+done:
+	if (topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) < 0) {
+		whinge(mod, &err,
+		    "rank_fmri_replaced: failed to allocate nvlist!");
+		return (topo_mod_seterrno(mod, EMOD_NOMEM));
+	}
+
+	if (nvlist_add_uint32(*out, TOPO_METH_REPLACED_RET, rval) != 0) {
+		nvlist_free(*out);
+		/* Don't leave the caller holding a freed nvlist. */
+		*out = NULL;
+		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
+	}
+
+	return (0);
+}
diff --git a/usr/src/lib/fm/topo/modules/sun4v/platform-cpu/cpu.c b/usr/src/lib/fm/topo/modules/sun4v/platform-cpu/cpu.c
index bda8d6e787..5f028e1841 100644
--- a/usr/src/lib/fm/topo/modules/sun4v/platform-cpu/cpu.c
+++ b/usr/src/lib/fm/topo/modules/sun4v/platform-cpu/cpu.c
@@ -29,6 +29,7 @@
#include <strings.h>
#include <umem.h>
#include <fm/topo_mod.h>
+#include <fm/fmd_fmri.h>
#include <sys/fm/ldom.h>
#include <sys/fm/protocol.h>
@@ -37,7 +38,7 @@
/*
* This enumerator creates cpu-schemed nodes for each strand found in the
* sun4v Physical Resource Inventory (PRI).
- * Each node export three methods present(), expand() and unusable().
+ * Each node exports four methods: present(), expand(), replaced() and
+ * unusable().
*
*/
@@ -52,6 +53,8 @@ static int cpu_enum(topo_mod_t *, tnode_t *, const char *, topo_instance_t,
static void cpu_release(topo_mod_t *, tnode_t *);
static int cpu_present(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
nvlist_t **);
+static int cpu_replaced(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
+ nvlist_t **);
static int cpu_expand(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
nvlist_t **);
static int cpu_unusable(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
@@ -66,6 +69,8 @@ static const topo_modinfo_t cpu_info =
static const topo_method_t cpu_methods[] = {
{ TOPO_METH_PRESENT, TOPO_METH_PRESENT_DESC,
TOPO_METH_PRESENT_VERSION, TOPO_STABILITY_INTERNAL, cpu_present },
+ { TOPO_METH_REPLACED, TOPO_METH_REPLACED_DESC,
+ TOPO_METH_REPLACED_VERSION, TOPO_STABILITY_INTERNAL, cpu_replaced },
{ TOPO_METH_EXPAND, TOPO_METH_EXPAND_DESC,
TOPO_METH_EXPAND_VERSION, TOPO_STABILITY_INTERNAL, cpu_expand },
{ TOPO_METH_UNUSABLE, TOPO_METH_UNUSABLE_DESC,
@@ -182,6 +187,54 @@ cpu_present(topo_mod_t *mod, tnode_t *node, topo_version_t vers,
+/*
+ * Report whether the cpu named by the cpu-scheme FMRI "in" has been replaced.
+ * The strand is looked up by its physical cpuid via cpu_find_cpumap():
+ *
+ *	no cpumap entry for the cpuid		FMD_OBJ_STATE_NOT_PRESENT
+ *	entry found, FMRI has no serial		FMD_OBJ_STATE_UNKNOWN
+ *	entry found, serial matches		FMD_OBJ_STATE_STILL_PRESENT
+ *	entry found, serial differs		FMD_OBJ_STATE_REPLACED
+ *
+ * The state is handed back as the TOPO_METH_REPLACED_RET member of a newly
+ * allocated nvlist in *out.
+ *
+ * Returns 0 on success.  On failure EMOD_NVL_INVAL is set and the result of
+ * topo_mod_seterrno() is returned; if the nvlist was already allocated it is
+ * freed and *out is cleared so the caller never sees a dangling pointer.
+ */
/*ARGSUSED*/
static int
+cpu_replaced(topo_mod_t *mod, tnode_t *node, topo_version_t vers,
+    nvlist_t *in, nvlist_t **out)
+{
+	uint8_t version;
+	uint32_t cpuid;
+	uint64_t nvlserid;
+	uint32_t rval = FMD_OBJ_STATE_NOT_PRESENT;
+	md_cpumap_t *mcmp;
+	md_info_t *chip = (md_info_t *)topo_mod_getspecific(mod);
+
+	/*
+	 * Get the physical cpuid
+	 */
+	if (nvlist_lookup_uint8(in, FM_VERSION, &version) != 0 ||
+	    version > FM_CPU_SCHEME_VERSION ||
+	    nvlist_lookup_uint32(in, FM_FMRI_CPU_ID, &cpuid) != 0) {
+		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
+	}
+
+	/*
+	 * Find the cpuid entry
+	 * If the input nvl contains a serial number, the cpu is identified
+	 * by a tuple <cpuid, cpuserial>
+	 * Otherwise, the cpu is identified by the <cpuid>.
+	 */
+	if ((mcmp = cpu_find_cpumap(chip, cpuid)) != NULL) {
+		if (nvlist_lookup_uint64(in, FM_FMRI_CPU_SERIAL_ID, &nvlserid)
+		    == 0)
+			rval = (nvlserid == mcmp->cpumap_serialno) ?
+			    FMD_OBJ_STATE_STILL_PRESENT :
+			    FMD_OBJ_STATE_REPLACED;
+		else
+			rval = FMD_OBJ_STATE_UNKNOWN;
+	}
+
+	/* return the replaced status */
+	if (topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) != 0)
+		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
+	if (nvlist_add_uint32(*out, TOPO_METH_REPLACED_RET, rval) != 0) {
+		nvlist_free(*out);
+		/* Don't leave the caller holding a freed nvlist. */
+		*out = NULL;
+		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
+	}
+
+	return (0);
+}
+
+/*ARGSUSED*/
+static int
cpu_expand(topo_mod_t *mod, tnode_t *node, topo_version_t vers,
nvlist_t *in, nvlist_t **out)
{
diff --git a/usr/src/lib/fm/topo/modules/sun4v/platform-mem/mem.c b/usr/src/lib/fm/topo/modules/sun4v/platform-mem/mem.c
index 805ec3ab03..600e21873b 100644
--- a/usr/src/lib/fm/topo/modules/sun4v/platform-mem/mem.c
+++ b/usr/src/lib/fm/topo/modules/sun4v/platform-mem/mem.c
@@ -29,6 +29,7 @@
#include <strings.h>
#include <umem.h>
#include <fm/topo_mod.h>
+#include <fm/fmd_fmri.h>
#include <sys/fm/protocol.h>
#include <sys/mem.h>
@@ -37,7 +38,7 @@
/*
* This enumerator creates mem-schemed nodes for each dimm found in the
* sun4v Physical Resource Inventory (PRI).
- * Each node exports four methods: present(), expand(), unusable(),
+ * Each node exports five methods: present(), expand(), unusable(), replaced(),
* and contains().
*
*/
@@ -53,6 +54,8 @@ static int mem_enum(topo_mod_t *, tnode_t *, const char *, topo_instance_t,
static void mem_release(topo_mod_t *, tnode_t *);
static int mem_present(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
nvlist_t **);
+static int mem_replaced(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
+ nvlist_t **);
static int mem_expand(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
nvlist_t **);
static int mem_unusable(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
@@ -69,6 +72,8 @@ static const topo_modinfo_t mem_info =
static const topo_method_t mem_methods[] = {
{ TOPO_METH_PRESENT, TOPO_METH_PRESENT_DESC,
TOPO_METH_PRESENT_VERSION, TOPO_STABILITY_INTERNAL, mem_present },
+ { TOPO_METH_REPLACED, TOPO_METH_REPLACED_DESC,
+ TOPO_METH_REPLACED_VERSION, TOPO_STABILITY_INTERNAL, mem_replaced },
{ TOPO_METH_EXPAND, TOPO_METH_EXPAND_DESC,
TOPO_METH_EXPAND_VERSION, TOPO_STABILITY_INTERNAL, mem_expand },
{ TOPO_METH_UNUSABLE, TOPO_METH_UNUSABLE_DESC,
@@ -166,6 +171,45 @@ mem_present(topo_mod_t *mod, tnode_t *node, topo_version_t vers,
return (0);
}
+/*
+ * Report whether the memory named by the mem-scheme FMRI "in" is still part
+ * of the current inventory.  Each serial id in the FMRI's
+ * FM_FMRI_MEM_SERIAL_ID array is looked up with mem_get_dimm_by_sn(); the
+ * state is FMD_OBJ_STATE_STILL_PRESENT as soon as one of them is found and
+ * FMD_OBJ_STATE_NOT_PRESENT if none is.  This routine never reports
+ * FMD_OBJ_STATE_REPLACED.
+ *
+ * The state is handed back as the TOPO_METH_REPLACED_RET member of a newly
+ * allocated nvlist in *out.
+ *
+ * Returns 0 on success.  On failure (including an FMRI without a usable
+ * version or serial id array) EMOD_NVL_INVAL is set and the result of
+ * topo_mod_seterrno() is returned; if the nvlist was already allocated it is
+ * freed and *out is cleared so the caller never sees a dangling pointer.
+ */
+/*ARGSUSED*/
+static int
+mem_replaced(topo_mod_t *mod, tnode_t *node, topo_version_t vers,
+    nvlist_t *in, nvlist_t **out)
+{
+	uint8_t version;
+	char **nvlserids;
+	size_t n, nserids;
+	uint32_t rval = FMD_OBJ_STATE_NOT_PRESENT;
+	md_mem_info_t *mem = (md_mem_info_t *)topo_mod_getspecific(mod);
+
+	/* sun4v platforms all support dimm serial numbers */
+
+	if (nvlist_lookup_uint8(in, FM_VERSION, &version) != 0 ||
+	    version > FM_MEM_SCHEME_VERSION ||
+	    nvlist_lookup_string_array(in, FM_FMRI_MEM_SERIAL_ID,
+	    &nvlserids, &nserids) != 0) {
+		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
+	}
+
+	/* Find the dimm entry */
+	for (n = 0; n < nserids; n++) {
+		if (mem_get_dimm_by_sn(nvlserids[n], mem) != NULL) {
+			rval = FMD_OBJ_STATE_STILL_PRESENT;
+			break;
+		}
+	}
+
+	/* return the replaced status */
+	if (topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) != 0)
+		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
+	if (nvlist_add_uint32(*out, TOPO_METH_REPLACED_RET, rval) != 0) {
+		nvlist_free(*out);
+		/* Don't leave the caller holding a freed nvlist. */
+		*out = NULL;
+		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
+	}
+
+	return (0);
+}
+
void
mem_strarray_free(topo_mod_t *mod, char **arr, size_t dim)
{
diff --git a/usr/src/lib/libdevinfo/devinfo.c b/usr/src/lib/libdevinfo/devinfo.c
index 582aad2156..db6a62cab1 100644
--- a/usr/src/lib/libdevinfo/devinfo.c
+++ b/usr/src/lib/libdevinfo/devinfo.c
@@ -978,6 +978,8 @@ di_state(di_node_t node)
result |= DI_DEVICE_OFFLINE;
if (DI_NODE(node)->state & DEVI_DEVICE_DOWN)
result |= DI_DEVICE_OFFLINE;
+ if (DI_NODE(node)->state & DEVI_DEVICE_DEGRADED)
+ result |= DI_DEVICE_DEGRADED;
if (DI_NODE(node)->state & DEVI_BUS_QUIESCED)
result |= DI_BUS_QUIESCED;
if (DI_NODE(node)->state & DEVI_BUS_DOWN)
diff --git a/usr/src/lib/libdevinfo/libdevinfo.h b/usr/src/lib/libdevinfo/libdevinfo.h
index ca745b3b31..0bea41674c 100644
--- a/usr/src/lib/libdevinfo/libdevinfo.h
+++ b/usr/src/lib/libdevinfo/libdevinfo.h
@@ -82,6 +82,7 @@ extern "C" {
#define DI_DRIVER_DETACHED 0x8000
#define DI_DEVICE_OFFLINE 0x1
#define DI_DEVICE_DOWN 0x2
+#define DI_DEVICE_DEGRADED 0x4
#define DI_BUS_QUIESCED 0x100
#define DI_BUS_DOWN 0x200
diff --git a/usr/src/uts/common/os/devcfg.c b/usr/src/uts/common/os/devcfg.c
index aff1945f7f..73d6980654 100644
--- a/usr/src/uts/common/os/devcfg.c
+++ b/usr/src/uts/common/os/devcfg.c
@@ -7643,9 +7643,6 @@ e_ddi_retire_finalize(dev_info_t *dip, void *arg)
(void *)dip));
if (finalize)
e_ddi_offline_finalize(dip, DDI_FAILURE);
- mutex_enter(&DEVI(dip)->devi_lock);
- DEVI_SET_DEVICE_DEGRADED(dip);
- mutex_exit(&DEVI(dip)->devi_lock);
}
/*
diff --git a/usr/src/uts/common/sys/fm/protocol.h b/usr/src/uts/common/sys/fm/protocol.h
index 4c65506f4b..b7eb23ab09 100644
--- a/usr/src/uts/common/sys/fm/protocol.h
+++ b/usr/src/uts/common/sys/fm/protocol.h
@@ -56,6 +56,8 @@ extern "C" {
#define FM_LIST_SUSPECT_CLASS FM_LIST_EVENT ".suspect"
#define FM_LIST_ISOLATED_CLASS FM_LIST_EVENT ".isolated"
#define FM_LIST_REPAIRED_CLASS FM_LIST_EVENT ".repaired"
+#define FM_LIST_UPDATED_CLASS FM_LIST_EVENT ".updated"
+#define FM_LIST_RESOLVED_CLASS FM_LIST_EVENT ".resolved"
/* ereport class subcategory values */
#define FM_ERROR_CPU "cpu"
@@ -72,7 +74,10 @@ extern "C" {
/* list.* event payload member names */
#define FM_LIST_EVENT_SIZE "list-sz"
-/* list.suspect, isolated, and repaired versions and payload member names */
+/*
+ * list.suspect, isolated, updated, repaired and resolved
+ * versions/payload member names.
+ */
#define FM_SUSPECT_UUID "uuid"
#define FM_SUSPECT_DIAG_CODE "code"
#define FM_SUSPECT_DIAG_TIME "diag-time"
@@ -90,6 +95,10 @@ extern "C" {
#define FM_SUSPECT_FAULTY 0x1
#define FM_SUSPECT_UNUSABLE 0x2
#define FM_SUSPECT_NOT_PRESENT 0x4
+#define FM_SUSPECT_DEGRADED 0x8
+#define FM_SUSPECT_REPAIRED 0x10
+#define FM_SUSPECT_REPLACED 0x20
+#define FM_SUSPECT_ACQUITTED 0x40
/* fault event versions and payload member names */
#define FM_FAULT_VERS0 0
@@ -111,6 +120,9 @@ extern "C" {
#define FM_RSRC_ASRU_UUID "uuid"
#define FM_RSRC_ASRU_CODE "code"
#define FM_RSRC_ASRU_FAULTY "faulty"
+#define FM_RSRC_ASRU_REPAIRED "repaired"
+#define FM_RSRC_ASRU_REPLACED "replaced"
+#define FM_RSRC_ASRU_ACQUITTED "acquitted"
#define FM_RSRC_ASRU_UNUSABLE "unusable"
#define FM_RSRC_ASRU_EVENT "event"