summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Hanson <Stephen.Hanson@Sun.COM>2009-03-20 02:22:05 -0700
committerStephen Hanson <Stephen.Hanson@Sun.COM>2009-03-20 02:22:05 -0700
commitcbf75e67acb6c32a2f4884f28a839d59f7988d37 (patch)
tree797d512a8c6a0afbe2083be228bb43cce9be5c93
parentaa4b59d395817702a402da9bd7a40537fcbff526 (diff)
downloadillumos-gate-cbf75e67acb6c32a2f4884f28a839d59f7988d37.tar.gz
6533823 need better way of proxying faults across event transport
6788551 provide means for faults/defects to be directly injected into fmd
-rw-r--r--usr/src/cmd/fm/fmadm/common/faulty.c64
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd.c4
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_api.c48
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_api.h6
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_api.map1
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_asru.c528
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_asru.h61
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_case.c345
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_case.h27
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_ckpt.c14
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_dispq.c5
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_mdb.c64
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_protocol.c41
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_protocol.h10
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c92
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_sysevent.c7
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_xprt.c625
-rw-r--r--usr/src/cmd/fm/fmd/common/fmd_xprt.h19
-rw-r--r--usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c3
-rw-r--r--usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c6
-rw-r--r--usr/src/cmd/fm/modules/common/io-retire/rio_main.c15
-rw-r--r--usr/src/cmd/fm/modules/common/ip-transport/ip.c47
-rw-r--r--usr/src/cmd/fm/modules/common/syslog-msgs/syslog.c13
-rw-r--r--usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c12
-rw-r--r--usr/src/uts/common/sys/fm/protocol.h2
25 files changed, 1640 insertions, 419 deletions
diff --git a/usr/src/cmd/fm/fmadm/common/faulty.c b/usr/src/cmd/fm/fmadm/common/faulty.c
index f89feb2ab4..378f724153 100644
--- a/usr/src/cmd/fm/fmadm/common/faulty.c
+++ b/usr/src/cmd/fm/fmadm/common/faulty.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -193,6 +193,7 @@ typedef struct host_id {
char *chassis;
char *server;
char *platform;
+ char *domain;
} hostid_t;
typedef struct host_id_list {
@@ -265,7 +266,7 @@ format_date(char *buf, size_t len, uint64_t sec)
}
static hostid_t *
-find_hostid_in_list(char *platform, char *chassis, char *server)
+find_hostid_in_list(char *platform, char *chassis, char *server, char *domain)
{
hostid_t *rt = NULL;
host_id_list_t *hostp;
@@ -281,7 +282,9 @@ find_hostid_in_list(char *platform, char *chassis, char *server)
hostp->hostid.server &&
strcmp(hostp->hostid.server, server) == 0 &&
(chassis == NULL || hostp->hostid.chassis == NULL ||
- strcmp(chassis, hostp->hostid.chassis) == 0)) {
+ strcmp(chassis, hostp->hostid.chassis) == 0) &&
+ (domain == NULL || hostp->hostid.domain == NULL ||
+ strcmp(domain, hostp->hostid.domain) == 0)) {
rt = &hostp->hostid;
break;
}
@@ -292,6 +295,7 @@ find_hostid_in_list(char *platform, char *chassis, char *server)
hostp->hostid.platform = strdup(platform);
hostp->hostid.server = strdup(server);
hostp->hostid.chassis = chassis ? strdup(chassis) : NULL;
+ hostp->hostid.domain = domain ? strdup(domain) : NULL;
hostp->next = host_list;
host_list = hostp;
rt = &hostp->hostid;
@@ -303,7 +307,7 @@ find_hostid_in_list(char *platform, char *chassis, char *server)
static hostid_t *
find_hostid(nvlist_t *nvl)
{
- char *platform = NULL, *chassis = NULL, *server = NULL;
+ char *platform = NULL, *chassis = NULL, *server = NULL, *domain = NULL;
nvlist_t *auth, *fmri;
hostid_t *rt = NULL;
@@ -314,7 +318,8 @@ find_hostid(nvlist_t *nvl)
(void) nvlist_lookup_string(auth, FM_FMRI_AUTH_SERVER, &server);
(void) nvlist_lookup_string(auth, FM_FMRI_AUTH_CHASSIS,
&chassis);
- rt = find_hostid_in_list(platform, chassis, server);
+ (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_DOMAIN, &domain);
+ rt = find_hostid_in_list(platform, chassis, server, domain);
}
return (rt);
}
@@ -1367,6 +1372,33 @@ print_fru_status(int status, char *label)
}
static void
+print_rsrc_status(int status, char *label)
+{
+ char *msg = "";
+
+ if (status & FM_SUSPECT_NOT_PRESENT)
+ msg = dgettext("FMD", "not present");
+ else if (status & FM_SUSPECT_FAULTY) {
+ if (status & FM_SUSPECT_DEGRADED)
+ msg = dgettext("FMD",
+ "faulted but still providing degraded service");
+ else if (status & FM_SUSPECT_UNUSABLE)
+ msg = dgettext("FMD",
+ "faulted and taken out of service");
+ else
+ msg = dgettext("FMD", "faulted but still in service");
+ } else if (status & FM_SUSPECT_REPLACED)
+ msg = dgettext("FMD", "replaced");
+ else if (status & FM_SUSPECT_REPAIRED)
+ msg = dgettext("FMD", "repair attempted");
+ else if (status & FM_SUSPECT_ACQUITTED)
+ msg = dgettext("FMD", "acquitted");
+ else
+ msg = dgettext("FMD", "removed");
+ (void) printf("%s %s\n", label, msg);
+}
+
+static void
print_name_list(name_list_t *list, char *label, char *(func)(char *),
int limit, int pct, void (func1)(int, char *), int full)
{
@@ -1476,13 +1508,6 @@ serial_in_fru(name_list_t *fru, name_list_t *serial)
}
static void
-print_server_name(hostid_t *host, char *label)
-{
- (void) printf("%s %s %s %s\n", label, host->server, host->platform,
- host->chassis ? host->chassis : "");
-}
-
-static void
print_sup_record(status_record_t *srp, int opt_i, int full)
{
char buf[32];
@@ -1521,8 +1546,15 @@ print_sup_record(status_record_t *srp, int opt_i, int full)
n++;
}
(void) printf("\n");
- if (n_server > 1)
- print_server_name(srp->host, dgettext("FMD", "Host :"));
+ (void) printf("%s %s", dgettext("FMD", "Host :"),
+ srp->host->server);
+ if (srp->host->domain)
+ (void) printf("\t%s %s", dgettext("FMD", "Domain :"),
+ srp->host->domain);
+ (void) printf("\n%s %s", dgettext("FMD", "Platform :"),
+ srp->host->platform);
+ (void) printf("\t%s %s\n\n", dgettext("FMD", "Chassis_id :"),
+ srp->host->chassis ? srp->host->chassis : "");
if (srp->class)
print_name_list(srp->class,
dgettext("FMD", "Fault class :"), NULL, 0, srp->class->pct,
@@ -1539,11 +1571,11 @@ print_sup_record(status_record_t *srp, int opt_i, int full)
dgettext("FMD", "Affects :"), NULL,
full ? 0 : max_display, 0, print_asru_status, full);
}
- if (full || srp->fru == NULL) {
+ if (full || srp->fru == NULL || srp->asru == NULL) {
if (srp->resource) {
print_name_list(srp->resource,
dgettext("FMD", "Problem in :"),
- NULL, full ? 0 : max_display, 0, print_fru_status,
+ NULL, full ? 0 : max_display, 0, print_rsrc_status,
full);
}
}
diff --git a/usr/src/cmd/fm/fmd/common/fmd.c b/usr/src/cmd/fm/fmd/common/fmd.c
index 903ad1d9b5..d1569d5f70 100644
--- a/usr/src/cmd/fm/fmd/common/fmd.c
+++ b/usr/src/cmd/fm/fmd/common/fmd.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -584,7 +584,7 @@ fmd_destroy(fmd_t *dp)
fmd_module_lock(dp->d_rmod);
while ((cp = fmd_list_next(&dp->d_rmod->mod_cases)) != NULL)
- fmd_case_discard(cp);
+ fmd_case_discard(cp, B_FALSE);
fmd_module_unlock(dp->d_rmod);
fmd_free(dp->d_rmod->mod_stats, sizeof (fmd_modstat_t));
diff --git a/usr/src/cmd/fm/fmd/common/fmd_api.c b/usr/src/cmd/fm/fmd/common/fmd_api.c
index 85323d3281..bfad22b0db 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_api.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_api.c
@@ -1142,7 +1142,15 @@ fmd_case_uuresolved(fmd_hdl_t *hdl, const char *uuid)
fmd_case_t *cp = fmd_case_hash_lookup(fmd.d_cases, uuid);
if (cp != NULL) {
- fmd_case_transition(cp, FMD_CASE_RESOLVED, 0);
+ fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
+ /*
+ * For a proxy, we notify the diagnosing side, and then
+ * wait for it to send us back a list.resolved.
+ */
+ if (cip->ci_xprt != NULL)
+ fmd_xprt_uuresolved(cip->ci_xprt, cip->ci_uuid);
+ else
+ fmd_case_transition(cp, FMD_CASE_RESOLVED, 0);
fmd_case_rele(cp);
}
@@ -2460,6 +2468,44 @@ fmd_xprt_translate(fmd_hdl_t *hdl, fmd_xprt_t *xp, fmd_event_t *ep)
return (fmd_xprt_xtranslate(FMD_EVENT_NVL(ep), xip->xi_auth));
}
+/*ARGSUSED*/
+void
+fmd_xprt_add_domain(fmd_hdl_t *hdl, nvlist_t *nvl, char *domain)
+{
+ nvpair_t *nvp, *nvp2;
+ nvlist_t *nvl2, *nvl3;
+ char *class;
+
+ if (nvl == NULL || domain == NULL)
+ return;
+ for (nvp = nvlist_next_nvpair(nvl, NULL); nvp != NULL;
+ nvp = nvlist_next_nvpair(nvl, nvp)) {
+ if (strcmp(nvpair_name(nvp), FM_CLASS) == 0) {
+ (void) nvpair_value_string(nvp, &class);
+ if (strcmp(class, FM_LIST_SUSPECT_CLASS) != 0)
+ return;
+ }
+ }
+ for (nvp = nvlist_next_nvpair(nvl, NULL); nvp != NULL;
+ nvp = nvlist_next_nvpair(nvl, nvp)) {
+ if (strcmp(nvpair_name(nvp), FM_SUSPECT_DE) == 0) {
+ (void) nvpair_value_nvlist(nvp, &nvl2);
+ for (nvp2 = nvlist_next_nvpair(nvl2, NULL);
+ nvp2 != NULL;
+ nvp2 = nvlist_next_nvpair(nvl2, nvp2)) {
+ if (strcmp(nvpair_name(nvp2),
+ FM_FMRI_AUTHORITY) == 0) {
+ (void) nvpair_value_nvlist(nvp2, &nvl3);
+ (void) nvlist_add_string(nvl3,
+ FM_FMRI_AUTH_DOMAIN, domain);
+ break;
+ }
+ }
+ break;
+ }
+ }
+}
+
void
fmd_xprt_setspecific(fmd_hdl_t *hdl, fmd_xprt_t *xp, void *data)
{
diff --git a/usr/src/cmd/fm/fmd/common/fmd_api.h b/usr/src/cmd/fm/fmd/common/fmd_api.h
index 57d2ef2ef6..f6649875c4 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_api.h
+++ b/usr/src/cmd/fm/fmd/common/fmd_api.h
@@ -251,6 +251,11 @@ extern uint64_t fmd_event_ena_create(fmd_hdl_t *);
#define FMD_XPRT_RDWR 0x3 /* transport is read-write */
#define FMD_XPRT_ACCEPT 0x4 /* transport is accepting connection */
#define FMD_XPRT_SUSPENDED 0x8 /* transport starts suspended */
+#define FMD_XPRT_EXTERNAL 0x80 /* xprt is external to a chassis */
+#define FMD_XPRT_NO_REMOTE_REPAIR 0x100 /* xprt does not allow remote repair */
+#define FMD_XPRT_CACHE_AS_LOCAL 0x200 /* xprt caches fault as if local */
+#define FMD_XPRT_HCONLY 0x400 /* xprt only proxies hc-scheme faults */
+#define FMD_XPRT_HC_PRESENT_ONLY 0x800 /* only locally present hc faults */
extern fmd_xprt_t *fmd_xprt_open(fmd_hdl_t *, uint_t, nvlist_t *, void *);
extern void fmd_xprt_close(fmd_hdl_t *, fmd_xprt_t *);
@@ -260,6 +265,7 @@ extern void fmd_xprt_suspend(fmd_hdl_t *, fmd_xprt_t *);
extern void fmd_xprt_resume(fmd_hdl_t *, fmd_xprt_t *);
extern int fmd_xprt_error(fmd_hdl_t *, fmd_xprt_t *);
extern nvlist_t *fmd_xprt_translate(fmd_hdl_t *, fmd_xprt_t *, fmd_event_t *);
+extern void fmd_xprt_add_domain(fmd_hdl_t *, nvlist_t *, char *);
extern void fmd_xprt_setspecific(fmd_hdl_t *, fmd_xprt_t *, void *);
extern void *fmd_xprt_getspecific(fmd_hdl_t *, fmd_xprt_t *);
diff --git a/usr/src/cmd/fm/fmd/common/fmd_api.map b/usr/src/cmd/fm/fmd/common/fmd_api.map
index 791a3e6593..1ac97bd09b 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_api.map
+++ b/usr/src/cmd/fm/fmd/common/fmd_api.map
@@ -126,4 +126,5 @@
fmd_xprt_setspecific = FUNCTION extern;
fmd_xprt_suspend = FUNCTION extern;
fmd_xprt_translate = FUNCTION extern;
+ fmd_xprt_add_domain = FUNCTION extern;
};
diff --git a/usr/src/cmd/fm/fmd/common/fmd_asru.c b/usr/src/cmd/fm/fmd/common/fmd_asru.c
index 5ed622330a..04b97efe6e 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_asru.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_asru.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -161,11 +161,15 @@ fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name)
return (ap);
}
+#define HC_ONLY_FALSE 0
+#define HC_ONLY_TRUE 1
+
static int
-fmd_asru_replacement_state(nvlist_t *event)
+fmd_asru_replacement_state(nvlist_t *event, int hc_only)
{
int ps = -1;
nvlist_t *asru, *fru, *rsrc;
+ char *s;
/*
* Check if there is evidence that this object is no longer present.
@@ -177,34 +181,46 @@ fmd_asru_replacement_state(nvlist_t *event)
* If we have checked all three and we still get -1 then nothing knows
* whether it's present or not, so err on the safe side and treat it
* as still present.
+ *
+ * Note that if hc_only is set, then we only check status using fmris
+ * that are in hc-scheme.
*/
if (fmd_asru_fake_not_present)
return (fmd_asru_fake_not_present);
- if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0)
+ if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0 &&
+ (hc_only == HC_ONLY_FALSE || (nvlist_lookup_string(asru,
+ FM_FMRI_SCHEME, &s) == 0 && strcmp(s, FM_FMRI_SCHEME_HC) == 0)))
ps = fmd_fmri_replaced(asru);
- if (ps == -1) {
- if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, &rsrc) == 0)
- ps = fmd_fmri_replaced(rsrc);
- } else if (ps == FMD_OBJ_STATE_UNKNOWN) {
- /* see if we can improve on UNKNOWN */
+ if (ps == -1 || ps == FMD_OBJ_STATE_UNKNOWN) {
if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE,
- &rsrc) == 0) {
- int ps2 = fmd_fmri_replaced(rsrc);
- if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
- ps2 == FMD_OBJ_STATE_REPLACED)
- ps = ps2;
+ &rsrc) == 0 && (hc_only == HC_ONLY_FALSE ||
+ (nvlist_lookup_string(rsrc, FM_FMRI_SCHEME, &s) == 0 &&
+ strcmp(s, FM_FMRI_SCHEME_HC) == 0))) {
+ if (ps == -1) {
+ ps = fmd_fmri_replaced(rsrc);
+ } else {
+ /* see if we can improve on UNKNOWN */
+ int ps2 = fmd_fmri_replaced(rsrc);
+ if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
+ ps2 == FMD_OBJ_STATE_REPLACED)
+ ps = ps2;
+ }
}
}
- if (ps == -1) {
- if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0)
- ps = fmd_fmri_replaced(fru);
- } else if (ps == FMD_OBJ_STATE_UNKNOWN) {
- /* see if we can improve on UNKNOWN */
- if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) {
- int ps2 = fmd_fmri_replaced(fru);
- if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
- ps2 == FMD_OBJ_STATE_REPLACED)
- ps = ps2;
+ if (ps == -1 || ps == FMD_OBJ_STATE_UNKNOWN) {
+ if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0 &&
+ (hc_only == HC_ONLY_FALSE || (nvlist_lookup_string(fru,
+ FM_FMRI_SCHEME, &s) == 0 &&
+ strcmp(s, FM_FMRI_SCHEME_HC) == 0))) {
+ if (ps == -1) {
+ ps = fmd_fmri_replaced(fru);
+ } else {
+ /* see if we can improve on UNKNOWN */
+ int ps2 = fmd_fmri_replaced(fru);
+ if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
+ ps2 == FMD_OBJ_STATE_REPLACED)
+ ps = ps2;
+ }
}
}
if (ps == -1)
@@ -432,6 +448,7 @@ fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
fmd_asru_link_t *alp;
fmd_case_t *cp;
int64_t *diag_time;
+ nvlist_t *de_fmri, *de_fmri_dup;
uint_t nelem;
topo_hdl_t *thp;
char *class;
@@ -482,6 +499,10 @@ fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
fmd_case_settime(cp, diag_time[0], diag_time[1]);
else
fmd_case_settime(cp, lp->log_stat.st_ctime, 0);
+ if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &de_fmri) == 0) {
+ (void) nvlist_xdup(de_fmri, &de_fmri_dup, &fmd.d_nva);
+ fmd_case_set_de_fmri(cp, de_fmri_dup);
+ }
(void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva);
/*
@@ -511,7 +532,7 @@ fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
/*
* Check to see if the resource is still present in the system.
*/
- ps = fmd_asru_replacement_state(flt);
+ ps = fmd_asru_replacement_state(flt, HC_ONLY_FALSE);
if (ps == FMD_OBJ_STATE_REPLACED) {
replaced = FMD_B_TRUE;
} else if (ps == FMD_OBJ_STATE_STILL_PRESENT ||
@@ -689,18 +710,33 @@ fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *arg)
hrtime_t hrt;
int ps;
int err;
+ fmd_asru_rep_arg_t fara;
+
+ /*
+ * Checking for aged resources only happens on the diagnosing side
+ * not on a proxy.
+ */
+ if (alp->al_flags & FMD_ASRU_PROXY)
+ return;
- ps = fmd_asru_replacement_state(alp->al_event);
+ ps = fmd_asru_replacement_state(alp->al_event, HC_ONLY_FALSE);
if (ps == FMD_OBJ_STATE_REPLACED) {
- fmd_asru_replaced(alp, &err);
+ fara.fara_reason = FMD_ASRU_REPLACED;
+ fara.fara_bywhat = FARA_ALL;
+ fara.fara_rval = &err;
+ fmd_asru_repaired(alp, &fara);
} else if (ps == FMD_OBJ_STATE_NOT_PRESENT) {
fmd_time_gettimeofday(&tv);
lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid,
FMD_LOG_ASRU);
hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime);
fmd_log_rele(lp);
- if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime)
- fmd_asru_removed(alp);
+ if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime) {
+ fara.fara_reason = FMD_ASRU_REMOVED;
+ fara.fara_bywhat = FARA_ALL;
+ fara.fara_rval = &err;
+ fmd_asru_repaired(alp, &fara);
+ }
}
}
@@ -1103,7 +1139,7 @@ fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp)
*/
(void) snprintf(path, sizeof (path), "%s/%s",
ahp->ah_dirpath, alp->al_uuid);
- if (unlink(path) != 0)
+ if (cip->ci_xprt == NULL && unlink(path) != 0)
fmd_error(EFMD_ASRU_UNLINK,
"failed to unlink asru %s", path);
@@ -1142,171 +1178,237 @@ fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp)
(void) pthread_rwlock_unlock(&ahp->ah_lock);
}
+typedef struct {
+ nvlist_t *farc_parent_fmri;
+ uint8_t farc_reason;
+} fmd_asru_farc_t;
+
static void
-fmd_asru_repair_containee(fmd_asru_link_t *alp, void *er)
+fmd_asru_repair_containee(fmd_asru_link_t *alp, void *arg)
{
- if (er && (alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) &&
- alp->al_asru_fmri && fmd_fmri_contains(er,
- alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
- FMD_ASRU_REPAIRED))
- fmd_case_update(alp->al_case);
+ fmd_asru_farc_t *farcp = (fmd_asru_farc_t *)arg;
+
+ if ((alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) &&
+ alp->al_asru_fmri &&
+ fmd_fmri_contains(farcp->farc_parent_fmri, alp->al_asru_fmri) > 0) {
+ if (fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
+ farcp->farc_reason)) {
+ if (alp->al_flags & FMD_ASRU_PROXY)
+ fmd_case_xprt_updated(alp->al_case);
+ else
+ fmd_case_update(alp->al_case);
+ }
+ }
}
-void
-fmd_asru_repaired(fmd_asru_link_t *alp, void *er)
+static void
+fmd_asru_do_repair_containees(fmd_asru_link_t *alp, uint8_t reason)
{
int flags;
- int rval;
-
- /*
- * repair this asru cache entry
- */
- rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPAIRED);
/*
- * now check if all entries associated with this asru are repaired and
- * if so repair containees
+ * Check if all entries associated with this asru are no longer faulty
+ * and if so repair containees for the same reason. Don't try to repair
+ * containees on proxy side unless we have local asru.
*/
- (void) pthread_mutex_lock(&alp->al_asru->asru_lock);
- flags = alp->al_asru->asru_flags;
- (void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
- if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE)))
- fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_containee,
- alp->al_asru_fmri);
-
- /*
- * if called from fmd_adm_repair() and we really did clear the bit then
- * we need to do a case update to see if the associated case can be
- * repaired. No need to do this if called from fmd_case_repair() (ie
- * when er is NULL) as the case will be explicitly repaired anyway.
- */
- if (er) {
- *(int *)er = 0;
- if (rval)
- fmd_case_update(alp->al_case);
+ if (alp->al_asru_fmri != NULL && (!(alp->al_flags & FMD_ASRU_PROXY) ||
+ (alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU))) {
+ (void) pthread_mutex_lock(&alp->al_asru->asru_lock);
+ flags = alp->al_asru->asru_flags;
+ (void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
+ if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE))) {
+ fmd_asru_farc_t farc;
+
+ farc.farc_parent_fmri = alp->al_asru_fmri;
+ farc.farc_reason = reason;
+ fmd_asru_al_hash_apply(fmd.d_asrus,
+ fmd_asru_repair_containee, &farc);
+ }
}
}
-static void
-fmd_asru_acquit_containee(fmd_asru_link_t *alp, void *er)
-{
- if (er && (alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) &&
- alp->al_asru_fmri && fmd_fmri_contains(er,
- alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
- FMD_ASRU_ACQUITTED))
- fmd_case_update(alp->al_case);
-}
-
void
-fmd_asru_acquit(fmd_asru_link_t *alp, void *er)
+fmd_asru_repaired(fmd_asru_link_t *alp, void *arg)
{
- int flags;
- int rval;
+ int cleared;
+ fmd_asru_rep_arg_t *farap = (fmd_asru_rep_arg_t *)arg;
/*
- * acquit this asru cache entry
+ * don't allow remote repair over readonly transport
*/
- rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_ACQUITTED);
+ if (alp->al_flags & FMD_ASRU_PROXY_RDONLY)
+ return;
/*
- * now check if all entries associated with this asru are acquitted and
- * if so acquit containees
+ * don't allow repair etc by asru on proxy unless asru is local
*/
- (void) pthread_mutex_lock(&alp->al_asru->asru_lock);
- flags = alp->al_asru->asru_flags;
- (void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
- if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE)))
- fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_acquit_containee,
- alp->al_asru_fmri);
+ if (farap->fara_bywhat == FARA_BY_ASRU &&
+ (alp->al_flags & FMD_ASRU_PROXY) &&
+ !(alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU))
+ return;
+ /*
+ * For acquit, need to check both name and uuid if specified
+ */
+ if (farap->fara_reason == FMD_ASRU_ACQUITTED &&
+ farap->fara_rval != NULL && strcmp(farap->fara_uuid, "") != 0 &&
+ strcmp(farap->fara_uuid, alp->al_case_uuid) != 0)
+ return;
/*
- * if called from fmd_adm_acquit() and we really did clear the bit then
- * we need to do a case update to see if the associated case can be
- * repaired. No need to do this if called from fmd_case_acquit() (ie
- * when er is NULL) as the case will be explicitly repaired anyway.
+ * For replaced, verify it has been replaced if we have serial number
*/
- if (er) {
- *(int *)er = 0;
- if (rval)
- fmd_case_update(alp->al_case);
+ if (farap->fara_reason == FMD_ASRU_REPLACED &&
+ !(alp->al_flags & FMD_ASRU_PROXY_EXTERNAL) &&
+ fmd_asru_replacement_state(alp->al_event,
+ (alp->al_flags & FMD_ASRU_PROXY) ? HC_ONLY_TRUE : HC_ONLY_FALSE) ==
+ FMD_OBJ_STATE_STILL_PRESENT) {
+ return;
}
-}
-static void
-fmd_asru_replaced_containee(fmd_asru_link_t *alp, void *er)
-{
- if (er && (alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) &&
- alp->al_asru_fmri && fmd_fmri_contains(er,
- alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
- FMD_ASRU_REPLACED))
- fmd_case_update(alp->al_case);
+ cleared = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, farap->fara_reason);
+ fmd_asru_do_repair_containees(alp, farap->fara_reason);
+
+ /*
+ * if called from fmd_adm_*() and we really did clear the bit then we
+ * need to do a case update to see if the associated case can be
+ * repaired. No need to do this if called from fmd_case_*() (ie when
+ * fara_rval is NULL) as the case will be explicitly repaired anyway.
+ */
+ if (farap->fara_rval) {
+ *farap->fara_rval = 0;
+ if (cleared) {
+ if (alp->al_flags & FMD_ASRU_PROXY)
+ fmd_case_xprt_updated(alp->al_case);
+ else
+ fmd_case_update(alp->al_case);
+ }
+ }
}
+/*
+ * This is only called for proxied faults. Set various flags so we can
+ * find the nature of the transport from the resource cache code.
+ */
+/*ARGSUSED*/
void
-fmd_asru_replaced(fmd_asru_link_t *alp, void *er)
+fmd_asru_set_on_proxy(fmd_asru_link_t *alp, void *arg)
{
- int flags;
- int rval;
- int ps;
+ fmd_asru_set_on_proxy_t *entryp = (fmd_asru_set_on_proxy_t *)arg;
- ps = fmd_asru_replacement_state(alp->al_event);
- if (ps == FMD_OBJ_STATE_STILL_PRESENT)
+ if (*entryp->fasp_countp >= entryp->fasp_maxcount)
return;
/*
- * mark this cache entry as replaced
+ * Note that this is a proxy fault and save whether transport is
+ * RDONLY or EXTERNAL.
*/
- rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPLACED);
+ alp->al_flags |= FMD_ASRU_PROXY;
+ alp->al_asru->asru_flags |= FMD_ASRU_PROXY;
+
+ if (entryp->fasp_proxy_external) {
+ alp->al_flags |= FMD_ASRU_PROXY_EXTERNAL;
+ alp->al_asru->asru_flags |= FMD_ASRU_PROXY_EXTERNAL;
+ }
+
+ if (entryp->fasp_proxy_rdonly)
+ alp->al_flags |= FMD_ASRU_PROXY_RDONLY;
/*
- * now check if all entries associated with this asru are replaced and
- * if so replace containees
+ * Save whether asru is accessible in local domain
*/
- (void) pthread_mutex_lock(&alp->al_asru->asru_lock);
- flags = alp->al_asru->asru_flags;
- (void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
- if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE)))
- fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_replaced_containee,
- alp->al_asru_fmri);
-
- *(int *)er = 0;
- if (rval)
- fmd_case_update(alp->al_case);
+ if (entryp->fasp_proxy_asru[*entryp->fasp_countp]) {
+ alp->al_flags |= FMD_ASRU_PROXY_WITH_ASRU;
+ alp->al_asru->asru_flags |= FMD_ASRU_PROXY_WITH_ASRU;
+ }
+ (*entryp->fasp_countp)++;
}
-static void
-fmd_asru_removed_containee(fmd_asru_link_t *alp, void *er)
+/*ARGSUSED*/
+void
+fmd_asru_update_containees(fmd_asru_link_t *alp, void *arg)
{
- if (er && (alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) &&
- alp->al_asru_fmri && fmd_fmri_contains(er,
- alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
- 0))
- fmd_case_update(alp->al_case);
+ fmd_asru_do_repair_containees(alp, alp->al_reason);
}
+/*
+ * This function is used for fault proxying. It updates the resource status in
+ * the resource cache based on information that has come from the other side of
+ * the transport. This can be called on either the proxy side or the
+ * diagnosing side.
+ */
void
-fmd_asru_removed(fmd_asru_link_t *alp)
+fmd_asru_update_status(fmd_asru_link_t *alp, void *arg)
{
- int flags;
- int rval;
+ fmd_asru_update_status_t *entryp = (fmd_asru_update_status_t *)arg;
+ uint8_t status;
+
+ if (*entryp->faus_countp >= entryp->faus_maxcount)
+ return;
+
+ status = entryp->faus_ba[*entryp->faus_countp];
/*
- * mark this cache entry as replacded
+ * For proxy, if there is no asru on the proxy side, but there is on
+ * the diag side, then take the diag side asru status.
+ * For diag, if there is an asru on the proxy side, then take the proxy
+ * side asru status.
*/
- rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 0);
+ if (entryp->faus_is_proxy ?
+ (entryp->faus_diag_asru[*entryp->faus_countp] &&
+ !entryp->faus_proxy_asru[*entryp->faus_countp]) :
+ entryp->faus_proxy_asru[*entryp->faus_countp]) {
+ if (status & FM_SUSPECT_DEGRADED)
+ alp->al_flags |= FMD_ASRU_DEGRADED;
+ else
+ alp->al_flags &= ~FMD_ASRU_DEGRADED;
+ if (status & FM_SUSPECT_UNUSABLE)
+ (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE);
+ else
+ (void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0);
+ }
/*
- * now check if all entries associated with this asru are removed and
- * if so replace containees
+ * Update the faulty status too.
*/
- (void) pthread_mutex_lock(&alp->al_asru->asru_lock);
- flags = alp->al_asru->asru_flags;
- (void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
- if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE)))
- fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_removed_containee,
- alp->al_asru_fmri);
- if (rval)
- fmd_case_update(alp->al_case);
+ if (!(status & FM_SUSPECT_FAULTY))
+ (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
+ (status & FM_SUSPECT_REPAIRED) ? FMD_ASRU_REPAIRED :
+ (status & FM_SUSPECT_REPLACED) ? FMD_ASRU_REPLACED :
+ (status & FM_SUSPECT_ACQUITTED) ? FMD_ASRU_ACQUITTED :
+ FMD_ASRU_REMOVED);
+ else if (entryp->faus_is_proxy)
+ (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
+
+ /*
+ * for proxy only, update the present status too.
+ */
+ if (entryp->faus_is_proxy) {
+ if (!(status & FM_SUSPECT_NOT_PRESENT)) {
+ alp->al_flags |= FMD_ASRU_PRESENT;
+ alp->al_asru->asru_flags |= FMD_ASRU_PRESENT;
+ } else {
+ alp->al_flags &= ~FMD_ASRU_PRESENT;
+ alp->al_asru->asru_flags &= ~FMD_ASRU_PRESENT;
+ }
+ }
+ (*entryp->faus_countp)++;
+}
+
+/*
+ * This function is called on the diagnosing side when fault proxying is
+ * in use and the proxy has sent a uuclose. It updates the status of the
+ * resource cache entries.
+ */
+void
+fmd_asru_close_status(fmd_asru_link_t *alp, void *arg)
+{
+ fmd_asru_close_status_t *entryp = (fmd_asru_close_status_t *)arg;
+
+ if (*entryp->facs_countp >= entryp->facs_maxcount)
+ return;
+ alp->al_flags &= ~FMD_ASRU_DEGRADED;
+ (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE);
+ (*entryp->facs_countp)++;
}
static void
@@ -1330,6 +1432,12 @@ fmd_asru_logevent(fmd_asru_link_t *alp)
cip = (fmd_case_impl_t *)alp->al_case;
ASSERT(cip != NULL);
+ /*
+ * Don't log to disk on proxy side
+ */
+ if (cip->ci_xprt != NULL)
+ return;
+
if ((lp = alp->al_log) == NULL)
lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU);
@@ -1338,7 +1446,8 @@ fmd_asru_logevent(fmd_asru_link_t *alp)
nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)],
alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable,
- message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted);
+ message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted,
+ cip->ci_diag_de == NULL ? cip->ci_mod->mod_fmri : cip->ci_diag_de);
(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
@@ -1446,45 +1555,65 @@ fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag, uint8_t reason)
int
fmd_asru_al_getstate(fmd_asru_link_t *alp)
{
- int us, st;
+ int us, st = (alp->al_flags & (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE));
nvlist_t *asru;
- int ps;
+ int ps = FMD_OBJ_STATE_UNKNOWN;
- ps = fmd_asru_replacement_state(alp->al_event);
- if (ps == FMD_OBJ_STATE_NOT_PRESENT)
- return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE);
- if (ps == FMD_OBJ_STATE_REPLACED) {
- if (alp->al_reason < FMD_ASRU_REPLACED)
- alp->al_reason = FMD_ASRU_REPLACED;
- return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE);
+ /*
+ * For fault proxying with an EXTERNAL transport, believe the presence
+ * state as sent by the diagnosing side. Otherwise find the presence
+ * state here. Note that if fault proxying with an INTERNAL transport
+ * we can only trust the presence state where we are using hc-scheme
+ * fmris which should be consistent across domains in the same system -
+ * other schemes can refer to different devices in different domains.
+ */
+ if (!(alp->al_flags & FMD_ASRU_PROXY_EXTERNAL)) {
+ ps = fmd_asru_replacement_state(alp->al_event, (alp->al_flags &
+ FMD_ASRU_PROXY)? HC_ONLY_TRUE : HC_ONLY_FALSE);
+ if (ps == FMD_OBJ_STATE_NOT_PRESENT)
+ return (st | FMD_ASRU_UNUSABLE);
+ if (ps == FMD_OBJ_STATE_REPLACED) {
+ if (alp->al_reason < FMD_ASRU_REPLACED)
+ alp->al_reason = FMD_ASRU_REPLACED;
+ return (st | FMD_ASRU_UNUSABLE);
+ }
}
+ if (ps == FMD_OBJ_STATE_UNKNOWN && (alp->al_flags & FMD_ASRU_PROXY))
+ st |= (alp->al_flags & (FMD_ASRU_DEGRADED | FMD_ASRU_PRESENT));
+ else
+ st |= (alp->al_flags & (FMD_ASRU_DEGRADED)) | FMD_ASRU_PRESENT;
- st = (alp->al_flags & FMD_ASRU_STATE) | FMD_ASRU_PRESENT;
- if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) {
+ /*
+ * For fault proxying, unless we have a local ASRU, then believe the
+ * service state sent by the diagnosing side. Otherwise find the service
+ * state here. Try fmd_fmri_service_state() first, but if that's not
+ * supported by the scheme then fall back to fmd_fmri_unusable().
+ */
+ if ((!(alp->al_flags & FMD_ASRU_PROXY) ||
+ (alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) &&
+ nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) {
us = fmd_fmri_service_state(asru);
if (us == -1 || us == FMD_SERVICE_STATE_UNKNOWN) {
/* not supported by scheme - try fmd_fmri_unusable */
us = fmd_fmri_unusable(asru);
- } else if (us == FMD_SERVICE_STATE_UNUSABLE) {
- st |= FMD_ASRU_UNUSABLE;
- return (st);
- } else if (us == FMD_SERVICE_STATE_OK) {
- st &= ~FMD_ASRU_UNUSABLE;
- return (st);
- } else if (us == FMD_SERVICE_STATE_ISOLATE_PENDING) {
- st &= ~FMD_ASRU_UNUSABLE;
- return (st);
- } else if (us == FMD_SERVICE_STATE_DEGRADED) {
- st &= ~FMD_ASRU_UNUSABLE;
- st |= FMD_ASRU_DEGRADED;
- return (st);
+ if (us > 0)
+ st |= FMD_ASRU_UNUSABLE;
+ else if (us == 0)
+ st &= ~FMD_ASRU_UNUSABLE;
+ } else {
+ if (us == FMD_SERVICE_STATE_UNUSABLE) {
+ st &= ~FMD_ASRU_DEGRADED;
+ st |= FMD_ASRU_UNUSABLE;
+ } else if (us == FMD_SERVICE_STATE_OK) {
+ st &= ~(FMD_ASRU_DEGRADED | FMD_ASRU_UNUSABLE);
+ } else if (us == FMD_SERVICE_STATE_ISOLATE_PENDING) {
+ st &= ~(FMD_ASRU_DEGRADED | FMD_ASRU_UNUSABLE);
+ } else if (us == FMD_SERVICE_STATE_DEGRADED) {
+ st &= ~FMD_ASRU_UNUSABLE;
+ st |= FMD_ASRU_DEGRADED;
+ }
}
- } else
- us = (alp->al_flags & FMD_ASRU_UNUSABLE);
- if (us > 0)
- st |= FMD_ASRU_UNUSABLE;
- else if (us == 0)
- st &= ~FMD_ASRU_UNUSABLE;
+ }
return (st);
}
@@ -1499,20 +1628,43 @@ fmd_asru_al_getstate(fmd_asru_link_t *alp)
int
fmd_asru_getstate(fmd_asru_t *ap)
{
- int us, st;
-
- if (!(ap->asru_flags & FMD_ASRU_INTERNAL) &&
- (fmd_asru_fake_not_present >= FMD_OBJ_STATE_REPLACED ||
- fmd_fmri_present(ap->asru_fmri) <= 0))
- return (0); /* do not report non-fmd non-present resources */
-
- us = fmd_fmri_unusable(ap->asru_fmri);
- st = ap->asru_flags & FMD_ASRU_STATE;
+ int us, st, p = -1;
+ char *s;
- if (us > 0)
- st |= FMD_ASRU_UNUSABLE;
- else if (us == 0)
- st &= ~FMD_ASRU_UNUSABLE;
+ /* do not report non-fmd non-present resources */
+ if (!(ap->asru_flags & FMD_ASRU_INTERNAL)) {
+ /*
+ * As with fmd_asru_al_getstate(), we can only trust the
+ * local presence state on a proxy if the transport is
+ * internal and the scheme is hc. Otherwise we believe the
+ * state as sent by the diagnosing side.
+ */
+ if (!(ap->asru_flags & FMD_ASRU_PROXY) ||
+ (!(ap->asru_flags & FMD_ASRU_PROXY_EXTERNAL) &&
+ (nvlist_lookup_string(ap->asru_fmri, FM_FMRI_SCHEME,
+ &s) == 0 && strcmp(s, FM_FMRI_SCHEME_HC) == 0))) {
+ if (fmd_asru_fake_not_present >=
+ FMD_OBJ_STATE_REPLACED)
+ return (0);
+ p = fmd_fmri_present(ap->asru_fmri);
+ }
+ if (p == 0 || (p < 0 && !(ap->asru_flags & FMD_ASRU_PROXY) ||
+ !(ap->asru_flags & FMD_ASRU_PRESENT)))
+ return (0);
+ }
+ /*
+ * As with fmd_asru_al_getstate(), we can only trust the local unusable
+ * state on a proxy if there is a local ASRU.
+ */
+ st = ap->asru_flags & (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE);
+ if (!(ap->asru_flags & FMD_ASRU_PROXY) ||
+ (ap->asru_flags & FMD_ASRU_PROXY_WITH_ASRU)) {
+ us = fmd_fmri_unusable(ap->asru_fmri);
+ if (us > 0)
+ st |= FMD_ASRU_UNUSABLE;
+ else if (us == 0)
+ st &= ~FMD_ASRU_UNUSABLE;
+ }
return (st);
}
diff --git a/usr/src/cmd/fm/fmd/common/fmd_asru.h b/usr/src/cmd/fm/fmd/common/fmd_asru.h
index f0a5738f3e..c350c801c0 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_asru.h
+++ b/usr/src/cmd/fm/fmd/common/fmd_asru.h
@@ -20,15 +20,13 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _FMD_ASRU_H
#define _FMD_ASRU_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <pthread.h>
@@ -100,11 +98,16 @@ typedef struct fmd_asru_link {
#define FMD_ASRU_RECREATED 0x20 /* asru recreated by cache replay */
#define FMD_ASRU_PRESENT 0x40 /* asru present at last R$ update */
#define FMD_ASRU_DEGRADED 0x80 /* asru service is degraded */
+#define FMD_ASRU_PROXY 0x100 /* asru on proxy */
+#define FMD_ASRU_PROXY_WITH_ASRU 0x200 /* asru accessible locally on proxy */
+#define FMD_ASRU_PROXY_EXTERNAL 0x400 /* proxy over external transport */
+#define FMD_ASRU_PROXY_RDONLY 0x800 /* proxy over readonly transport */
/*
* Note the following are defined in order of increasing precedence and
* this should not be changed
*/
+#define FMD_ASRU_REMOVED 0 /* asru removed */
#define FMD_ASRU_ACQUITTED 1 /* asru acquitted */
#define FMD_ASRU_REPAIRED 2 /* asru repaired */
#define FMD_ASRU_REPLACED 3 /* asru replaced */
@@ -162,10 +165,56 @@ extern void fmd_asru_hash_release(fmd_asru_hash_t *, fmd_asru_t *);
extern void fmd_asru_hash_delete_case(fmd_asru_hash_t *, fmd_case_t *);
extern void fmd_asru_clear_aged_rsrcs();
+
+/*
+ * Values used in the fara_bywhat field of fmd_asru_rep_arg_t
+ */
+#define FARA_ALL 0
+#define FARA_BY_CASE 1
+#define FARA_BY_ASRU 2
+#define FARA_BY_FRU 3
+#define FARA_BY_RSRC 4
+#define FARA_BY_LABEL 5
+
+/*
+ * The following structures are used to pass arguments to the corresponding
+ * function when walking the resource cache by case etc.
+ */
+typedef struct {
+ uint8_t fara_reason; /* repaired, acquit, replaced, removed */
+ uint8_t fara_bywhat; /* whether doing a walk by case, asru, etc */
+ int *fara_rval; /* for return success or failure */
+ char *fara_uuid; /* uuid can be passed in for comparison */
+} fmd_asru_rep_arg_t;
extern void fmd_asru_repaired(fmd_asru_link_t *, void *);
-extern void fmd_asru_acquit(fmd_asru_link_t *, void *);
-extern void fmd_asru_replaced(fmd_asru_link_t *, void *);
-extern void fmd_asru_removed(fmd_asru_link_t *);
+
+typedef struct {
+ int *faus_countp;
+ int faus_maxcount;
+ uint8_t *faus_ba; /* received status for each suspect */
+ uint8_t *faus_proxy_asru; /* asru on proxy for each suspect? */
+ uint8_t *faus_diag_asru; /* asru on diag for each suspect? */
+ boolean_t faus_is_proxy; /* are we on the proxy side? */
+} fmd_asru_update_status_t;
+extern void fmd_asru_update_status(fmd_asru_link_t *alp, void *arg);
+
+typedef struct {
+ int *fasp_countp;
+ int fasp_maxcount;
+ uint8_t *fasp_proxy_asru; /* asru on proxy for each suspect? */
+ int fasp_proxy_external; /* is this an external transport? */
+ int fasp_proxy_rdonly; /* is this a rdonly transport? */
+} fmd_asru_set_on_proxy_t;
+extern void fmd_asru_set_on_proxy(fmd_asru_link_t *alp, void *arg);
+
+extern void fmd_asru_update_containees(fmd_asru_link_t *alp, void *arg);
+
+typedef struct {
+ int *facs_countp;
+ int facs_maxcount;
+} fmd_asru_close_status_t;
+extern void fmd_asru_close_status(fmd_asru_link_t *alp, void *arg);
+
extern int fmd_asru_setflags(fmd_asru_link_t *, uint_t);
extern int fmd_asru_clrflags(fmd_asru_link_t *, uint_t, uint8_t);
extern int fmd_asru_al_getstate(fmd_asru_link_t *);
diff --git a/usr/src/cmd/fm/fmd/common/fmd_case.c b/usr/src/cmd/fm/fmd/common/fmd_case.c
index d45475e599..8a03b670eb 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_case.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_case.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -373,9 +373,20 @@ static void
fmd_case_unusable_and_present(fmd_asru_link_t *alp, void *arg)
{
int *rvalp = (int *)arg;
- int state = fmd_asru_al_getstate(alp);
+ int state;
nvlist_t *asru;
+ /*
+ * If this is a proxy case and this suspect doesn't have a local asru
+ * then state is unknown so we must assume it may still be unusable.
+ */
+ if ((alp->al_flags & FMD_ASRU_PROXY) &&
+ !(alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) {
+ *rvalp |= B_TRUE;
+ return;
+ }
+
+ state = fmd_asru_al_getstate(alp);
if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) != 0)
return;
*rvalp |= ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_PRESENT));
@@ -430,8 +441,13 @@ fmd_case_mkevent(fmd_case_t *cp, const char *class)
if (msg == B_FALSE)
cip->ci_flags |= FMD_CF_INVISIBLE;
- nvl = fmd_protocol_list(class, cip->ci_mod->mod_fmri, cip->ci_uuid,
- code, count, nva, ba, msg, &cip->ci_tv);
+ /*
+ * Use the ci_diag_de if one has been saved (eg for an injected fault).
+ * Otherwise use the authority for the current module.
+ */
+ nvl = fmd_protocol_list(class, cip->ci_diag_de == NULL ?
+ cip->ci_mod->mod_fmri : cip->ci_diag_de, cip->ci_uuid, code, count,
+ nva, ba, msg, &cip->ci_tv);
(void) pthread_mutex_unlock(&cip->ci_lock);
return (nvl);
@@ -599,7 +615,10 @@ fmd_case_convict(fmd_case_t *cp)
fmd_asru_link_t *alp;
(void) pthread_mutex_lock(&cip->ci_lock);
- (void) fmd_case_mkcode(cp);
+ if (cip->ci_code == NULL)
+ (void) fmd_case_mkcode(cp);
+ else if (cip->ci_precanned)
+ fmd_case_code_hash_insert(fmd.d_cases, cip);
if (fmd_case_check_for_dups(cp) == 1) {
(void) pthread_mutex_unlock(&cip->ci_lock);
return (1);
@@ -615,6 +634,8 @@ fmd_case_convict(fmd_case_t *cp)
"%s: %s\n", cip->ci_uuid, fmd_strerror(errno));
continue;
}
+ alp->al_flags |= FMD_ASRU_PRESENT;
+ alp->al_asru->asru_flags |= FMD_ASRU_PRESENT;
(void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0);
(void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
}
@@ -641,7 +662,8 @@ fmd_case_publish(fmd_case_t *cp, uint_t state)
/*
* If we already have a code, then case is already solved.
*/
- if (cip->ci_code != NULL) {
+ if (cip->ci_precanned == 0 && cip->ci_xprt == NULL &&
+ cip->ci_code != NULL) {
(void) pthread_mutex_unlock(&cip->ci_lock);
break;
}
@@ -657,6 +679,25 @@ fmd_case_publish(fmd_case_t *cp, uint_t state)
fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0);
break;
}
+ if (cip->ci_xprt != NULL) {
+ /*
+ * For proxy, save some information about the transport
+ * in the resource cache.
+ */
+ int count = 0;
+ fmd_asru_set_on_proxy_t fasp;
+ fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)cip->ci_xprt;
+
+ fasp.fasp_countp = &count;
+ fasp.fasp_maxcount = cip->ci_nsuspects;
+ fasp.fasp_proxy_asru = cip->ci_proxy_asru;
+ fasp.fasp_proxy_external = xip->xi_flags &
+ FMD_XPRT_EXTERNAL;
+ fasp.fasp_proxy_rdonly = ((xip->xi_flags &
+ FMD_XPRT_RDWR) == FMD_XPRT_RDONLY);
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
+ fmd_asru_set_on_proxy, &fasp);
+ }
nvl = fmd_case_mkevent(cp, FM_LIST_SUSPECT_CLASS);
(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
@@ -867,6 +908,15 @@ fmd_case_destroy_suspects(fmd_case_impl_t *cip)
ASSERT(MUTEX_HELD(&cip->ci_lock));
+ if (cip->ci_proxy_asru)
+ fmd_free(cip->ci_proxy_asru, sizeof (uint8_t) *
+ cip->ci_nsuspects);
+ if (cip->ci_diag_de)
+ nvlist_free(cip->ci_diag_de);
+ if (cip->ci_diag_asru)
+ fmd_free(cip->ci_diag_asru, sizeof (uint8_t) *
+ cip->ci_nsuspects);
+
for (cis = cip->ci_suspects; cis != NULL; cis = ncis) {
ncis = cis->cis_next;
nvlist_free(cis->cis_nvl);
@@ -921,12 +971,28 @@ fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp,
*/
if (mp == fmd.d_rmod) {
/*
+ * If the case has already been created from a
+ * checkpoint file, we need to set up its code now.
+ */
+ if (cip->ci_state < FMD_CASE_CLOSED) {
+ if (code != NULL && cip->ci_code == NULL) {
+ cip->ci_code = fmd_strdup(code,
+ FMD_SLEEP);
+ cip->ci_codelen = cip->ci_code ?
+ strlen(cip->ci_code) + 1 : 0;
+ fmd_case_code_hash_insert(fmd.d_cases,
+ cip);
+ }
+ }
+
+ /*
* When recreating an orphan case, state passed in may
* either be CLOSED (faulty) or REPAIRED (!faulty). If
* any suspects are still CLOSED (faulty) then the
* overall state needs to be CLOSED.
*/
- if (state == FMD_CASE_CLOSED)
+ if (cip->ci_state == FMD_CASE_REPAIRED &&
+ state == FMD_CASE_CLOSED)
cip->ci_state = FMD_CASE_CLOSED;
(void) pthread_mutex_unlock(&cip->ci_lock);
fmd_case_rele((fmd_case_t *)cip);
@@ -1202,7 +1268,8 @@ fmd_case_insert_suspect(fmd_case_t *cp, nvlist_t *nvl)
cip->ci_nsuspects++;
(void) pthread_mutex_unlock(&cip->ci_lock);
- fmd_module_setcdirty(cip->ci_mod);
+ if (cip->ci_xprt == NULL)
+ fmd_module_setcdirty(cip->ci_mod);
}
void
@@ -1213,9 +1280,6 @@ fmd_case_recreate_suspect(fmd_case_t *cp, nvlist_t *nvl)
boolean_t b;
(void) pthread_mutex_lock(&cip->ci_lock);
- ASSERT(cip->ci_state == FMD_CASE_CLOSED ||
- cip->ci_state == FMD_CASE_REPAIRED);
- ASSERT(cip->ci_mod == fmd.d_rmod);
cis->cis_next = cip->ci_suspects;
cis->cis_nvl = nvl;
@@ -1270,7 +1334,7 @@ fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
(void) pthread_mutex_lock(&cip->ci_lock);
if (!(cip->ci_flags & FMD_CF_SOLVED) && !(flags & FMD_CF_SOLVED))
- flags &= ~(FMD_CF_ISOLATED | FMD_CF_REPAIRED);
+ flags &= ~(FMD_CF_ISOLATED | FMD_CF_REPAIRED | FMD_CF_RESOLVED);
cip->ci_flags |= flags;
@@ -1319,21 +1383,42 @@ fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
break;
case FMD_CASE_REPAIRED:
- ASSERT(fmd_case_orphaned(cp));
+ ASSERT(cip->ci_xprt != NULL || fmd_case_orphaned(cp));
/*
- * If all suspects are already either usable or not present then
- * transition straight to RESOLVED state, publishing both the
- * list.repaired and list.resolved.
+ * If we've been requested to transition straight on to the
+ * RESOLVED state (which can happen with fault proxying where a
+ * list.resolved or a uuresolved is received from the other
+ * side), or if all suspects are already either usable or not
+ * present then transition straight to RESOLVED state,
+ * publishing both the list.repaired and list.resolved. For a
+ * proxy, if we discover here that all suspects are already
+ * either usable or not present, notify the diag side instead
+ * using fmd_xprt_uuresolved().
*/
- fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
- fmd_case_unusable_and_present, &any_unusable_and_present);
- if (any_unusable_and_present)
- break;
+ if (flags & FMD_CF_RESOLVED) {
+ if (cip->ci_xprt != NULL) {
+ fmd_list_delete(&cip->ci_mod->mod_cases, cip);
+ } else {
+ fmd_module_lock(cip->ci_mod);
+ fmd_list_delete(&cip->ci_mod->mod_cases, cip);
+ fmd_module_unlock(cip->ci_mod);
+ }
+ } else {
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
+ fmd_case_unusable_and_present,
+ &any_unusable_and_present);
+ if (any_unusable_and_present)
+ break;
+ if (cip->ci_xprt != NULL) {
+ fmd_xprt_uuresolved(cip->ci_xprt, cip->ci_uuid);
+ break;
+ }
+ fmd_module_lock(cip->ci_mod);
+ fmd_list_delete(&cip->ci_mod->mod_cases, cip);
+ fmd_module_unlock(cip->ci_mod);
+ }
- fmd_module_lock(cip->ci_mod);
- fmd_list_delete(&cip->ci_mod->mod_cases, cip);
- fmd_module_unlock(cip->ci_mod);
cip->ci_state = FMD_CASE_RESOLVED;
(void) pthread_mutex_unlock(&cip->ci_lock);
fmd_case_publish(cp, state);
@@ -1346,6 +1431,17 @@ fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
break;
case FMD_CASE_RESOLVED:
+ /*
+ * For a proxy, no need to check that all suspects are already
+ * either usable or not present - this request has come from
+ * the diagnosing side which makes the final decision on this.
+ */
+ if (cip->ci_xprt != NULL) {
+ fmd_list_delete(&cip->ci_mod->mod_cases, cip);
+ resolved = 1;
+ break;
+ }
+
ASSERT(fmd_case_orphaned(cp));
/*
@@ -1473,6 +1569,100 @@ fmd_case_commit(fmd_case_t *cp)
}
/*
+ * On proxy side, send back repair/acquit/etc request to diagnosing side
+ */
+void
+fmd_case_xprt_updated(fmd_case_t *cp)
+{
+ fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
+ nvlist_t **nva;
+ uint8_t *ba;
+ int msg = B_TRUE;
+ int count = 0;
+ fmd_case_lst_t fcl;
+
+ ASSERT(cip->ci_xprt != NULL);
+ (void) pthread_mutex_lock(&cip->ci_lock);
+ ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects);
+ nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects);
+ fcl.fcl_countp = &count;
+ fcl.fcl_maxcount = cip->ci_nsuspects;
+ fcl.fcl_msgp = &msg;
+ fcl.fcl_ba = ba;
+ fcl.fcl_nva = nva;
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl);
+ (void) pthread_mutex_unlock(&cip->ci_lock);
+ fmd_xprt_updated(cip->ci_xprt, cip->ci_uuid, ba, cip->ci_proxy_asru,
+ count);
+}
+
+/*
+ * fmd_case_update_status() can be called on either the proxy side when a
+ * list.suspect is received, or on the diagnosing side when an update request
+ * is received from the proxy. It updates the status in the resource cache.
+ */
+void
+fmd_case_update_status(fmd_case_t *cp, uint8_t *statusp, uint8_t *proxy_asrup,
+ uint8_t *diag_asrup)
+{
+ fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
+ int count = 0;
+ fmd_asru_update_status_t faus;
+
+ /*
+ * update status of resource cache entries
+ */
+ faus.faus_countp = &count;
+ faus.faus_maxcount = cip->ci_nsuspects;
+ faus.faus_ba = statusp;
+ faus.faus_proxy_asru = proxy_asrup;
+ faus.faus_diag_asru = diag_asrup;
+ faus.faus_is_proxy = (cip->ci_xprt != NULL);
+ (void) pthread_mutex_lock(&cip->ci_lock);
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_update_status,
+ &faus);
+ (void) pthread_mutex_unlock(&cip->ci_lock);
+}
+
+/*
+ * Called on either the proxy side or the diag side when a repair has taken
+ * place on the other side but this side may know the asru "contains"
+ * relationships.
+ */
+void
+fmd_case_update_containees(fmd_case_t *cp)
+{
+ fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
+
+ (void) pthread_mutex_lock(&cip->ci_lock);
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
+ fmd_asru_update_containees, NULL);
+ (void) pthread_mutex_unlock(&cip->ci_lock);
+}
+
+/*
+ * fmd_case_close_status() is called on diagnosing side when proxy side
+ * has had a uuclose. It updates the status in the resource cache.
+ */
+void
+fmd_case_close_status(fmd_case_t *cp)
+{
+ fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
+ int count = 0;
+ fmd_asru_close_status_t facs;
+
+ /*
+ * update status of resource cache entries
+ */
+ facs.facs_countp = &count;
+ facs.facs_maxcount = cip->ci_nsuspects;
+ (void) pthread_mutex_lock(&cip->ci_lock);
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_close_status,
+ &facs);
+ (void) pthread_mutex_unlock(&cip->ci_lock);
+}
+
+/*
* Indicate that the case may need to change state because one or more of the
* ASRUs named as a suspect has changed state. We examine all the suspects
* and if none are still faulty, we initiate a case close transition.
@@ -1487,7 +1677,7 @@ fmd_case_update(fmd_case_t *cp)
(void) pthread_mutex_lock(&cip->ci_lock);
cstate = cip->ci_state;
- if (cip->ci_xprt != NULL || cip->ci_state < FMD_CASE_SOLVED) {
+ if (cip->ci_state < FMD_CASE_SOLVED) {
(void) pthread_mutex_unlock(&cip->ci_lock);
return; /* update is not appropriate */
}
@@ -1497,6 +1687,7 @@ fmd_case_update(fmd_case_t *cp)
return; /* already repaired */
}
+ TRACE((FMD_DBG_CASE, "case update %s", cip->ci_uuid));
fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
(void) pthread_mutex_unlock(&cip->ci_lock);
@@ -1505,6 +1696,7 @@ fmd_case_update(fmd_case_t *cp)
fmd_event_t *e;
char *class;
+ TRACE((FMD_DBG_CASE, "sending list.updated %s", cip->ci_uuid));
nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
@@ -1525,7 +1717,7 @@ fmd_case_update(fmd_case_t *cp)
* Delete a closed case from the module's case list once the fmdo_close() entry
* point has run to completion. If the case is owned by a transport module,
* tell the transport to proxy a case close on the other end of the transport.
- * If not, transition to the appropriate next state based on ci_flags. This
+ * Transition to the appropriate next state based on ci_flags. This
* function represents the end of CLOSE_WAIT and transitions the case to either
* CLOSED or REPAIRED or discards it entirely because it was never solved;
* refer to the topmost block comment explaining the state machine for details.
@@ -1537,6 +1729,7 @@ fmd_case_delete(fmd_case_t *cp)
fmd_modstat_t *msp;
size_t buftotal;
+ TRACE((FMD_DBG_CASE, "case delete %s", cip->ci_uuid));
ASSERT(fmd_module_locked(cip->ci_mod));
fmd_list_delete(&cip->ci_mod->mod_cases, cip);
buftotal = fmd_buf_hash_destroy(&cip->ci_bufs);
@@ -1560,11 +1753,11 @@ fmd_case_delete(fmd_case_t *cp)
fmd_module_hold(cip->ci_mod);
/*
- * If the case is not proxied and it has been solved, then retain it
+ * If the case has been solved, then retain it
* on the root module's case list at least until we're transitioned.
* Otherwise free the case with our final fmd_case_rele() below.
*/
- if (cip->ci_xprt == NULL && (cip->ci_flags & FMD_CF_SOLVED)) {
+ if (cip->ci_flags & FMD_CF_SOLVED) {
fmd_module_lock(cip->ci_mod);
fmd_list_append(&cip->ci_mod->mod_cases, cip);
fmd_module_unlock(cip->ci_mod);
@@ -1572,22 +1765,26 @@ fmd_case_delete(fmd_case_t *cp)
}
/*
- * If a proxied case finishes CLOSE_WAIT, then it can be discarded
- * rather than orphaned because by definition it can have no entries
- * in the resource cache of the current fault manager.
+ * Transition onwards to REPAIRED or CLOSED as originally requested.
+ * Note that for proxy case if we're transitioning to CLOSED it means
+ * the case was isolated locally, so call fmd_xprt_uuclose() to notify
+ * the diagnosing side. No need to notify the diagnosing side if we are
+ * transitioning to REPAIRED as we only do this when requested to do
+ * so by the diagnosing side anyway.
*/
- if (cip->ci_xprt != NULL)
- fmd_xprt_uuclose(cip->ci_xprt, cip->ci_uuid);
- else if (cip->ci_flags & FMD_CF_REPAIRED)
+ if (cip->ci_flags & FMD_CF_REPAIRED)
fmd_case_transition(cp, FMD_CASE_REPAIRED, 0);
- else if (cip->ci_flags & FMD_CF_ISOLATED)
+ else if (cip->ci_flags & FMD_CF_ISOLATED) {
fmd_case_transition(cp, FMD_CASE_CLOSED, 0);
+ if (cip->ci_xprt != NULL)
+ fmd_xprt_uuclose(cip->ci_xprt, cip->ci_uuid);
+ }
fmd_case_rele(cp);
}
void
-fmd_case_discard(fmd_case_t *cp)
+fmd_case_discard(fmd_case_t *cp, boolean_t delete_from_asru_cache)
{
fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
@@ -1597,6 +1794,11 @@ fmd_case_discard(fmd_case_t *cp)
ASSERT(fmd_module_locked(cip->ci_mod));
fmd_list_delete(&cip->ci_mod->mod_cases, cip);
+ if (delete_from_asru_cache) {
+ (void) pthread_mutex_lock(&cip->ci_lock);
+ fmd_asru_hash_delete_case(fmd.d_asrus, cp);
+ (void) pthread_mutex_unlock(&cip->ci_lock);
+ }
fmd_case_rele(cp);
}
@@ -1612,15 +1814,11 @@ fmd_case_repair(fmd_case_t *cp)
{
fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
uint_t cstate;
+ fmd_asru_rep_arg_t fara;
(void) pthread_mutex_lock(&cip->ci_lock);
cstate = cip->ci_state;
- if (cip->ci_xprt != NULL) {
- (void) pthread_mutex_unlock(&cip->ci_lock);
- return (fmd_set_errno(EFMD_CASE_OWNER));
- }
-
if (cstate < FMD_CASE_SOLVED) {
(void) pthread_mutex_unlock(&cip->ci_lock);
return (fmd_set_errno(EFMD_CASE_STATE));
@@ -1631,9 +1829,23 @@ fmd_case_repair(fmd_case_t *cp)
return (0); /* already repaired */
}
- fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, NULL);
+ TRACE((FMD_DBG_CASE, "case repair %s", cip->ci_uuid));
+ fara.fara_reason = FMD_ASRU_REPAIRED;
+ fara.fara_bywhat = FARA_BY_CASE;
+ fara.fara_rval = NULL;
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, &fara);
(void) pthread_mutex_unlock(&cip->ci_lock);
+ /*
+ * if this is a proxied case, send the repair across the transport.
+ * The remote side will then do the repair and send a list.repaired back
+ * again such that we can finally repair the case on this side.
+ */
+ if (cip->ci_xprt != NULL) {
+ fmd_case_xprt_updated(cp);
+ return (0);
+ }
+
if (cstate == FMD_CASE_CLOSED)
fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
else
@@ -1647,15 +1859,11 @@ fmd_case_acquit(fmd_case_t *cp)
{
fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
uint_t cstate;
+ fmd_asru_rep_arg_t fara;
(void) pthread_mutex_lock(&cip->ci_lock);
cstate = cip->ci_state;
- if (cip->ci_xprt != NULL) {
- (void) pthread_mutex_unlock(&cip->ci_lock);
- return (fmd_set_errno(EFMD_CASE_OWNER));
- }
-
if (cstate < FMD_CASE_SOLVED) {
(void) pthread_mutex_unlock(&cip->ci_lock);
return (fmd_set_errno(EFMD_CASE_STATE));
@@ -1666,9 +1874,23 @@ fmd_case_acquit(fmd_case_t *cp)
return (0); /* already repaired */
}
- fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_acquit, NULL);
+ TRACE((FMD_DBG_CASE, "case acquit %s", cip->ci_uuid));
+ fara.fara_reason = FMD_ASRU_ACQUITTED;
+ fara.fara_bywhat = FARA_BY_CASE;
+ fara.fara_rval = NULL;
+ fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, &fara);
(void) pthread_mutex_unlock(&cip->ci_lock);
+ /*
+ * If this is a proxied case, send the acquittal across the transport.
+ * The remote side will then do the acquittal and send a list.repaired
+ * back again such that we can finally repair the case on this side.
+ */
+ if (cip->ci_xprt != NULL) {
+ fmd_case_xprt_updated(cp);
+ return (0);
+ }
+
if (cstate == FMD_CASE_CLOSED)
fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
else
@@ -1722,6 +1944,25 @@ fmd_case_settime(fmd_case_t *cp, time_t tv_sec, suseconds_t tv_usec)
((fmd_case_impl_t *)cp)->ci_tv_valid = 1;
}
+void
+fmd_case_set_de_fmri(fmd_case_t *cp, nvlist_t *nvl)
+{
+ fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
+
+ if (cip->ci_diag_de)
+ nvlist_free(cip->ci_diag_de);
+ cip->ci_diag_de = nvl;
+}
+
+void
+fmd_case_setcode(fmd_case_t *cp, char *code)
+{
+ fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
+
+ cip->ci_code = fmd_strdup(code, FMD_SLEEP);
+ cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0;
+}
+
/*ARGSUSED*/
void
fmd_case_repair_replay_case(fmd_case_t *cp, void *arg)
@@ -1734,7 +1975,7 @@ fmd_case_repair_replay_case(fmd_case_t *cp, void *arg)
int any_unusable_and_present = 0;
fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
- if (cip->ci_state < FMD_CASE_SOLVED)
+ if (cip->ci_state < FMD_CASE_SOLVED || cip->ci_xprt != NULL)
return;
fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
@@ -1755,18 +1996,24 @@ fmd_case_repair_replay_case(fmd_case_t *cp, void *arg)
fmd_module_unlock(cip->ci_mod);
cip->ci_state = FMD_CASE_RESOLVED;
+ TRACE((FMD_DBG_CASE, "replay sending list.repaired %s",
+ cip->ci_uuid));
nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
class);
fmd_dispq_dispatch(fmd.d_disp, e, class);
+ TRACE((FMD_DBG_CASE, "replay sending list.resolved %s",
+ cip->ci_uuid));
fmd_case_publish(cp, FMD_CASE_RESOLVED);
(void) pthread_mutex_lock(&cip->ci_lock);
fmd_asru_hash_delete_case(fmd.d_asrus, cp);
(void) pthread_mutex_unlock(&cip->ci_lock);
fmd_case_rele(cp);
} else {
+ TRACE((FMD_DBG_CASE, "replay sending list.repaired %s",
+ cip->ci_uuid));
nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
@@ -1778,6 +2025,8 @@ fmd_case_repair_replay_case(fmd_case_t *cp, void *arg)
* if some but not all of the suspects are not faulty, replay
* the list.updated.
*/
+ TRACE((FMD_DBG_CASE, "replay sending list.updated %s",
+ cip->ci_uuid));
nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
diff --git a/usr/src/cmd/fm/fmd/common/fmd_case.h b/usr/src/cmd/fm/fmd/common/fmd_case.h
index a635173795..354e3f35a1 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_case.h
+++ b/usr/src/cmd/fm/fmd/common/fmd_case.h
@@ -20,15 +20,13 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _FMD_CASE_H
#define _FMD_CASE_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <pthread.h>
#include <libnvpair.h>
@@ -62,6 +60,10 @@ typedef struct fmd_case_impl {
size_t ci_codelen; /* size of ci_code buffer in bytes */
struct fmd_module *ci_mod; /* module that owns this case */
fmd_xprt_t *ci_xprt; /* transport for this case (or NULL) */
+ uint8_t ci_precanned; /* precanned code from injection */
+ nvlist_t *ci_diag_de; /* diag side de fmri */
+ uint8_t *ci_diag_asru; /* is asru valid on diag side */
+ uint8_t *ci_proxy_asru; /* is asru valid on proxy side */
void *ci_data; /* data from fmd_case_setspecific() */
pthread_mutex_t ci_lock; /* lock for remainder of contents */
uint_t ci_refs; /* reference count */
@@ -91,10 +93,18 @@ typedef struct fmd_case_impl {
#define FMD_CF_SOLVED 0x02 /* case has been solved */
#define FMD_CF_ISOLATED 0x04 /* case has been isolated */
#define FMD_CF_REPAIRED 0x08 /* case has been repaired */
-#define FMD_CF_REPAIRING 0x10 /* case repair in progress */
+#define FMD_CF_RESOLVED 0x10 /* case has been resolved */
#define FMD_CF_INVISIBLE 0x20 /* case should be invisible */
#define FMD_CF_DELETING 0x40 /* case is about to be deleted */
+/*
+ * ci_proxy_asru flags record if we created a new asru on the proxy side and
+ * if so whether it is derived from the received asru or received resource.
+ */
+#define FMD_PROXY_ASRU_NOT_NEEDED 0
+#define FMD_PROXY_ASRU_FROM_ASRU 1
+#define FMD_PROXY_ASRU_FROM_RSRC 2
+
typedef struct fmd_case_hash {
pthread_rwlock_t ch_lock; /* lock protecting case hash */
fmd_case_impl_t **ch_hash; /* hash bucket array for cases */
@@ -135,8 +145,15 @@ extern void fmd_case_clrdirty(fmd_case_t *);
extern void fmd_case_commit(fmd_case_t *);
extern void fmd_case_update(fmd_case_t *);
extern void fmd_case_delete(fmd_case_t *);
-extern void fmd_case_discard(fmd_case_t *);
+extern void fmd_case_discard(fmd_case_t *, boolean_t);
extern void fmd_case_settime(fmd_case_t *, time_t, suseconds_t);
+extern void fmd_case_setcode(fmd_case_t *, char *);
+extern void fmd_case_set_de_fmri(fmd_case_t *, nvlist_t *);
+extern void fmd_case_update_status(fmd_case_t *, uint8_t *, uint8_t *,
+ uint8_t *);
+extern void fmd_case_update_containees(fmd_case_t *);
+extern void fmd_case_xprt_updated(fmd_case_t *);
+extern void fmd_case_close_status(fmd_case_t *);
extern int fmd_case_repair(fmd_case_t *);
extern int fmd_case_acquit(fmd_case_t *);
diff --git a/usr/src/cmd/fm/fmd/common/fmd_ckpt.c b/usr/src/cmd/fm/fmd/common/fmd_ckpt.c
index 4c7a645be5..4bae10f7b4 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_ckpt.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_ckpt.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -1057,11 +1057,19 @@ fmd_ckpt_restore_case(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
/*
* Once solved, treat suspects from resource cache as master copy.
+ *
+ * If !fmd.d_running, this module must be a builtin, and so we don't
+ * want to restore suspects or call fmd_case_transition_update() at this
+ * stage. The suspects will be added later from the resource cache.
+ * Calling fmd_case_transition("SOLVED") is OK here as the state is
+ * already solved, so all it does is update the case flags.
*/
- if ((n = ((fmd_case_impl_t *)cp)->ci_nsuspects) == 0)
+ if (fmd.d_running && (n = ((fmd_case_impl_t *)cp)->ci_nsuspects) == 0)
n = fmd_ckpt_restore_suspects(ckp, cp, fcfc->fcfc_suspects);
- if (fcfc->fcfc_state == FCF_CASE_SOLVED)
+ if (!fmd.d_running)
+ fmd_case_transition(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
+ else if (fcfc->fcfc_state == FCF_CASE_SOLVED)
fmd_case_transition_update(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
else if (fcfc->fcfc_state == FCF_CASE_CLOSE_WAIT && n != 0)
fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_SOLVED);
diff --git a/usr/src/cmd/fm/fmd/common/fmd_dispq.c b/usr/src/cmd/fm/fmd/common/fmd_dispq.c
index 8519a4475a..1aa0e0dbd7 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_dispq.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_dispq.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/fm/protocol.h>
#include <sys/bitmap.h>
@@ -330,6 +328,7 @@ fmd_dispq_dispatch_gid(fmd_dispq_t *dqp,
if (FMD_EVENT_TYPE(ep) == FMD_EVT_PROTOCOL &&
(strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 ||
+ strcmp(class, FM_LIST_RESOLVED_CLASS) == 0 ||
strcmp(class, FM_LIST_UPDATED_CLASS) == 0) &&
nvlist_lookup_nvlist_array(FMD_EVENT_NVL(ep), FM_SUSPECT_FAULT_LIST,
&nva, &nvc) == 0) {
diff --git a/usr/src/cmd/fm/fmd/common/fmd_mdb.c b/usr/src/cmd/fm/fmd/common/fmd_mdb.c
index ea5b553e10..a9e89d37bc 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_mdb.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_mdb.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/mdb_modapi.h>
#include <limits.h>
@@ -1053,6 +1051,63 @@ fmd_asru(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
return (DCMD_OK);
}
+static int
+al_walk_init(mdb_walk_state_t *wsp)
+{
+ fmd_asru_hash_t ah;
+ fmd_t F;
+
+ if (wsp->walk_addr == NULL && mdb_readvar(&F, "fmd") != sizeof (F)) {
+ mdb_warn("failed to read fmd meta-data");
+ return (WALK_ERR);
+ }
+
+ if (wsp->walk_addr == NULL)
+ wsp->walk_addr = (uintptr_t)F.d_asrus;
+
+ if (mdb_vread(&ah, sizeof (ah), wsp->walk_addr) != sizeof (ah)) {
+ mdb_warn("failed to read asru_hash at %p", wsp->walk_addr);
+ return (WALK_ERR);
+ }
+
+ return (hash_walk_init(wsp, (uintptr_t)ah.ah_rsrc_hash, ah.ah_hashlen,
+ "fmd_asru_link", sizeof (fmd_asru_link_t), OFFSETOF(fmd_asru_link_t,
+ al_rsrc_next)));
+}
+
+static int
+fmd_asru_link(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ char uuid[48], name[PATH_MAX];
+ fmd_asru_link_t a;
+
+ if (!(flags & DCMD_ADDRSPEC)) {
+ if (mdb_walk_dcmd("fmd_asru_link", "fmd_asru_link", argc,
+ argv) != 0) {
+ mdb_warn("failed to walk fmd_asru_link hash");
+ return (DCMD_ERR);
+ }
+ return (DCMD_OK);
+ }
+
+ if (mdb_vread(&a, sizeof (a), addr) != sizeof (a)) {
+ mdb_warn("failed to read fmd_asru_link at %p", addr);
+ return (DCMD_ERR);
+ }
+
+ if (DCMD_HDRSPEC(flags))
+ mdb_printf("%<u>%-8s %-36s %s%</u>\n", "ADDR", "UUID", "NAME");
+
+ if (mdb_readstr(uuid, sizeof (uuid), (uintptr_t)a.al_uuid) <= 0)
+ (void) mdb_snprintf(uuid, sizeof (uuid), "<%p>", a.al_uuid);
+ if (mdb_readstr(name, sizeof (name), (uintptr_t)a.al_rsrc_name) <= 0)
+ (void) mdb_snprintf(name, sizeof (name), "<%p>",
+ a.al_rsrc_name);
+
+ mdb_printf("%-8p %-36s %s\n", addr, uuid, name);
+ return (DCMD_OK);
+}
+
/*ARGSUSED*/
static int
fcf_hdr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
@@ -1522,6 +1577,7 @@ static const mdb_dcmd_t dcmds[] = {
{ "fmd_buf", ":", "display buffer structure", fmd_buf },
{ "fmd_serd", "[:]", "display serd engine structure", fmd_serd },
{ "fmd_asru", "?", "display asru resource structure", fmd_asru },
+ { "fmd_asru_link", "?", "display resource structure", fmd_asru_link },
{ "fmd_timer", "?", "display pending timer(s)", fmd_timer },
{ "fmd_xprt", "?[-lrsu]", "display event transport(s)", fmd_xprt },
{ NULL }
@@ -1546,6 +1602,8 @@ static const mdb_walker_t walkers[] = {
serd_walk_init, hash_walk_step, hash_walk_fini },
{ "fmd_asru", "walk asru resource hash",
asru_walk_init, hash_walk_step, hash_walk_fini },
+ { "fmd_asru_link", "walk resource hash",
+ al_walk_init, hash_walk_step, hash_walk_fini },
{ "fmd_timerq", "walk timer queue",
tmq_walk_init, tmq_walk_step, NULL },
{ "fmd_xprt", "walk per-module list of transports",
diff --git a/usr/src/cmd/fm/fmd/common/fmd_protocol.c b/usr/src/cmd/fm/fmd/common/fmd_protocol.c
index 7064af0164..543a7786bb 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_protocol.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_protocol.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/fm/protocol.h>
#include <strings.h>
#include <alloca.h>
@@ -183,7 +181,7 @@ fmd_protocol_rsrc_asru(const char *class,
nvlist_t *fmri, const char *uuid, const char *code,
boolean_t faulty, boolean_t unusable, boolean_t message, nvlist_t *event,
struct timeval *tvp, boolean_t repaired, boolean_t replaced,
- boolean_t acquitted)
+ boolean_t acquitted, nvlist_t *diag_de)
{
nvlist_t *nvl;
int64_t tod[2];
@@ -214,6 +212,9 @@ fmd_protocol_rsrc_asru(const char *class,
err |= nvlist_add_boolean_value(nvl, FM_SUSPECT_MESSAGE, message);
err |= nvlist_add_int64_array(nvl, FM_SUSPECT_DIAG_TIME, tod, 2);
+ if (diag_de != NULL)
+ err |= nvlist_add_nvlist(nvl, FM_SUSPECT_DE, diag_de);
+
if (event != NULL)
err |= nvlist_add_nvlist(nvl, FM_RSRC_ASRU_EVENT, event);
@@ -335,3 +336,35 @@ fmd_protocol_xprt_uuclose(fmd_module_t *mp, const char *class, uint8_t version,
return (nvl);
}
+
+/*
+ * Create a transport control event (via fmd_protocol_xprt_ctl()) of the
+ * given class and version and add the case uuid to it under
+ * FM_RSRC_XPRT_UUID.  fmd_panic() is called if the uuid cannot be added.
+ * The populated nvlist is returned to the caller.
+ */
+nvlist_t *
+fmd_protocol_xprt_uuresolved(fmd_module_t *mp, const char *class,
+    uint8_t version, const char *uuid)
+{
+	nvlist_t *ctl;
+	int rv;
+
+	ctl = fmd_protocol_xprt_ctl(mp, class, version);
+	rv = nvlist_add_string(ctl, FM_RSRC_XPRT_UUID, uuid);
+
+	if (rv != 0)
+		fmd_panic("failed to populate nvlist: %s\n", fmd_strerror(rv));
+
+	return (ctl);
+}
+
+/*
+ * Create a transport control event (via fmd_protocol_xprt_ctl()) of the
+ * given class and version carrying:
+ *   FM_RSRC_XPRT_UUID            - the case uuid
+ *   FM_RSRC_XPRT_FAULT_STATUS    - statusp[0..nelem-1]
+ *   FM_RSRC_XPRT_FAULT_HAS_ASRU  - has_asrup[0..nelem-1], only when
+ *                                  has_asrup is non-NULL
+ * The return codes of the adds are OR-ed together and fmd_panic() is called
+ * if the result is non-zero.
+ */
+nvlist_t *
+fmd_protocol_xprt_updated(fmd_module_t *mp, const char *class, uint8_t version,
+    const char *uuid, uint8_t *statusp, uint8_t *has_asrup, uint_t nelem)
+{
+	nvlist_t *ctl;
+	int rv;
+
+	ctl = fmd_protocol_xprt_ctl(mp, class, version);
+
+	rv = nvlist_add_string(ctl, FM_RSRC_XPRT_UUID, uuid);
+	rv |= nvlist_add_uint8_array(ctl, FM_RSRC_XPRT_FAULT_STATUS, statusp,
+	    nelem);
+	if (has_asrup != NULL) {
+		rv |= nvlist_add_uint8_array(ctl, FM_RSRC_XPRT_FAULT_HAS_ASRU,
+		    has_asrup, nelem);
+	}
+
+	if (rv != 0)
+		fmd_panic("failed to populate nvlist: %s\n", fmd_strerror(rv));
+
+	return (ctl);
+}
diff --git a/usr/src/cmd/fm/fmd/common/fmd_protocol.h b/usr/src/cmd/fm/fmd/common/fmd_protocol.h
index 68f2196b18..927a875ec3 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_protocol.h
+++ b/usr/src/cmd/fm/fmd/common/fmd_protocol.h
@@ -20,15 +20,13 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _FMD_PROTOCOL_H
#define _FMD_PROTOCOL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/fm/protocol.h>
#include <libnvpair.h>
#include <stdarg.h>
@@ -76,7 +74,7 @@ extern nvlist_t *fmd_protocol_list(const char *, nvlist_t *,
struct timeval *);
extern nvlist_t *fmd_protocol_rsrc_asru(const char *, nvlist_t *,
const char *, const char *, boolean_t, boolean_t, boolean_t, nvlist_t *,
- struct timeval *m, boolean_t, boolean_t, boolean_t);
+ struct timeval *m, boolean_t, boolean_t, boolean_t, nvlist_t *);
extern nvlist_t *fmd_protocol_fmderror(int, const char *, va_list);
extern nvlist_t *fmd_protocol_moderror(struct fmd_module *, int, const char *);
extern nvlist_t *fmd_protocol_xprt_ctl(struct fmd_module *,
@@ -85,6 +83,10 @@ extern nvlist_t *fmd_protocol_xprt_sub(struct fmd_module *,
const char *, uint8_t, const char *);
extern nvlist_t *fmd_protocol_xprt_uuclose(struct fmd_module *,
const char *, uint8_t, const char *);
+extern nvlist_t *fmd_protocol_xprt_uuresolved(struct fmd_module *,
+ const char *, uint8_t, const char *);
+extern nvlist_t *fmd_protocol_xprt_updated(struct fmd_module *,
+ const char *, uint8_t, const char *, uint8_t *, uint8_t *, uint_t);
#ifdef __cplusplus
}
diff --git a/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c b/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c
index 2987849868..07e43656b2 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <strings.h>
#include <limits.h>
#include <unistd.h>
@@ -465,89 +463,69 @@ fmd_adm_rsrcinfo_1_svc(char *fmri,
return (TRUE);
}
-bool_t
-fmd_adm_rsrcflush_1_svc(char *name, int *rvp, struct svc_req *req)
-{
- return (fmd_adm_rsrcrepaired_1_svc(name, rvp, req));
-}
-
-bool_t
-fmd_adm_rsrcrepaired_1_svc(char *name, int *rvp, struct svc_req *req)
+static void
+fmd_adm_do_repair(char *name, struct svc_req *req, int *errp, uint8_t reason,
+ char *uuid)
{
- int err = FMD_ADM_ERR_RSRCNOTF;
-
if (fmd_rpc_deny(req))
- err = FMD_ADM_ERR_PERM;
+ *errp = FMD_ADM_ERR_PERM;
else {
+ fmd_asru_rep_arg_t fara;
+
+ fara.fara_reason = reason;
+ fara.fara_rval = errp;
+ fara.fara_uuid = uuid;
+ fara.fara_bywhat = FARA_BY_ASRU;
fmd_asru_hash_apply_by_asru(fmd.d_asrus, name,
- fmd_asru_repaired, &err);
+ fmd_asru_repaired, &fara);
+ fara.fara_bywhat = FARA_BY_LABEL;
fmd_asru_hash_apply_by_label(fmd.d_asrus, name,
- fmd_asru_repaired, &err);
+ fmd_asru_repaired, &fara);
+ fara.fara_bywhat = FARA_BY_FRU;
fmd_asru_hash_apply_by_fru(fmd.d_asrus, name,
- fmd_asru_repaired, &err);
+ fmd_asru_repaired, &fara);
+ fara.fara_bywhat = FARA_BY_RSRC;
fmd_asru_hash_apply_by_rsrc(fmd.d_asrus, name,
- fmd_asru_repaired, &err);
+ fmd_asru_repaired, &fara);
}
+}
+
+bool_t
+fmd_adm_rsrcflush_1_svc(char *name, int *rvp, struct svc_req *req)
+{
+ int err = FMD_ADM_ERR_RSRCNOTF;
+
+ fmd_adm_do_repair(name, req, &err, FMD_ASRU_REPAIRED, NULL);
*rvp = err;
return (TRUE);
}
bool_t
-fmd_adm_rsrcreplaced_1_svc(char *name, int *rvp, struct svc_req *req)
+fmd_adm_rsrcrepaired_1_svc(char *name, int *rvp, struct svc_req *req)
{
int err = FMD_ADM_ERR_RSRCNOTF;
- if (fmd_rpc_deny(req))
- err = FMD_ADM_ERR_PERM;
- else {
- fmd_asru_hash_apply_by_asru(fmd.d_asrus, name,
- fmd_asru_replaced, &err);
- fmd_asru_hash_apply_by_label(fmd.d_asrus, name,
- fmd_asru_replaced, &err);
- fmd_asru_hash_apply_by_fru(fmd.d_asrus, name,
- fmd_asru_replaced, &err);
- fmd_asru_hash_apply_by_rsrc(fmd.d_asrus, name,
- fmd_asru_replaced, &err);
- }
+ fmd_adm_do_repair(name, req, &err, FMD_ASRU_REPAIRED, NULL);
*rvp = err;
return (TRUE);
}
-typedef struct {
- int *errp;
- char *uuid;
-} fmd_adm_ra_t;
-
-void
-fmd_asru_ra_cb(fmd_asru_link_t *alp, void *arg)
+bool_t
+fmd_adm_rsrcreplaced_1_svc(char *name, int *rvp, struct svc_req *req)
{
- fmd_adm_ra_t *farap = (fmd_adm_ra_t *)arg;
+ int err = FMD_ADM_ERR_RSRCNOTF;
- if (strcmp(farap->uuid, "") == 0 ||
- strcmp(farap->uuid, alp->al_case_uuid) == 0)
- fmd_asru_acquit(alp, farap->errp);
+ fmd_adm_do_repair(name, req, &err, FMD_ASRU_REPLACED, NULL);
+ *rvp = err;
+ return (TRUE);
}
bool_t
fmd_adm_rsrcacquit_1_svc(char *name, char *uuid, int *rvp, struct svc_req *req)
{
int err = FMD_ADM_ERR_RSRCNOTF;
- fmd_adm_ra_t fara;
- if (fmd_rpc_deny(req))
- err = FMD_ADM_ERR_PERM;
- else {
- fara.errp = &err;
- fara.uuid = uuid;
- fmd_asru_hash_apply_by_asru(fmd.d_asrus, name,
- fmd_asru_ra_cb, &fara);
- fmd_asru_hash_apply_by_label(fmd.d_asrus, name,
- fmd_asru_ra_cb, &fara);
- fmd_asru_hash_apply_by_fru(fmd.d_asrus, name,
- fmd_asru_ra_cb, &fara);
- fmd_asru_hash_apply_by_rsrc(fmd.d_asrus, name,
- fmd_asru_ra_cb, &fara);
- }
+ fmd_adm_do_repair(name, req, &err, FMD_ASRU_ACQUITTED, uuid);
*rvp = err;
return (TRUE);
}
diff --git a/usr/src/cmd/fm/fmd/common/fmd_sysevent.c b/usr/src/cmd/fm/fmd/common/fmd_sysevent.c
index 683ef1341a..658ca43a1f 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_sysevent.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_sysevent.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent.h>
#include <sys/sysevent_impl.h>
@@ -464,7 +462,8 @@ sysev_init(fmd_hdl_t *hdl)
"channel %s", sysev_channel);
}
- sysev_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);
+ sysev_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY |
+ FMD_XPRT_CACHE_AS_LOCAL, NULL, NULL);
sysev_hdl = hdl;
/*
diff --git a/usr/src/cmd/fm/fmd/common/fmd_xprt.c b/usr/src/cmd/fm/fmd/common/fmd_xprt.c
index 0a4cf885de..323315cfa3 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_xprt.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_xprt.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -101,6 +101,8 @@
* FMA Class Payload
* --------- -------
* resource.fm.xprt.uuclose string (uuid of case)
+ * resource.fm.xprt.uuresolved string (uuid of case)
+ * resource.fm.xprt.updated string (uuid of case)
* resource.fm.xprt.subscribe string (class pattern)
* resource.fm.xprt.unsubscribe string (class pattern)
* resource.fm.xprt.unsuback string (class pattern)
@@ -176,6 +178,8 @@ const fmd_xprt_rule_t _fmd_xprt_state_run[] = {
{ "resource.fm.xprt.unsubscribe", fmd_xprt_event_unsub },
{ "resource.fm.xprt.unsuback", fmd_xprt_event_unsuback },
{ "resource.fm.xprt.uuclose", fmd_xprt_event_uuclose },
+{ "resource.fm.xprt.uuresolved", fmd_xprt_event_uuresolved },
+{ "resource.fm.xprt.updated", fmd_xprt_event_updated },
{ "resource.fm.xprt.*", fmd_xprt_event_error },
{ NULL, NULL }
};
@@ -510,8 +514,8 @@ fmd_xprt_send_case(fmd_case_t *cp, void *arg)
nvlist_t *nvl;
char *class;
- if (cip->ci_state != FMD_CASE_SOLVED)
- return; /* unsolved, or we'll get it during the ASRU pass */
+ if (cip->ci_state == FMD_CASE_UNSOLVED)
+ return;
nvl = fmd_case_mkevent(cp, FM_LIST_SUSPECT_CLASS);
(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
@@ -523,49 +527,12 @@ fmd_xprt_send_case(fmd_case_t *cp, void *arg)
fmd_dispq_dispatch_gid(fmd.d_disp, e, class, xip->xi_queue->eq_sgid);
}
-/*
- * Upon transition to RUN, we take every ASRU which is in the degraded state
- * and resend a fault.* event for it to our remote peer, in case the peer is
- * running in the fault manager that knows how to disable this resource. If
- * any new resources are added to the cache during our iteration, this is no
- * problem because our subscriptions are already proxied and so any new cases
- * will result in a list.suspect event being transported if that is needed.
- */
-static void
-fmd_xprt_send_asru(fmd_asru_t *ap, void *arg)
-{
- fmd_xprt_impl_t *xip = arg;
- nvlist_t *nvl = NULL;
- fmd_event_t *e;
- char *class;
-
- (void) pthread_mutex_lock(&ap->asru_lock);
-
- if ((ap->asru_flags & (FMD_ASRU_INTERNAL | FMD_ASRU_STATE)) ==
- FMD_ASRU_FAULTY && fmd_case_orphaned(ap->asru_case))
- (void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva);
-
- (void) pthread_mutex_unlock(&ap->asru_lock);
-
- if (nvl == NULL)
- return; /* asru is internal, unusable, or not faulty */
-
- (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
- e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
-
- fmd_dprintf(FMD_DBG_XPRT, "re-send %s for %s to transport %u\n",
- class, ap->asru_name, xip->xi_id);
-
- fmd_dispq_dispatch_gid(fmd.d_disp, e, class, xip->xi_queue->eq_sgid);
-}
-
void
fmd_xprt_event_run(fmd_xprt_impl_t *xip, nvlist_t *nvl)
{
if (!fmd_xprt_vmismatch(xip, nvl, NULL)) {
fmd_xprt_transition(xip, _fmd_xprt_state_run, "RUN");
fmd_case_hash_apply(fmd.d_cases, fmd_xprt_send_case, xip);
- fmd_asru_hash_apply(fmd.d_asrus, fmd_xprt_send_asru, xip);
}
}
@@ -633,6 +600,9 @@ fmd_xprt_event_unsuback(fmd_xprt_impl_t *xip, nvlist_t *nvl)
(void) pthread_mutex_unlock(&xip->xi_lock);
}
+/*
+ * on diagnosing side, receive a uuclose from the proxy.
+ */
void
fmd_xprt_event_uuclose(fmd_xprt_impl_t *xip, nvlist_t *nvl)
{
@@ -644,11 +614,77 @@ fmd_xprt_event_uuclose(fmd_xprt_impl_t *xip, nvlist_t *nvl)
if (nvlist_lookup_string(nvl, FM_RSRC_XPRT_UUID, &uuid) == 0 &&
(cp = fmd_case_hash_lookup(fmd.d_cases, uuid)) != NULL) {
+ /*
+ * update resource cache status and transition case
+ */
+ fmd_case_close_status(cp);
fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_ISOLATED);
fmd_case_rele(cp);
}
}
+/*
+ * On the diagnosing side, handle a resource.fm.xprt.uuresolved control event
+ * received from the proxy.  The case named by FM_RSRC_XPRT_UUID is looked up
+ * in the local case hash and moved one step along
+ * CLOSE_WAIT -> REPAIRED -> RESOLVED with FMD_CF_RESOLVED set:
+ *   currently REPAIRED  -> RESOLVED
+ *   currently CLOSED    -> REPAIRED
+ *   anything else       -> CLOSE_WAIT
+ * Events with a mismatched version, no uuid, or an unknown uuid are ignored.
+ */
+void
+fmd_xprt_event_uuresolved(fmd_xprt_impl_t *xip, nvlist_t *nvl)
+{
+	fmd_case_impl_t *cip;
+	fmd_case_t *cp;
+	char *uuid;
+	uint_t next;
+
+	if (fmd_xprt_vmismatch(xip, nvl, NULL))
+		return; /* transitioned to error state */
+
+	if (nvlist_lookup_string(nvl, FM_RSRC_XPRT_UUID, &uuid) != 0)
+		return;
+
+	if ((cp = fmd_case_hash_lookup(fmd.d_cases, uuid)) == NULL)
+		return;
+
+	cip = (fmd_case_impl_t *)cp;
+
+	if (cip->ci_state == FMD_CASE_REPAIRED)
+		next = FMD_CASE_RESOLVED;
+	else if (cip->ci_state == FMD_CASE_CLOSED)
+		next = FMD_CASE_REPAIRED;
+	else
+		next = FMD_CASE_CLOSE_WAIT;
+
+	fmd_case_transition(cp, next, FMD_CF_RESOLVED);
+	fmd_case_rele(cp);
+}
+
+/*
+ * On the diagnosing side, handle a resource.fm.xprt.updated control event
+ * (a repair/acquit) received from the proxy.  The case named by
+ * FM_RSRC_XPRT_UUID is looked up in the local case hash; events with a
+ * mismatched version, no uuid, or an unknown uuid are ignored.
+ */
+void
+fmd_xprt_event_updated(fmd_xprt_impl_t *xip, nvlist_t *nvl)
+{
+	uint8_t *statusp, *proxy_asrup = NULL;
+	uint_t nelem = 0;
+	fmd_case_t *cp;
+	char *uuid;
+
+	if (fmd_xprt_vmismatch(xip, nvl, NULL))
+		return; /* transitioned to error state */
+
+	if (nvlist_lookup_string(nvl, FM_RSRC_XPRT_UUID, &uuid) != 0 ||
+	    (cp = fmd_case_hash_lookup(fmd.d_cases, uuid)) == NULL)
+		return;
+
+	/*
+	 * Only update status with new repairs if "no remote repair"
+	 * is not set. Do the case_update anyway though (as this will
+	 * refresh the status on the proxy side).
+	 */
+	if (!(xip->xi_flags & FMD_XPRT_NO_REMOTE_REPAIR)) {
+		if (nvlist_lookup_uint8_array(nvl, FM_RSRC_XPRT_FAULT_STATUS,
+		    &statusp, &nelem) == 0 && nelem != 0) {
+			/* the has-asru array is optional in the payload */
+			(void) nvlist_lookup_uint8_array(nvl,
+			    FM_RSRC_XPRT_FAULT_HAS_ASRU, &proxy_asrup, &nelem);
+			fmd_case_update_status(cp, statusp, proxy_asrup, NULL);
+		}
+		fmd_case_update_containees(cp);
+	}
+
+	fmd_case_update(cp);
+	fmd_case_rele(cp);
+}
+
void
fmd_xprt_event_error(fmd_xprt_impl_t *xip, nvlist_t *nvl)
{
@@ -879,12 +915,13 @@ fmd_xprt_destroy(fmd_xprt_t *xp)
/*
* Release every case handle in the module that was cached by this
* transport. This will result in these cases disappearing from the
- * local case hash so that fmd_case_uuclose() can no longer be used.
+ * local case hash so that fmd_case_uuclose() and fmd_case_repaired()
+ * etc can no longer be used.
*/
for (cip = fmd_list_next(&mp->mod_cases); cip != NULL; cip = nip) {
nip = fmd_list_next(cip);
if (cip->ci_xprt == xp)
- fmd_case_discard((fmd_case_t *)cip);
+ fmd_case_discard((fmd_case_t *)cip, B_TRUE);
}
/*
@@ -998,6 +1035,351 @@ fmd_xprt_send(fmd_xprt_t *xp)
}
}
+/*
+ * This function creates a local suspect list. This is used when a suspect list
+ * is created directly by an external source like fminject.
+ *
+ * A new case is created in the transport's module, and the diag code (if
+ * present), each fault in FM_SUSPECT_FAULT_LIST, the diag time and the DE fmri
+ * are copied from the received list.suspect into it before the case is
+ * transitioned to SOLVED.  For each fault, a missing fru, asru or location
+ * is filled in from the topology using the fault's resource.
+ *
+ * The received event (nvl) stays owned by the caller: anything obtained from
+ * it with nvlist_lookup_*() is a reference into nvl and must not be freed
+ * here.  Only nvlists returned by topo_fmri_fru()/topo_fmri_asru() are freed.
+ */
+static void
+fmd_xprt_list_suspect_local(fmd_xprt_t *xp, nvlist_t *nvl)
+{
+	nvlist_t **nvlp;
+	nvlist_t *de_fmri, *de_fmri_dup = NULL;
+	int64_t *diag_time;
+	char *code = NULL;
+	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
+	fmd_case_t *cp;
+	uint_t nelem = 0, nelem2 = 0, i;
+
+	fmd_module_lock(xip->xi_queue->eq_mod);
+	cp = fmd_case_create(xip->xi_queue->eq_mod, NULL);
+	if (cp == NULL) {
+		fmd_module_unlock(xip->xi_queue->eq_mod);
+		return;
+	}
+
+	/*
+	 * copy diag_code if present
+	 */
+	(void) nvlist_lookup_string(nvl, FM_SUSPECT_DIAG_CODE, &code);
+	if (code != NULL) {
+		fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
+
+		cip->ci_precanned = 1;
+		fmd_case_setcode(cp, code);
+	}
+
+	/*
+	 * copy suspects
+	 */
+	(void) nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, &nvlp,
+	    &nelem);
+	for (i = 0; i < nelem; i++) {
+		nvlist_t *flt_copy, *asru = NULL, *fru = NULL, *rsrc = NULL;
+		int fru_borrowed, asru_borrowed;
+		topo_hdl_t *thp;
+		char *loc = NULL;
+		int err;
+
+		thp = fmd_fmri_topo_hold(TOPO_VERSION);
+		(void) nvlist_xdup(nvlp[i], &flt_copy, &fmd.d_nva);
+
+		/* rsrc, when found, is a reference into nvlp[i] */
+		(void) nvlist_lookup_nvlist(nvlp[i], FM_FAULT_RESOURCE, &rsrc);
+
+		/*
+		 * If no fru specified, get it from topo
+		 */
+		fru_borrowed =
+		    (nvlist_lookup_nvlist(nvlp[i], FM_FAULT_FRU, &fru) == 0);
+		if (!fru_borrowed && rsrc != NULL &&
+		    topo_fmri_fru(thp, rsrc, &fru, &err) == 0)
+			(void) nvlist_add_nvlist(flt_copy, FM_FAULT_FRU, fru);
+
+		/*
+		 * If no asru specified, get it from topo
+		 */
+		asru_borrowed =
+		    (nvlist_lookup_nvlist(nvlp[i], FM_FAULT_ASRU, &asru) == 0);
+		if (!asru_borrowed && rsrc != NULL &&
+		    topo_fmri_asru(thp, rsrc, &asru, &err) == 0)
+			(void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru);
+
+		/*
+		 * If no location specified, get it from topo
+		 */
+		if (nvlist_lookup_string(nvlp[i], FM_FAULT_LOCATION,
+		    &loc) != 0) {
+			if (fru && topo_fmri_label(thp, fru, &loc, &err) == 0)
+				(void) nvlist_add_string(flt_copy,
+				    FM_FAULT_LOCATION, loc);
+			else if (rsrc && topo_fmri_label(thp, rsrc, &loc,
+			    &err) == 0)
+				(void) nvlist_add_string(flt_copy,
+				    FM_FAULT_LOCATION, loc);
+			if (loc)
+				topo_hdl_strfree(thp, loc);
+		}
+
+		/*
+		 * Release only what topo handed to us.  Lists that were
+		 * looked up in nvlp[i] (always rsrc; fru/asru when the fault
+		 * already carried them) still belong to the received event.
+		 */
+		if (!fru_borrowed && fru != NULL)
+			nvlist_free(fru);
+		if (!asru_borrowed && asru != NULL)
+			nvlist_free(asru);
+
+		fmd_fmri_topo_rele(thp);
+		fmd_case_insert_suspect(cp, flt_copy);
+	}
+
+	/*
+	 * copy diag_time if present
+	 */
+	if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time,
+	    &nelem2) == 0 && nelem2 >= 2)
+		fmd_case_settime(cp, diag_time[0], diag_time[1]);
+
+	/*
+	 * copy DE fmri if present
+	 */
+	if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &de_fmri) == 0) {
+		(void) nvlist_xdup(de_fmri, &de_fmri_dup, &fmd.d_nva);
+		fmd_case_set_de_fmri(cp, de_fmri_dup);
+	}
+
+	fmd_case_transition(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
+	fmd_module_unlock(xip->xi_queue->eq_mod);
+}
+
+/*
+ * This function is called to create a proxy case on receipt of a list.suspect
+ * from the diagnosing side of the transport.
+ *
+ * Outline:
+ *  1. Scan the faults to work out which have an asru from the diagnosing
+ *     side (diag_asru[]) and which get an asru from the local topology
+ *     (proxy_asru[] / asrua[]).
+ *  2. Drop the event if FMD_XPRT_HCONLY / FMD_XPRT_HC_PRESENT_ONLY apply, or
+ *     if the uuid is already known (fmd_case_recreate() returns NULL).
+ *  3. Insert a copy of each fault into the new case, substituting the local
+ *     asru where one was found, then copy the diag time and DE fmri and
+ *     transition the case to SOLVED.
+ *  4. Apply the received fault status if it is not simply "all faulty", and
+ *     send an update back to the diagnosing side if any local asru was found.
+ *
+ * diag_asru and proxy_asru are stored in the case (ci_diag_asru,
+ * ci_proxy_asru) once the case exists; asrua is scratch space and is freed
+ * before returning.
+ */
+static void
+fmd_xprt_list_suspect(fmd_xprt_t *xp, nvlist_t *nvl)
+{
+	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
+	nvlist_t **nvlp;
+	uint_t nelem = 0, nelem2 = 0, nstatus = 0, i;
+	int64_t *diag_time;
+	topo_hdl_t *thp;
+	char *class;
+	nvlist_t *rsrc, *asru, *de_fmri, *de_fmri_dup = NULL;
+	nvlist_t *flt_copy;
+	int err;
+	nvlist_t **asrua;
+	uint8_t *proxy_asru = NULL;
+	int got_proxy_asru = 0;
+	int got_hc_rsrc = 0;
+	int got_present_rsrc = 0;
+	uint8_t *diag_asru = NULL;
+	char *scheme;
+	uint8_t *statusp = NULL;
+	char *uuid, *code;
+	fmd_case_t *cp;
+	fmd_case_impl_t *cip;
+	int need_update = 0;
+
+	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0)
+		return;
+	if (nvlist_lookup_string(nvl, FM_SUSPECT_DIAG_CODE, &code) != 0)
+		return;
+	(void) nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, &nvlp,
+	    &nelem);
+
+	/*
+	 * In order to implement FMD_XPRT_HCONLY and FMD_XPRT_HC_PRESENT_ONLY
+	 * etc we first scan the suspects to see if
+	 * - there was an asru in the received fault
+	 * - there was an hc-scheme resource in the received fault
+	 * - any hc-scheme resource in the received fault is present in the
+	 *   local topology
+	 * - any hc-scheme resource in the received fault has an asru in the
+	 *   local topology
+	 */
+	if (nelem > 0) {
+		asrua = fmd_zalloc(sizeof (nvlist_t *) * nelem, FMD_SLEEP);
+		proxy_asru = fmd_zalloc(sizeof (uint8_t) * nelem, FMD_SLEEP);
+		diag_asru = fmd_zalloc(sizeof (uint8_t) * nelem, FMD_SLEEP);
+		thp = fmd_fmri_topo_hold(TOPO_VERSION);
+		for (i = 0; i < nelem; i++) {
+			if (nvlist_lookup_nvlist(nvlp[i], FM_FAULT_ASRU,
+			    &asru) == 0 && asru != NULL)
+				diag_asru[i] = 1;
+			if (nvlist_lookup_string(nvlp[i], FM_CLASS,
+			    &class) != 0 || strncmp(class, "fault", 5) != 0)
+				continue;
+			/*
+			 * If there is an hc-scheme asru, use that to find the
+			 * real asru. Otherwise if there is an hc-scheme
+			 * resource, work out the old asru from that.
+			 * This order is to allow a two stage evaluation
+			 * of the asru where a fault in the diagnosing side
+			 * is in a component not visible to the proxy side,
+			 * but prevents a component that is visible from
+			 * working. So the diagnosing side sets the asru to
+			 * the latter component (in hc-scheme as the diagnosing
+			 * side doesn't know about the proxy side's virtual
+			 * schemes), and then the proxy side can convert that
+			 * to a suitable virtual scheme asru.
+			 */
+			if (nvlist_lookup_nvlist(nvlp[i], FM_FAULT_ASRU,
+			    &asru) == 0 && asru != NULL &&
+			    nvlist_lookup_string(asru, FM_FMRI_SCHEME,
+			    &scheme) == 0 &&
+			    strcmp(scheme, FM_FMRI_SCHEME_HC) == 0) {
+				got_hc_rsrc = 1;
+				if (xip->xi_flags & FMD_XPRT_EXTERNAL)
+					continue;
+				if (topo_fmri_present(thp, asru, &err) == 0)
+					got_present_rsrc = 1;
+				if (topo_fmri_asru(thp, asru, &asrua[i],
+				    &err) == 0) {
+					proxy_asru[i] =
+					    FMD_PROXY_ASRU_FROM_ASRU;
+					got_proxy_asru = 1;
+				}
+			} else if (nvlist_lookup_nvlist(nvlp[i],
+			    FM_FAULT_RESOURCE, &rsrc) == 0 && rsrc != NULL &&
+			    nvlist_lookup_string(rsrc, FM_FMRI_SCHEME,
+			    &scheme) == 0 &&
+			    strcmp(scheme, FM_FMRI_SCHEME_HC) == 0) {
+				got_hc_rsrc = 1;
+				if (xip->xi_flags & FMD_XPRT_EXTERNAL)
+					continue;
+				if (topo_fmri_present(thp, rsrc, &err) == 0)
+					got_present_rsrc = 1;
+				if (topo_fmri_asru(thp, rsrc, &asrua[i],
+				    &err) == 0) {
+					proxy_asru[i] =
+					    FMD_PROXY_ASRU_FROM_RSRC;
+					got_proxy_asru = 1;
+				}
+			}
+		}
+		fmd_fmri_topo_rele(thp);
+	}
+
+	/*
+	 * If we're set up only to report hc-scheme faults, and
+	 * there aren't any, then just drop the event.
+	 * (No asrua[] entry can have been filled in on this path, since
+	 * entries are only set after got_hc_rsrc has been set.)
+	 */
+	if (got_hc_rsrc == 0 && (xip->xi_flags & FMD_XPRT_HCONLY)) {
+		if (nelem > 0) {
+			fmd_free(proxy_asru, sizeof (uint8_t) * nelem);
+			fmd_free(diag_asru, sizeof (uint8_t) * nelem);
+			fmd_free(asrua, sizeof (nvlist_t *) * nelem);
+		}
+		return;
+	}
+
+	/*
+	 * If we're set up only to report locally present hc-scheme
+	 * faults, and there aren't any, then just drop the event.
+	 */
+	if (got_present_rsrc == 0 &&
+	    (xip->xi_flags & FMD_XPRT_HC_PRESENT_ONLY)) {
+		if (nelem > 0) {
+			for (i = 0; i < nelem; i++)
+				if (asrua[i])
+					nvlist_free(asrua[i]);
+			fmd_free(proxy_asru, sizeof (uint8_t) * nelem);
+			fmd_free(diag_asru, sizeof (uint8_t) * nelem);
+			fmd_free(asrua, sizeof (nvlist_t *) * nelem);
+		}
+		return;
+	}
+
+	/*
+	 * If fmd_case_recreate() returns NULL, UUID is already known.
+	 */
+	fmd_module_lock(xip->xi_queue->eq_mod);
+	if ((cp = fmd_case_recreate(xip->xi_queue->eq_mod, xp,
+	    FMD_CASE_UNSOLVED, uuid, code)) == NULL) {
+		if (nelem > 0) {
+			for (i = 0; i < nelem; i++)
+				if (asrua[i])
+					nvlist_free(asrua[i]);
+			fmd_free(proxy_asru, sizeof (uint8_t) * nelem);
+			fmd_free(diag_asru, sizeof (uint8_t) * nelem);
+			fmd_free(asrua, sizeof (nvlist_t *) * nelem);
+		}
+		fmd_module_unlock(xip->xi_queue->eq_mod);
+		return;
+	}
+
+	cip = (fmd_case_impl_t *)cp;
+	cip->ci_diag_asru = diag_asru;
+	cip->ci_proxy_asru = proxy_asru;
+	for (i = 0; i < nelem; i++) {
+		(void) nvlist_xdup(nvlp[i], &flt_copy, &fmd.d_nva);
+		if (proxy_asru[i] != FMD_PROXY_ASRU_NOT_NEEDED) {
+			/*
+			 * Copy suspects, but remove/replace asru first. Also if
+			 * the original asru was hc-scheme use that as resource.
+			 */
+			if (proxy_asru[i] == FMD_PROXY_ASRU_FROM_ASRU) {
+				(void) nvlist_remove(flt_copy,
+				    FM_FAULT_RESOURCE, DATA_TYPE_NVLIST);
+				(void) nvlist_lookup_nvlist(flt_copy,
+				    FM_FAULT_ASRU, &asru);
+				(void) nvlist_add_nvlist(flt_copy,
+				    FM_FAULT_RESOURCE, asru);
+			}
+			(void) nvlist_remove(flt_copy, FM_FAULT_ASRU,
+			    DATA_TYPE_NVLIST);
+			(void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU,
+			    asrua[i]);
+			nvlist_free(asrua[i]);
+		} else if (nvlist_lookup_nvlist(flt_copy, FM_FAULT_ASRU,
+		    &asru) == 0 && asru != NULL) {
+			/*
+			 * keep asru from diag side, but mark as no retire
+			 */
+			(void) nvlist_add_boolean_value(flt_copy,
+			    FM_SUSPECT_RETIRE, B_FALSE);
+		}
+		fmd_case_insert_suspect(cp, flt_copy);
+	}
+	/*
+	 * copy diag_time
+	 */
+	if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time,
+	    &nelem2) == 0 && nelem2 >= 2)
+		fmd_case_settime(cp, diag_time[0], diag_time[1]);
+	/*
+	 * copy DE fmri
+	 */
+	if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &de_fmri) == 0) {
+		(void) nvlist_xdup(de_fmri, &de_fmri_dup, &fmd.d_nva);
+		fmd_case_set_de_fmri(cp, de_fmri_dup);
+	}
+
+	/*
+	 * Transition to solved. This will log the suspect list and create
+	 * the resource cache entries.
+	 */
+	fmd_case_transition(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
+
+	/*
+	 * Update status if it is not simply "all faulty" (can happen if
+	 * list.suspects are being re-sent when the transport has reconnected).
+	 *
+	 * The status array gets its own count: nelem must keep the size of
+	 * the fault list because asrua is freed with it below.  If the
+	 * status array is absent, nstatus stays 0 and statusp is never
+	 * dereferenced.
+	 */
+	(void) nvlist_lookup_uint8_array(nvl, FM_SUSPECT_FAULT_STATUS, &statusp,
+	    &nstatus);
+	for (i = 0; i < nstatus; i++) {
+		if ((statusp[i] & (FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE |
+		    FM_SUSPECT_NOT_PRESENT | FM_SUSPECT_DEGRADED)) !=
+		    FM_SUSPECT_FAULTY)
+			need_update = 1;
+	}
+	if (need_update) {
+		fmd_case_update_status(cp, statusp, cip->ci_proxy_asru,
+		    cip->ci_diag_asru);
+		fmd_case_update_containees(cp);
+		fmd_case_update(cp);
+	}
+
+	/*
+	 * if asru on proxy side, send an update back to the diagnosing side to
+	 * update UNUSABLE/DEGRADED.
+	 */
+	if (got_proxy_asru)
+		fmd_case_xprt_updated(cp);
+
+	if (nelem > 0)
+		fmd_free(asrua, sizeof (nvlist_t *) * nelem);
+	fmd_module_unlock(xip->xi_queue->eq_mod);
+}
+
void
fmd_xprt_recv(fmd_xprt_t *xp, nvlist_t *nvl, hrtime_t hrt, boolean_t logonly)
{
@@ -1006,12 +1388,13 @@ fmd_xprt_recv(fmd_xprt_t *xp, nvlist_t *nvl, hrtime_t hrt, boolean_t logonly)
fmd_t *dp = &fmd;
fmd_event_t *e;
- char *class, *uuid, *code;
+ char *class, *uuid;
boolean_t isproto, isereport;
uint64_t *tod;
uint8_t ttl;
uint_t n;
+ fmd_case_t *cp;
/*
* Grab the transport lock and set the busy flag to indicate we are
@@ -1165,20 +1548,100 @@ fmd_xprt_recv(fmd_xprt_t *xp, nvlist_t *nvl, hrtime_t hrt, boolean_t logonly)
/*
* If a list.suspect event is received, create a case for the specified
- * UUID in the case hash, with the transport module as its owner. If
- * the UUID is already known, fmd_case_recreate() will return NULL and
- * we simply proceed to our normal event handling regardless.
+ * UUID in the case hash, with the transport module as its owner.
+ */
+ if (fmd_event_match(e, FMD_EVT_PROTOCOL, FM_LIST_SUSPECT_CLASS)) {
+ if (xip->xi_flags & FMD_XPRT_CACHE_AS_LOCAL)
+ fmd_xprt_list_suspect_local(xp, nvl);
+ else
+ fmd_xprt_list_suspect(xp, nvl);
+ fmd_event_hold(e);
+ fmd_event_rele(e);
+ goto done;
+ }
+
+ /*
+ * If a list.updated or list.repaired event is received, update the
+ * resource cache status and the local case.
+ */
+ if (fmd_event_match(e, FMD_EVT_PROTOCOL, FM_LIST_REPAIRED_CLASS) ||
+ fmd_event_match(e, FMD_EVT_PROTOCOL, FM_LIST_UPDATED_CLASS)) {
+ uint8_t *statusp;
+ uint_t nelem = 0;
+
+ (void) nvlist_lookup_uint8_array(nvl, FM_SUSPECT_FAULT_STATUS,
+ &statusp, &nelem);
+ fmd_module_lock(xip->xi_queue->eq_mod);
+ if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 &&
+ (cp = fmd_case_hash_lookup(fmd.d_cases, uuid)) != NULL) {
+ fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
+ if (cip->ci_xprt != NULL) {
+ fmd_case_update_status(cp, statusp,
+ cip->ci_proxy_asru, cip->ci_diag_asru);
+ fmd_case_update_containees(cp);
+ fmd_case_update(cp);
+ }
+ fmd_case_rele(cp);
+ }
+ fmd_module_unlock(xip->xi_queue->eq_mod);
+ fmd_event_hold(e);
+ fmd_event_rele(e);
+ goto done;
+ }
+
+ /*
+ * If a list.isolated event is received, update resource cache status
+ */
+ if (fmd_event_match(e, FMD_EVT_PROTOCOL, FM_LIST_ISOLATED_CLASS)) {
+ uint8_t *statusp;
+ uint_t nelem = 0;
+
+ (void) nvlist_lookup_uint8_array(nvl, FM_SUSPECT_FAULT_STATUS,
+ &statusp, &nelem);
+ fmd_module_lock(xip->xi_queue->eq_mod);
+ if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 &&
+ (cp = fmd_case_hash_lookup(fmd.d_cases, uuid)) != NULL) {
+ fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
+ if (cip->ci_xprt != NULL)
+ fmd_case_update_status(cp, statusp,
+ cip->ci_proxy_asru, cip->ci_diag_asru);
+ fmd_case_rele(cp);
+ }
+ fmd_module_unlock(xip->xi_queue->eq_mod);
+ fmd_event_hold(e);
+ fmd_event_rele(e);
+ goto done;
+ }
+
+ /*
+ * If a list.resolved event is received, resolve the local case.
*/
- if (fmd_event_match(e, FMD_EVT_PROTOCOL, FM_LIST_SUSPECT_CLASS) &&
- nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 &&
- nvlist_lookup_string(nvl, FM_SUSPECT_DIAG_CODE, &code) == 0) {
+ if (fmd_event_match(e, FMD_EVT_PROTOCOL, FM_LIST_RESOLVED_CLASS)) {
fmd_module_lock(xip->xi_queue->eq_mod);
- (void) fmd_case_recreate(xip->xi_queue->eq_mod,
- xp, FMD_CASE_SOLVED, uuid, code);
+ if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 &&
+ (cp = fmd_case_hash_lookup(fmd.d_cases, uuid)) != NULL) {
+ fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
+ if (cip->ci_xprt != NULL)
+ fmd_case_transition(cp, (cip->ci_state ==
+ FMD_CASE_REPAIRED) ? FMD_CASE_RESOLVED :
+ (cip->ci_state == FMD_CASE_CLOSED) ?
+ FMD_CASE_REPAIRED : FMD_CASE_CLOSE_WAIT,
+ FMD_CF_RESOLVED);
+ fmd_case_rele(cp);
+ }
fmd_module_unlock(xip->xi_queue->eq_mod);
+ fmd_event_hold(e);
+ fmd_event_rele(e);
+ goto done;
}
- if (logonly == FMD_B_TRUE) {
+ if (logonly == FMD_B_TRUE || (xip->xi_flags & FMD_XPRT_EXTERNAL)) {
+ /*
+ * Don't proxy ereports on an EXTERNAL transport - we won't
+ * know how to diagnose them with the wrong topology. Note
+ * that here (and above) we have to hold/release the event in
+ * order for it to be freed.
+ */
fmd_event_hold(e);
fmd_event_rele(e);
} else if (isproto == FMD_B_TRUE)
@@ -1204,8 +1667,10 @@ fmd_xprt_uuclose(fmd_xprt_t *xp, const char *uuid)
nvlist_t *nvl;
char *s;
- fmd_dprintf(FMD_DBG_XPRT,
- "xprt %u closing case %s\n", xip->xi_id, uuid);
+ if ((xip->xi_flags & FMD_XPRT_RDWR) == FMD_XPRT_RDONLY)
+ return; /* read-only transports do not proxy uuclose */
+
+ TRACE((FMD_DBG_XPRT, "xprt %u closing case %s\n", xip->xi_id, uuid));
nvl = fmd_protocol_xprt_uuclose(xip->xi_queue->eq_mod,
"resource.fm.xprt.uuclose", xip->xi_version, uuid);
@@ -1216,6 +1681,58 @@ fmd_xprt_uuclose(fmd_xprt_t *xp, const char *uuid)
}
/*
+ * On proxy side, send back uuresolved request to diagnosing side
+ */
+void
+fmd_xprt_uuresolved(fmd_xprt_t *xp, const char *uuid)
+{
+	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
+	nvlist_t *ctl;
+	fmd_event_t *ev;
+	char *class;
+
+	if ((xip->xi_flags & FMD_XPRT_RDWR) == FMD_XPRT_RDONLY)
+		return; /* read-only transports do not proxy uuresolved */
+
+	TRACE((FMD_DBG_XPRT, "xprt %u resolving case %s\n", xip->xi_id, uuid));
+
+	/* build the control event carrying the case uuid */
+	ctl = fmd_protocol_xprt_uuresolved(xip->xi_queue->eq_mod,
+	    "resource.fm.xprt.uuresolved", xip->xi_version, uuid);
+
+	/* wrap it in a protocol event and queue it on this transport */
+	(void) nvlist_lookup_string(ctl, FM_CLASS, &class);
+	ev = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, ctl, class);
+	fmd_eventq_insert_at_time(xip->xi_queue, ev);
+}
+
+/*
+ * On proxy side, send back repair/acquit/etc request to diagnosing side.
+ * A resource.fm.xprt.updated control event is built for the case uuid with
+ * the statusp[] and (optional) has_asrup[] arrays of nelem entries, and is
+ * queued on this transport.  Nothing is sent on a read-only transport.
+ */
+void
+fmd_xprt_updated(fmd_xprt_t *xp, const char *uuid, uint8_t *statusp,
+    uint8_t *has_asrup, uint_t nelem)
+{
+	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
+	nvlist_t *ctl;
+	fmd_event_t *ev;
+	char *class;
+
+	if ((xip->xi_flags & FMD_XPRT_RDWR) == FMD_XPRT_RDONLY)
+		return; /* read-only transports do not support remote repairs */
+
+	TRACE((FMD_DBG_XPRT, "xprt %u updating case %s\n", xip->xi_id, uuid));
+
+	ctl = fmd_protocol_xprt_updated(xip->xi_queue->eq_mod,
+	    "resource.fm.xprt.updated", xip->xi_version, uuid, statusp,
+	    has_asrup, nelem);
+
+	(void) nvlist_lookup_string(ctl, FM_CLASS, &class);
+	ev = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, ctl, class);
+	fmd_eventq_insert_at_time(xip->xi_queue, ev);
+}
+
+/*
* Insert the specified class into our remote subscription hash. If the class
* is already present, bump the reference count; otherwise add it to the hash
* and then enqueue an event for our remote peer to proxy our subscription.
diff --git a/usr/src/cmd/fm/fmd/common/fmd_xprt.h b/usr/src/cmd/fm/fmd/common/fmd_xprt.h
index 41054fdc35..2aae76d7b9 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_xprt.h
+++ b/usr/src/cmd/fm/fmd/common/fmd_xprt.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -56,6 +56,8 @@ extern fmd_xprt_rule_f fmd_xprt_event_unsuback;
extern fmd_xprt_rule_f fmd_xprt_event_uuclose;
extern fmd_xprt_rule_f fmd_xprt_event_error;
extern fmd_xprt_rule_f fmd_xprt_event_drop;
+extern fmd_xprt_rule_f fmd_xprt_event_uuresolved;
+extern fmd_xprt_rule_f fmd_xprt_event_updated;
typedef struct fmd_xprt_rule {
const char *xr_class; /* pattern to match */
@@ -123,11 +125,19 @@ typedef struct fmd_xprt_impl {
#define FMD_XPRT_RDWR 0x3 /* xprt is read-write */
#define FMD_XPRT_ACCEPT 0x4 /* xprt is accepting connection */
#define FMD_XPRT_SUSPENDED 0x8 /* xprt is suspended by user */
-#define FMD_XPRT_CMASK 0xF /* xprt create flag mask */
#define FMD_XPRT_SUBSCRIBER 0x10 /* xprt is actively subscribing */
#define FMD_XPRT_ISUSPENDED 0x20 /* xprt is waiting for _fmd_init */
#define FMD_XPRT_DSUSPENDED 0x40 /* xprt is suspended by fmd mechanism */
-
+#define FMD_XPRT_EXTERNAL 0x80 /* xprt is external to a chassis */
+#define FMD_XPRT_NO_REMOTE_REPAIR 0x100 /* xprt disallows remote repair */
+#define FMD_XPRT_CACHE_AS_LOCAL 0x200 /* xprt caches fault as if local */
+#define FMD_XPRT_HCONLY 0x400 /* xprt only proxies hc-scheme faults */
+#define FMD_XPRT_HC_PRESENT_ONLY 0x800 /* only locally present hc faults */
+
+#define FMD_XPRT_CMASK /* xprt create flag mask */ \
+ (FMD_XPRT_RDWR | FMD_XPRT_ACCEPT | FMD_XPRT_SUSPENDED | \
+ FMD_XPRT_EXTERNAL | FMD_XPRT_NO_REMOTE_REPAIR | \
+ FMD_XPRT_CACHE_AS_LOCAL | FMD_XPRT_HCONLY | FMD_XPRT_HC_PRESENT_ONLY)
#define FMD_XPRT_SMASK \
(FMD_XPRT_SUSPENDED | FMD_XPRT_ISUSPENDED | FMD_XPRT_DSUSPENDED)
@@ -138,6 +148,9 @@ extern void fmd_xprt_xresume(fmd_xprt_t *, uint_t);
extern void fmd_xprt_send(fmd_xprt_t *);
extern void fmd_xprt_recv(fmd_xprt_t *, nvlist_t *, hrtime_t, boolean_t);
extern void fmd_xprt_uuclose(fmd_xprt_t *, const char *);
+extern void fmd_xprt_uuresolved(fmd_xprt_t *, const char *);
+extern void fmd_xprt_updated(fmd_xprt_t *, const char *, uint8_t *, uint8_t *,
+ uint_t);
extern void fmd_xprt_subscribe(fmd_xprt_t *, const char *);
extern void fmd_xprt_unsubscribe(fmd_xprt_t *, const char *);
diff --git a/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c b/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c
index 3c6c172e58..32e9c6504c 100644
--- a/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c
+++ b/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c
@@ -493,6 +493,9 @@ cma_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
{
fmd_hdl_debug(hdl, "received %s\n", class);
+ if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0)
+ return;
+
if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 ||
strcmp(class, FM_LIST_UPDATED_CLASS) == 0)
diff --git a/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c b/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c
index 7e12e7abf6..167873cd8b 100644
--- a/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c
+++ b/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Disk Monitor
*/
@@ -284,6 +282,8 @@ diskmon_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
diskmon_agent_suspect(hdl, nvl);
return;
+ } else if (fmd_nvl_class_match(hdl, nvl, FM_LIST_RESOLVED_CLASS)) {
+ return;
}
/*
diff --git a/usr/src/cmd/fm/modules/common/io-retire/rio_main.c b/usr/src/cmd/fm/modules/common/io-retire/rio_main.c
index 0dfd1415ba..c5953a70cb 100644
--- a/usr/src/cmd/fm/modules/common/io-retire/rio_main.c
+++ b/usr/src/cmd/fm/modules/common/io-retire/rio_main.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/fm/protocol.h>
#include <fm/fmd_api.h>
#include <strings.h>
@@ -132,6 +130,7 @@ rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
di_retire_t drt = {0};
int retire;
int rval = 0;
+ int valid_suspect = 0;
int error;
char *snglfault = FM_FAULT_CLASS"."FM_ERROR_IO".";
boolean_t rtr;
@@ -155,6 +154,8 @@ rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
retire = 0;
} else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) {
retire = 0;
+ } else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0) {
+ return;
} else if (strncmp(class, snglfault, strlen(snglfault)) == 0) {
retire = 1;
faults = &nvl;
@@ -200,6 +201,7 @@ rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
continue;
}
+ valid_suspect = 1;
if (retire) {
if (fmd_nvl_fmri_has_fault(hdl, asru,
FMD_HAS_FAULT_ASRU, NULL) == 1) {
@@ -226,6 +228,13 @@ rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
}
/*
+ * Don't send uuclose or uuresolved unless at least one suspect
+ * was valid for this retire agent and no retires/unretires failed.
+ */
+ if (valid_suspect == 0)
+ return;
+
+ /*
* The fmd framework takes care of moving a case to the repaired
* state. To move the case to the closed state however, we (the
* retire agent) need to call fmd_case_uuclose()
diff --git a/usr/src/cmd/fm/modules/common/ip-transport/ip.c b/usr/src/cmd/fm/modules/common/ip-transport/ip.c
index a70d62e883..8a5b500b9f 100644
--- a/usr/src/cmd/fm/modules/common/ip-transport/ip.c
+++ b/usr/src/cmd/fm/modules/common/ip-transport/ip.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sysmacros.h>
@@ -94,6 +92,12 @@ static size_t ip_size; /* default buffer size */
static volatile int ip_quit; /* signal to quit */
static int ip_qlen; /* queue length for listen(3SOCKET) */
static int ip_mtbf; /* mtbf for simulating packet drop */
+static int ip_external; /* set transport to be "external" */
+static int ip_no_remote_repair; /* disallow remote repair */
+static int ip_hconly; /* only cache faults that are hc-scheme */
+static int ip_rdonly; /* force transport to be rdonly */
+static int ip_hc_present_only; /* only cache faults if hc-scheme and present */
+static char *ip_domain_name; /* set domain name for received list.suspects */
static hrtime_t ip_burp; /* make mtbf slower by adding this much delay */
static int ip_translate; /* call fmd_xprt_translate() before sending */
static char *ip_host; /* host to connect to (or NULL if server) */
@@ -323,8 +327,11 @@ ip_xprt_recv_event(ip_xprt_t *ipx)
fmd_hdl_error(ip_hdl, "failed to unpack event from "
"transport %p: %s\n", (void *)ipx->ipx_xprt, strerror(err));
ip_stat.ips_unpackfail.fmds_value.ui64++;
- } else
+ } else {
+ if (ip_domain_name)
+ fmd_xprt_add_domain(ip_hdl, nvl, ip_domain_name);
fmd_xprt_post(ip_hdl, ipx->ipx_xprt, nvl, 0);
+ }
if (fmd_xprt_error(ip_hdl, ipx->ipx_xprt)) {
fmd_hdl_error(ip_hdl, "protocol error on transport %p",
@@ -466,10 +473,26 @@ ip_xprt_setup(fmd_hdl_t *hdl)
struct addrinfo *aip;
const char *s1, *s2;
+ /*
+ * Set up flags as specified in the .conf file. Note that these are
+ * mostly only used for testing purposes, allowing the transport to
+ * be set up in various modes.
+ */
if (ip_host != NULL)
- xflags = FMD_XPRT_RDWR;
+ xflags = (ip_rdonly == FMD_B_TRUE) ? FMD_XPRT_RDONLY :
+ FMD_XPRT_RDWR;
else
- xflags = FMD_XPRT_RDWR | FMD_XPRT_ACCEPT;
+ xflags = ((ip_rdonly == FMD_B_TRUE) ? FMD_XPRT_RDONLY :
+ FMD_XPRT_RDWR) | FMD_XPRT_ACCEPT;
+
+ if (ip_external == FMD_B_TRUE)
+ xflags |= FMD_XPRT_EXTERNAL;
+ if (ip_no_remote_repair == FMD_B_TRUE)
+ xflags |= FMD_XPRT_NO_REMOTE_REPAIR;
+ if (ip_hconly == FMD_B_TRUE)
+ xflags |= FMD_XPRT_HCONLY;
+ if (ip_hc_present_only == FMD_B_TRUE)
+ xflags |= FMD_XPRT_HC_PRESENT_ONLY;
for (aip = ip_ail; aip != NULL; aip = aip->ai_next) {
if (aip->ai_family != AF_INET && aip->ai_family != AF_INET6)
@@ -554,6 +577,12 @@ static const fmd_prop_t fmd_props[] = {
{ "ip_burp", FMD_TYPE_TIME, "0" },
{ "ip_enable", FMD_TYPE_BOOL, "false" },
{ "ip_mtbf", FMD_TYPE_INT32, "0" },
+ { "ip_external", FMD_TYPE_BOOL, "true" },
+ { "ip_no_remote_repair", FMD_TYPE_BOOL, "true" },
+ { "ip_hconly", FMD_TYPE_BOOL, "false" },
+ { "ip_rdonly", FMD_TYPE_BOOL, "false" },
+ { "ip_hc_present_only", FMD_TYPE_BOOL, "false" },
+ { "ip_domain_name", FMD_TYPE_STRING, NULL },
{ "ip_port", FMD_TYPE_STRING, "664" },
{ "ip_qlen", FMD_TYPE_INT32, "32" },
{ "ip_retry", FMD_TYPE_UINT32, "50" },
@@ -614,6 +643,12 @@ _fmd_init(fmd_hdl_t *hdl)
ip_burp = fmd_prop_get_int64(hdl, "ip_burp");
ip_mtbf = fmd_prop_get_int32(hdl, "ip_mtbf");
+ ip_external = fmd_prop_get_int32(hdl, "ip_external");
+ ip_no_remote_repair = fmd_prop_get_int32(hdl, "ip_no_remote_repair");
+ ip_hconly = fmd_prop_get_int32(hdl, "ip_hconly");
+ ip_rdonly = fmd_prop_get_int32(hdl, "ip_rdonly");
+ ip_hc_present_only = fmd_prop_get_int32(hdl, "ip_hc_present_only");
+ ip_domain_name = fmd_prop_get_string(hdl, "ip_domain_name");
ip_qlen = fmd_prop_get_int32(hdl, "ip_qlen");
ip_retry = fmd_prop_get_int32(hdl, "ip_retry");
ip_sleep = fmd_prop_get_int64(hdl, "ip_sleep");
diff --git a/usr/src/cmd/fm/modules/common/syslog-msgs/syslog.c b/usr/src/cmd/fm/modules/common/syslog-msgs/syslog.c
index 08c421915c..8bacd0783a 100644
--- a/usr/src/cmd/fm/modules/common/syslog-msgs/syslog.c
+++ b/usr/src/cmd/fm/modules/common/syslog-msgs/syslog.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/fm/protocol.h>
#include <sys/strlog.h>
#include <sys/log.h>
@@ -244,6 +242,13 @@ syslog_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
int locale_c = 0;
size_t len;
+ /*
+ * don't log updated and isolated events (for now)
+ */
+ if (strcmp(class, FM_LIST_ISOLATED_CLASS) == 0 ||
+ strcmp(class, FM_LIST_UPDATED_CLASS) == 0)
+ return;
+
if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 ||
version > FM_SUSPECT_VERSION) {
fmd_hdl_debug(hdl, "invalid event version: %u\n", version);
@@ -549,6 +554,8 @@ _fmd_init(fmd_hdl_t *hdl)
fmd_prop_free_string(hdl, rootdir);
fmd_hdl_subscribe(hdl, FM_LIST_SUSPECT_CLASS);
+ fmd_hdl_subscribe(hdl, FM_LIST_UPDATED_CLASS);
+ fmd_hdl_subscribe(hdl, FM_LIST_ISOLATED_CLASS);
fmd_hdl_subscribe(hdl, FM_LIST_REPAIRED_CLASS);
fmd_hdl_subscribe(hdl, FM_LIST_RESOLVED_CLASS);
}
diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c
index 72535443d9..3f0a6eee43 100644
--- a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c
+++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* The ZFS retire agent is responsible for managing hot spares across all pools.
* When we see a device fault or a device removal, we try to open the associated
@@ -211,6 +209,7 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
nvlist_t *vdev;
char *uuid;
int repair_done = 0;
+ boolean_t retire;
/*
* If this is a resource notifying us of device removal, then simply
@@ -233,6 +232,9 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
return;
}
+ if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0)
+ return;
+
if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
is_repair = B_TRUE;
else
@@ -251,6 +253,10 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
fault_device = B_FALSE;
degrade_device = B_FALSE;
+ if (nvlist_lookup_boolean_value(fault, FM_SUSPECT_RETIRE,
+ &retire) == 0 && retire == 0)
+ continue;
+
/*
* While we subscribe to fault.fs.zfs.*, we only take action
* for faults targeting a specific vdev (open failure or SERD
diff --git a/usr/src/uts/common/sys/fm/protocol.h b/usr/src/uts/common/sys/fm/protocol.h
index cb05dd7439..b79d244692 100644
--- a/usr/src/uts/common/sys/fm/protocol.h
+++ b/usr/src/uts/common/sys/fm/protocol.h
@@ -129,6 +129,8 @@ extern "C" {
#define FM_RSRC_XPRT_VERSION FM_RSRC_XPRT_VERS0
#define FM_RSRC_XPRT_UUID "uuid"
#define FM_RSRC_XPRT_SUBCLASS "subclass"
+#define FM_RSRC_XPRT_FAULT_STATUS "fault-status"
+#define FM_RSRC_XPRT_FAULT_HAS_ASRU "fault-has-asru"
/*
* FM ENA Format Macros