summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author mws <none@none> 2006-03-06 11:34:14 -0800
committer mws <none@none> 2006-03-06 11:34:14 -0800
commit 162ba6eabdbf6535d914d76133fa5760fb1b231c (patch)
tree 3ae944f1964cc33cf034f325eb3293afd0c8fd18
parent 673007c6bdf2eee5b45916949d1b2e7ed00efbe3 (diff)
download illumos-gate-162ba6eabdbf6535d914d76133fa5760fb1b231c.tar.gz
6377640 fmd build can try to re-install .so links as directories
6390114 fmd asru cache recreation flawed with fix for 6369961
6390205 fmd publishing list.isolated for cases that were never solved
6390232 fmadm repair <fmri> can lead to surprises
6390296 fmd dumps core when stopping during BFU
-rw-r--r-- usr/src/cmd/fm/fmd/Makefile.fmd 2
-rw-r--r-- usr/src/cmd/fm/fmd/common/fmd.c 10
-rw-r--r-- usr/src/cmd/fm/fmd/common/fmd_asru.c 99
-rw-r--r-- usr/src/cmd/fm/fmd/common/fmd_asru.h 4
-rw-r--r-- usr/src/cmd/fm/fmd/common/fmd_case.c 116
-rw-r--r-- usr/src/cmd/fm/fmd/common/fmd_case.h 9
-rw-r--r-- usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c 18
7 files changed, 154 insertions, 104 deletions
diff --git a/usr/src/cmd/fm/fmd/Makefile.fmd b/usr/src/cmd/fm/fmd/Makefile.fmd
index 1d534bac7d..95140c4af2 100644
--- a/usr/src/cmd/fm/fmd/Makefile.fmd
+++ b/usr/src/cmd/fm/fmd/Makefile.fmd
@@ -215,7 +215,7 @@ $(ROOTVDIR): $(ROOT)/var/fm
$(ROOTVSUB): $(ROOTVDIR)
$(INS.dir)
-$(ROOT)/usr/lib/%:
+$(ROOT)/usr/lib/fm:
$(INS.dir)
$(ROOTPDIR): $(ROOT)/usr/lib/fm
diff --git a/usr/src/cmd/fm/fmd/common/fmd.c b/usr/src/cmd/fm/fmd/common/fmd.c
index 5e88f48750..39094ffb92 100644
--- a/usr/src/cmd/fm/fmd/common/fmd.c
+++ b/usr/src/cmd/fm/fmd/common/fmd.c
@@ -560,7 +560,6 @@ fmd_destroy(fmd_t *dp)
while ((cp = fmd_list_next(&dp->d_rmod->mod_cases)) != NULL)
fmd_case_discard(cp);
-
fmd_module_unlock(dp->d_rmod);
fmd_free(dp->d_rmod->mod_stats, sizeof (fmd_modstat_t));
dp->d_rmod->mod_stats = NULL;
@@ -864,6 +863,13 @@ fmd_run(fmd_t *dp, int pfd)
if (pfd >= 0)
(void) write(pfd, &status, sizeof (status));
+ /*
+ * Before loading all modules, repopulate the ASRU cache from its
+ * persistent repository on disk. Then during module loading, the
+ * restoration of checkpoint files will reparent any active cases.
+ */
+ fmd_asru_hash_refresh(dp->d_asrus);
+
(void) fmd_conf_getprop(dp->d_conf, "plugin.path", &pap);
fmd_modhash_loadall(dp->d_mod_hash, pap, &fmd_rtld_ops, ".so");
@@ -876,7 +882,7 @@ fmd_run(fmd_t *dp, int pfd)
* that did not finish processing the last time ran, and then release
* the global module barrier by executing a final rele on d_mod_event.
*/
- fmd_asru_hash_refresh(dp->d_asrus);
+ fmd_asru_hash_replay(dp->d_asrus);
(void) pthread_rwlock_rdlock(&dp->d_log_lock);
fmd_log_replay(dp->d_errlog, (fmd_log_f *)fmd_err_replay, dp);
diff --git a/usr/src/cmd/fm/fmd/common/fmd_asru.c b/usr/src/cmd/fm/fmd/common/fmd_asru.c
index 6c97b9618b..74faa9a965 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_asru.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_asru.c
@@ -102,6 +102,7 @@ fmd_asru_destroy(fmd_asru_t *ap)
if (ap->asru_case != NULL)
fmd_case_rele(ap->asru_case);
+ nvlist_free(ap->asru_event);
fmd_strfree(ap->asru_name);
nvlist_free(ap->asru_fmri);
fmd_strfree(ap->asru_root);
@@ -165,10 +166,7 @@ fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
char *name = NULL;
ssize_t namelen;
- nvlist_t *fmri, *flt;
- fmd_event_t *e;
- char *class;
-
+ nvlist_t *fmri, *flt, *flt_copy;
boolean_t f, u, m;
fmd_asru_t *ap;
int ps, us;
@@ -233,12 +231,10 @@ fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
* delete the existing entry and continue on using the new entry; if
* the new entry is no "better", return an error and ignore it.
*/
- if ((ap = fmd_asru_hash_lookup(ahp, name)) != NULL &&
- (ap->asru_flags & FMD_ASRU_RECREATED)) {
+ if ((ap = fmd_asru_hash_lookup(ahp, name)) != NULL) {
if (!u && (ap->asru_flags & FMD_ASRU_UNUSABLE)) {
(void) fmd_asru_hash_delete_name(ahp, name);
fmd_asru_hash_release(ahp, ap);
- ap = NULL;
} else {
fmd_error(EFMD_ASRU_DUP, "removing duplicate asru "
"log %s for %s\n", lp->log_name, name);
@@ -249,14 +245,12 @@ fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
}
}
- if (ap == NULL) {
- ap = fmd_asru_create(ahp,
- fmd_strbasename(lp->log_name), name, fmri);
- }
-
+ ap = fmd_asru_create(ahp, fmd_strbasename(lp->log_name), name, fmri);
fmd_free(name, namelen + 1);
ap->asru_flags |= FMD_ASRU_RECREATED;
+ if (ps)
+ ap->asru_flags |= FMD_ASRU_PRESENT;
if (f)
ap->asru_flags |= FMD_ASRU_FAULTY;
if (u)
@@ -292,37 +286,17 @@ fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
if (ap->asru_case != NULL && fmd_case_orphaned(ap->asru_case)) {
(void) nvlist_xdup(flt, &ap->asru_event, &fmd.d_nva);
- fmd_case_recreate_suspect(
- ap->asru_case, ap->asru_event);
+ (void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva);
+ fmd_case_recreate_suspect(ap->asru_case, flt_copy);
}
}
- if (!(ap->asru_flags & FMD_ASRU_VALID)) {
- ap->asru_flags |= FMD_ASRU_VALID;
- fmd_asru_hash_insert(ahp, ap);
- }
+ ASSERT(!(ap->asru_flags & FMD_ASRU_VALID));
+ ap->asru_flags |= FMD_ASRU_VALID;
+ fmd_asru_hash_insert(ahp, ap);
TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", ap->asru_uuid,
(void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE]));
-
- /*
- * If the resource is present and faulty but not unusable, replay the
- * fault event that caused it be marked faulty. This will cause the
- * agent subscribing to this fault class to again disable the resource.
- */
- if (ap->asru_case != NULL && !(ap->asru_flags & FMD_ASRU_UNUSABLE)) {
- fmd_dprintf(FMD_DBG_ASRU,
- "replaying fault event for %s", ap->asru_name);
-
- (void) nvlist_xdup(flt, &flt, &fmd.d_nva);
- (void) nvlist_lookup_string(flt, FM_CLASS, &class);
-
- if (case_uuid != NULL)
- (void) nvlist_add_string(flt, FMD_EVN_UUID, case_uuid);
-
- e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, flt, class);
- fmd_dispq_dispatch(fmd.d_disp, e, class);
- }
}
static void
@@ -400,6 +374,42 @@ fmd_asru_hash_refresh(fmd_asru_hash_t *ahp)
(void) closedir(dirp);
}
+/*
+ * If the resource is present and faulty but not unusable, replay the fault
+ * event that caused it to be marked faulty. This will cause the agent
+ * subscribing to this fault class to again disable the resource.
+ */
+/*ARGSUSED*/
+static void
+fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data)
+{
+ fmd_event_t *e;
+ nvlist_t *nvl;
+ char *class;
+
+ if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE |
+ FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) {
+
+ fmd_dprintf(FMD_DBG_ASRU,
+ "replaying fault event for %s", ap->asru_name);
+
+ (void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva);
+ (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
+
+ (void) nvlist_add_string(nvl, FMD_EVN_UUID,
+ ((fmd_case_impl_t *)ap->asru_case)->ci_uuid);
+
+ e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
+ fmd_dispq_dispatch(fmd.d_disp, e, class);
+ }
+}
+
+void
+fmd_asru_hash_replay(fmd_asru_hash_t *ahp)
+{
+ fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL);
+}
+
fmd_asru_hash_t *
fmd_asru_hash_create(const char *root, const char *dir)
{
@@ -699,6 +709,7 @@ int
fmd_asru_setflags(fmd_asru_t *ap, uint_t sflag, fmd_case_t *cp, nvlist_t *nvl)
{
fmd_case_t *old_case = NULL;
+ nvlist_t *old_nvl = NULL;
uint_t nstate, ostate;
boolean_t msg;
@@ -720,7 +731,8 @@ fmd_asru_setflags(fmd_asru_t *ap, uint_t sflag, fmd_case_t *cp, nvlist_t *nvl)
old_case = ap->asru_case;
fmd_case_hold_locked(cp);
ap->asru_case = cp;
- ap->asru_event = nvl;
+ old_nvl = ap->asru_event;
+ (void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva);
}
if (nvl != NULL && nvlist_lookup_boolean_value(nvl,
@@ -738,6 +750,9 @@ fmd_asru_setflags(fmd_asru_t *ap, uint_t sflag, fmd_case_t *cp, nvlist_t *nvl)
if (old_case != NULL)
fmd_case_rele(old_case);
+ if (old_nvl != NULL)
+ nvlist_free(old_nvl);
+
return (1);
}
@@ -745,6 +760,7 @@ int
fmd_asru_clrflags(fmd_asru_t *ap, uint_t sflag, fmd_case_t *cp, nvlist_t *nvl)
{
fmd_case_t *old_case = NULL;
+ nvlist_t *old_nvl = NULL;
uint_t nstate, ostate;
ASSERT(!(sflag & ~FMD_ASRU_STATE));
@@ -765,7 +781,8 @@ fmd_asru_clrflags(fmd_asru_t *ap, uint_t sflag, fmd_case_t *cp, nvlist_t *nvl)
old_case = ap->asru_case;
fmd_case_hold_locked(cp);
ap->asru_case = cp;
- ap->asru_event = nvl;
+ old_nvl = ap->asru_event;
+ (void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva);
}
TRACE((FMD_DBG_ASRU, "asru %s %s->%s", ap->asru_uuid,
@@ -776,6 +793,7 @@ fmd_asru_clrflags(fmd_asru_t *ap, uint_t sflag, fmd_case_t *cp, nvlist_t *nvl)
if (cp == NULL && (sflag & FMD_ASRU_FAULTY)) {
old_case = ap->asru_case;
ap->asru_case = NULL;
+ old_nvl = ap->asru_event;
ap->asru_event = NULL;
}
@@ -788,6 +806,9 @@ fmd_asru_clrflags(fmd_asru_t *ap, uint_t sflag, fmd_case_t *cp, nvlist_t *nvl)
fmd_case_rele(old_case);
}
+ if (old_nvl != NULL)
+ nvlist_free(old_nvl);
+
return (1);
}
diff --git a/usr/src/cmd/fm/fmd/common/fmd_asru.h b/usr/src/cmd/fm/fmd/common/fmd_asru.h
index 3dee60fe27..0d7428a798 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_asru.h
+++ b/usr/src/cmd/fm/fmd/common/fmd_asru.h
@@ -53,7 +53,7 @@ typedef struct fmd_asru {
uint_t asru_refs; /* reference count */
uint_t asru_flags; /* flags (see below) */
fmd_case_t *asru_case; /* case associated with last change */
- nvlist_t *asru_event; /* case event inside of asru_case */
+ nvlist_t *asru_event; /* event associated with last change */
} fmd_asru_t;
#define FMD_ASRU_FAULTY 0x01 /* asru has been diagnosed as faulty */
@@ -62,6 +62,7 @@ typedef struct fmd_asru {
#define FMD_ASRU_INTERNAL 0x08 /* asru is managed by fmd itself */
#define FMD_ASRU_INVISIBLE 0x10 /* asru is not visibly administered */
#define FMD_ASRU_RECREATED 0x20 /* asru recreated by cache replay */
+#define FMD_ASRU_PRESENT 0x40 /* asru present at last R$ update */
#define FMD_ASRU_STATE (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE)
@@ -78,6 +79,7 @@ typedef struct fmd_asru_hash {
extern fmd_asru_hash_t *fmd_asru_hash_create(const char *, const char *);
extern void fmd_asru_hash_destroy(fmd_asru_hash_t *);
extern void fmd_asru_hash_refresh(fmd_asru_hash_t *);
+extern void fmd_asru_hash_replay(fmd_asru_hash_t *);
extern void fmd_asru_hash_apply(fmd_asru_hash_t *,
void (*)(fmd_asru_t *, void *), void *);
diff --git a/usr/src/cmd/fm/fmd/common/fmd_case.c b/usr/src/cmd/fm/fmd/common/fmd_case.c
index 0e9795cec9..e0f90b5fd3 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_case.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_case.c
@@ -541,6 +541,23 @@ fmd_case_create(fmd_module_t *mp, void *data)
return ((fmd_case_t *)cip);
}
+static void
+fmd_case_destroy_suspects(fmd_case_impl_t *cip)
+{
+ fmd_case_susp_t *cis, *ncis;
+
+ ASSERT(MUTEX_HELD(&cip->ci_lock));
+
+ for (cis = cip->ci_suspects; cis != NULL; cis = ncis) {
+ ncis = cis->cis_next;
+ nvlist_free(cis->cis_nvl);
+ fmd_free(cis, sizeof (fmd_case_susp_t));
+ }
+
+ cip->ci_suspects = NULL;
+ cip->ci_nsuspects = 0;
+}
+
fmd_case_t *
fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp,
uint_t state, const char *uuid, const char *code)
@@ -612,6 +629,9 @@ fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp,
cip->ci_mod = mp;
fmd_module_hold(mp);
+ fmd_case_destroy_suspects(cip);
+ cip->ci_state = state;
+
(void) pthread_mutex_unlock(&cip->ci_lock);
fmd_case_rele((fmd_case_t *)cip);
}
@@ -631,7 +651,6 @@ fmd_case_destroy(fmd_case_t *cp, int visible)
{
fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
fmd_case_item_t *cit, *ncit;
- fmd_case_susp_t *cis, *ncis;
ASSERT(MUTEX_HELD(&cip->ci_lock));
ASSERT(cip->ci_refs == 0);
@@ -647,11 +666,7 @@ fmd_case_destroy(fmd_case_t *cp, int visible)
fmd_free(cit, sizeof (fmd_case_item_t));
}
- for (cis = cip->ci_suspects; cis != NULL; cis = ncis) {
- ncis = cis->cis_next;
- nvlist_free(cis->cis_nvl);
- fmd_free(cis, sizeof (fmd_case_susp_t));
- }
+ fmd_case_destroy_suspects(cip);
if (cip->ci_principal != NULL)
fmd_event_rele(cip->ci_principal);
@@ -834,20 +849,12 @@ void
fmd_case_reset_suspects(fmd_case_t *cp)
{
fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
- fmd_case_susp_t *cis, *ncis;
(void) pthread_mutex_lock(&cip->ci_lock);
ASSERT(cip->ci_state < FMD_CASE_SOLVED);
- for (cis = cip->ci_suspects; cis != NULL; cis = ncis) {
- ncis = cis->cis_next;
- nvlist_free(cis->cis_nvl);
- fmd_free(cis, sizeof (fmd_case_susp_t));
- }
-
+ fmd_case_destroy_suspects(cip);
cip->ci_flags |= FMD_CF_DIRTY;
- cip->ci_suspects = NULL;
- cip->ci_nsuspects = 0;
(void) pthread_mutex_unlock(&cip->ci_lock);
fmd_module_setcdirty(cip->ci_mod);
@@ -863,7 +870,6 @@ fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
{
fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
- uint_t old_state;
fmd_case_susp_t *cis;
fmd_case_item_t *cit;
fmd_asru_t *asru;
@@ -872,6 +878,10 @@ fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
ASSERT(state <= FMD_CASE_REPAIRED);
(void) pthread_mutex_lock(&cip->ci_lock);
+
+ if (!(cip->ci_flags & FMD_CF_SOLVED))
+ flags &= ~(FMD_CF_ISOLATED | FMD_CF_REPAIRED);
+
cip->ci_flags |= flags;
if (cip->ci_state >= state) {
@@ -882,7 +892,6 @@ fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
_fmd_case_snames[cip->ci_state], _fmd_case_snames[state]));
- old_state = cip->ci_state;
cip->ci_state = state;
cip->ci_flags |= FMD_CF_DIRTY;
@@ -927,29 +936,16 @@ fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
}
close_wait_finish:
- if (!fmd_case_orphaned(cp))
- break; /* state transition complete */
-
/*
* If an orphaned case transitions to CLOSE_WAIT, the owning
* module is no longer loaded: continue on to CASE_CLOSED.
*/
- state = cip->ci_state = FMD_CASE_CLOSED;
- /*FALLTHRU*/
-
- case FMD_CASE_CLOSED:
- ASSERT(fmd_case_orphaned(cp));
- fmd_module_lock(cip->ci_mod);
- fmd_list_append(&cip->ci_mod->mod_cases, cip);
- fmd_module_unlock(cip->ci_mod);
+ if (fmd_case_orphaned(cp))
+ state = cip->ci_state = FMD_CASE_CLOSED;
break;
case FMD_CASE_REPAIRED:
ASSERT(fmd_case_orphaned(cp));
-
- if (old_state == FMD_CASE_CLOSE_WAIT)
- break; /* case was never closed (transition 6 above) */
-
fmd_module_lock(cip->ci_mod);
fmd_list_delete(&cip->ci_mod->mod_cases, cip);
fmd_module_unlock(cip->ci_mod);
@@ -976,12 +972,11 @@ fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
}
/*
- * If we transitioned to CLOSED or REPAIRED, adjust the reference count
- * to reflect our addition to or removal from fmd.d_rmod->mod_cases.
+ * If we transitioned to REPAIRED, adjust the reference count to
+ * reflect our removal from fmd.d_rmod->mod_cases. If the caller has
+ * not placed an additional hold on the case, it will now be freed.
*/
- if (state == FMD_CASE_CLOSED)
- fmd_case_hold(cp);
- else if (state == FMD_CASE_REPAIRED && old_state != FMD_CASE_CLOSE_WAIT)
+ if (state == FMD_CASE_REPAIRED)
fmd_case_rele(cp);
}
@@ -1099,7 +1094,8 @@ fmd_case_update(fmd_case_t *cp)
(void) pthread_mutex_lock(&cip->ci_lock);
cstate = cip->ci_state;
- if (cip->ci_xprt != NULL || cip->ci_state < FMD_CASE_SOLVED) {
+ if ((cip->ci_flags & FMD_CF_REPAIRING) ||
+ cip->ci_xprt != NULL || cip->ci_state < FMD_CASE_SOLVED) {
(void) pthread_mutex_unlock(&cip->ci_lock);
return; /* update is not appropriate */
}
@@ -1163,6 +1159,18 @@ fmd_case_delete(fmd_case_t *cp)
fmd_module_hold(cip->ci_mod);
/*
+ * If the case is not proxied and it has been solved, then retain it
+ * on the root module's case list at least until we're transitioned.
+ * Otherwise free the case with our final fmd_case_rele() below.
+ */
+ if (cip->ci_xprt == NULL && (cip->ci_flags & FMD_CF_SOLVED)) {
+ fmd_module_lock(cip->ci_mod);
+ fmd_list_append(&cip->ci_mod->mod_cases, cip);
+ fmd_module_unlock(cip->ci_mod);
+ fmd_case_hold(cp);
+ }
+
+ /*
* If a proxied case finishes CLOSE_WAIT, then it can be discarded
* rather than orphaned because by definition it can have no entries
* in the resource cache of the current fault manager.
@@ -1191,6 +1199,14 @@ fmd_case_discard(fmd_case_t *cp)
fmd_case_rele(cp);
}
+static void
+fmd_case_repair_containee(fmd_asru_t *ee, void *er)
+{
+ if ((ee->asru_flags & FMD_ASRU_FAULTY) &&
+ fmd_fmri_contains(er, ee->asru_fmri) > 0)
+ (void) fmd_asru_clrflags(ee, FMD_ASRU_FAULTY, NULL, NULL);
+}
+
/*
* Indicate that the problem corresponding to a case has been repaired by
* clearing the faulty bit on each ASRU named as a suspect. If the case hasn't
@@ -1218,7 +1234,7 @@ fmd_case_repair(fmd_case_t *cp)
return (fmd_set_errno(EFMD_CASE_OWNER));
}
- if (cstate < FMD_CASE_SOLVED) {
+ if (cstate < FMD_CASE_SOLVED || (cip->ci_flags & FMD_CF_REPAIRING)) {
(void) pthread_mutex_unlock(&cip->ci_lock);
return (fmd_set_errno(EFMD_CASE_STATE));
}
@@ -1239,15 +1255,35 @@ fmd_case_repair(fmd_case_t *cp)
aa[i] = fmd_asru_hash_lookup_nvl(ahp, nvl, FMD_B_FALSE);
}
+ cip->ci_flags |= FMD_CF_REPAIRING;
(void) pthread_mutex_unlock(&cip->ci_lock);
+ /*
+ * For each suspect ASRU, if the case associated with this ASRU matches
+ * case 'cp', close all ASRUs contained by 'ap' and clear FAULTY. Note
+ * that at present, we're assuming that when a given resource FMRI R1
+ * contains another R2, that any faults are related by a common
+ * diagnosis engine. This is true in our current architecture, but may
+ * not always be true, at which point we'll need more cleverness here.
+ */
for (i = 0; i < an; i++) {
if (aa[i] == NULL)
continue; /* no asru was found */
- (void) fmd_asru_clrflags(aa[i], FMD_ASRU_FAULTY, NULL, NULL);
+
+ if (aa[i]->asru_case == cp) {
+ fmd_asru_hash_apply(fmd.d_asrus,
+ fmd_case_repair_containee, aa[i]->asru_fmri);
+ (void) fmd_asru_clrflags(aa[i],
+ FMD_ASRU_FAULTY, NULL, NULL);
+ }
+
fmd_asru_hash_release(ahp, aa[i]);
}
+ (void) pthread_mutex_lock(&cip->ci_lock);
+ cip->ci_flags &= ~FMD_CF_REPAIRING;
+ (void) pthread_mutex_unlock(&cip->ci_lock);
+
if (cstate == FMD_CASE_CLOSED)
fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
else
diff --git a/usr/src/cmd/fm/fmd/common/fmd_case.h b/usr/src/cmd/fm/fmd/common/fmd_case.h
index f8eb51fe2b..6c208c3c5f 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_case.h
+++ b/usr/src/cmd/fm/fmd/common/fmd_case.h
@@ -84,10 +84,11 @@ typedef struct fmd_case_impl {
#define FMD_CASE_CLOSED 3 /* case is closed (reconfig done) */
#define FMD_CASE_REPAIRED 4 /* case is repaired (can be freed) */
-#define FMD_CF_DIRTY 0x1 /* case is in need of checkpoint */
-#define FMD_CF_SOLVED 0x2 /* case has been solved */
-#define FMD_CF_ISOLATED 0x4 /* case has been isolated */
-#define FMD_CF_REPAIRED 0x8 /* case has been repaired */
+#define FMD_CF_DIRTY 0x01 /* case is in need of checkpoint */
+#define FMD_CF_SOLVED 0x02 /* case has been solved */
+#define FMD_CF_ISOLATED 0x04 /* case has been isolated */
+#define FMD_CF_REPAIRED 0x08 /* case has been repaired */
+#define FMD_CF_REPAIRING 0x10 /* case repair in progress */
typedef struct fmd_case_hash {
pthread_rwlock_t ch_lock; /* lock protecting case hash */
diff --git a/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c b/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c
index 1e1a7e1fa0..092bab738a 100644
--- a/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c
+++ b/usr/src/cmd/fm/fmd/common/fmd_rpc_adm.c
@@ -667,20 +667,6 @@ fmd_adm_logrotate_1_svc(char *name, int *rvp, struct svc_req *req)
return (TRUE);
}
-/*
- * If the case associated with this ASRU matches our input case, close all
- * ASRUs contained by 'ap' and trigger appropriate case close events.
- */
-static void
-fmd_adm_caserepair_asru(fmd_asru_t *ap, void *cp)
-{
- if (ap->asru_case == cp) {
- fmd_asru_hash_apply(fmd.d_asrus,
- fmd_adm_repair_containee, ap->asru_fmri);
- (void) fmd_asru_clrflags(ap, FMD_ASRU_FAULTY, NULL, NULL);
- }
-}
-
bool_t
fmd_adm_caserepair_1_svc(char *uuid, int *rvp, struct svc_req *req)
{
@@ -691,9 +677,7 @@ fmd_adm_caserepair_1_svc(char *uuid, int *rvp, struct svc_req *req)
err = FMD_ADM_ERR_PERM;
else if ((cp = fmd_case_hash_lookup(fmd.d_cases, uuid)) == NULL)
err = FMD_ADM_ERR_CASESRCH;
- else if (fmd_case_repair(cp) == 0)
- fmd_asru_hash_apply(fmd.d_asrus, fmd_adm_caserepair_asru, cp);
- else {
+ else if (fmd_case_repair(cp) != 0) {
err = errno == EFMD_CASE_OWNER ?
FMD_ADM_ERR_CASEXPRT : FMD_ADM_ERR_CASEOPEN;
}