summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_scsa.c9
-rw-r--r--usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c61
-rw-r--r--usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_def.h10
3 files changed, 69 insertions, 11 deletions
diff --git a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_scsa.c b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_scsa.c
index b27a8221d7..18dfcdef51 100644
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_scsa.c
+++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_scsa.c
@@ -423,6 +423,15 @@ pmcs_scsa_tran_tgt_free(dev_info_t *hba_dip, dev_info_t *tgt_dip,
ASSERT(target->phy);
phyp = target->phy;
+ if (target->recover_wait) {
+ mutex_exit(&target->statlock);
+ mutex_exit(&pwp->lock);
+ pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, phyp, target, "%s: "
+ "Target 0x%p in device state recovery, fail tran_tgt_free",
+ __func__, (void *)target);
+ return;
+ }
+
/*
* If this target still has a PHY pointer and that PHY's target pointer
* has been cleared, then that PHY has been reaped. In that case, there
diff --git a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c
index 721a378624..ca134dd854 100644
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c
+++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c
@@ -2875,6 +2875,9 @@ pmcs_clear_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
/* keep phynum */
pptr->width = 0;
pptr->ds_recovery_retries = 0;
+ pptr->ds_prev_good_recoveries = 0;
+ pptr->last_good_recovery = 0;
+ pptr->prev_recovery = 0;
/* keep dtype */
pptr->config_stop = 0;
pptr->spinup_hold = 0;
@@ -6982,20 +6985,25 @@ pmcs_dev_state_recovery(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
}
tgt = pptr->target;
- if (tgt == NULL || tgt->dev_gone) {
- if (pptr->dtype != NOTHING) {
- pmcs_prt(pwp, PMCS_PRT_DEBUG2, pptr, tgt,
- "%s: no target for DS error recovery for "
- "PHY 0x%p", __func__, (void *)pptr);
+
+ if (tgt != NULL) {
+ mutex_enter(&tgt->statlock);
+ if (tgt->recover_wait == 0) {
+ goto next_phy;
}
- goto next_phy;
}
- mutex_enter(&tgt->statlock);
-
- if (tgt->recover_wait == 0) {
- goto next_phy;
+ if (pptr->prev_recovery) {
+ if (ddi_get_lbolt() - pptr->prev_recovery <
+ drv_usectohz(PMCS_DS_RECOVERY_INTERVAL)) {
+ pmcs_prt(pwp, PMCS_PRT_DEBUG2, pptr, tgt,
+ "%s: DS recovery on PHY %s "
+ "re-invoked too soon. Skipping...",
+ __func__, pptr->path);
+ goto next_phy;
+ }
}
+ pptr->prev_recovery = ddi_get_lbolt();
/*
* Step 1: Put the device into the IN_RECOVERY state
@@ -7101,7 +7109,22 @@ pmcs_dev_state_recovery(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
PMCS_DEVICE_STATE_OPERATIONAL);
if (rc == 0) {
tgt->recover_wait = 0;
+
pptr->ds_recovery_retries = 0;
+ if ((pptr->ds_prev_good_recoveries == 0) ||
+ (ddi_get_lbolt() - pptr->last_good_recovery >
+ drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME))) {
+ pptr->last_good_recovery = ddi_get_lbolt();
+ pptr->ds_prev_good_recoveries = 1;
+ } else if (ddi_get_lbolt() < pptr->last_good_recovery +
+ drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME)) {
+ pptr->ds_prev_good_recoveries++;
+ } else {
+ pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
+ __func__, __LINE__, "Max recovery"
+ "attempts reached. Declaring PHY dead");
+ }
+
/*
* Don't bother to run the work queues if the PHY
* is dead.
@@ -8260,6 +8283,7 @@ pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp, pmcs_xscsi_t *tgt,
pmcs_hw_t *pwp, const char *func_name, int line, char *reason_string)
{
ASSERT(mutex_owned(&phyp->phy_lock));
+ ASSERT((tgt == NULL) || mutex_owned(&tgt->statlock));
phyp->ds_recovery_retries++;
@@ -8267,7 +8291,22 @@ pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp, pmcs_xscsi_t *tgt,
pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, tgt,
"%s: retry limit reached after %s to PHY %s failed",
func_name, reason_string, phyp->path);
- tgt->recover_wait = 0;
+ if (tgt != NULL) {
+ tgt->recover_wait = 0;
+ }
+ phyp->dead = 1;
+ PHY_CHANGED_AT_LOCATION(pwp, phyp, func_name, line);
+ RESTART_DISCOVERY(pwp);
+ } else if ((phyp->ds_prev_good_recoveries >
+ PMCS_MAX_DS_RECOVERY_RETRIES) &&
+ (phyp->last_good_recovery + drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME)
+ < ddi_get_lbolt())) {
+ pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, tgt, "%s: max number of "
+ "successful recoveries reached, declaring PHY %s dead",
+ __func__, phyp->path);
+ if (tgt != NULL) {
+ tgt->recover_wait = 0;
+ }
phyp->dead = 1;
PHY_CHANGED_AT_LOCATION(pwp, phyp, func_name, line);
RESTART_DISCOVERY(pwp);
diff --git a/usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_def.h b/usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_def.h
index d3c075095b..5a8e73ecb8 100644
--- a/usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_def.h
+++ b/usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_def.h
@@ -72,6 +72,10 @@ struct pmcs_phy {
uint8_t phynum; /* phy number on parent expander */
uint8_t width; /* how many phys wide */
uint8_t ds_recovery_retries; /* # error retry attempts */
+ uint8_t ds_prev_good_recoveries; /* # successful recoveries */
+ clock_t prev_recovery; /* previous successful recovery */
+ clock_t last_good_recovery; /* oldest successful recovery */
+ /* within PMCS_MAX_DS_RECOVERY_TIME time frame */
pmcs_dtype_t dtype; /* current dtype of the phy */
pmcs_dtype_t pend_dtype; /* new dtype (pending change) */
uint32_t
@@ -113,6 +117,12 @@ struct pmcs_phy {
/* maximum number of ds recovery retries (ds_recovery_retries) */
#define PMCS_MAX_DS_RECOVERY_RETRIES 4
+/* max time allowed for successful recovery */
+#define PMCS_MAX_DS_RECOVERY_TIME (60 * 1000000) /* 60 seconds */
+
+/* ds recovery on same same phy is not allowed within this interval */
+#define PMCS_DS_RECOVERY_INTERVAL (1000000) /* 1 second */
+
/*
* Inbound and Outbound Queue Related Definitions.