summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- usr/src/uts/common/io/cpqary3/cpqary3.c 21
-rw-r--r-- usr/src/uts/common/io/cpqary3/cpqary3.h 12
-rw-r--r-- usr/src/uts/common/io/cpqary3/cpqary3_isr.c 36
-rw-r--r-- usr/src/uts/common/io/cpqary3/cpqary3_talk2ctlr.c 10
-rw-r--r-- usr/src/uts/common/io/cpqary3/cpqary3_util.c 157
5 files changed, 108 insertions, 128 deletions
diff --git a/usr/src/uts/common/io/cpqary3/cpqary3.c b/usr/src/uts/common/io/cpqary3/cpqary3.c
index ff4aada92c..f95befda10 100644
--- a/usr/src/uts/common/io/cpqary3/cpqary3.c
+++ b/usr/src/uts/common/io/cpqary3/cpqary3.c
@@ -412,11 +412,6 @@ cpqary3_attach(dev_info_t *dip, ddi_attach_cmd_t attach_cmd)
}
- /* Register a timeout driver-routine to be called every 2 secs */
- cpqary3p->tick_tmout_id = timeout(cpqary3_tick_hdlr,
- (caddr_t)cpqary3p, drv_usectohz(CPQARY3_TICKTMOUT_VALUE));
- cleanstatus |= CPQARY3_TICK_TMOUT_REGD;
-
/* Register Software Interrupt Handler */
if (ddi_add_softintr(dip, DDI_SOFTINT_HIGH,
&cpqary3p->cpqary3_softintr_id, &cpqary3p->sw_iblock_cookie, NULL,
@@ -440,6 +435,13 @@ cpqary3_attach(dev_info_t *dip, ddi_attach_cmd_t attach_cmd)
cpqary3_lockup_intr_onoff(cpqary3p, CPQARY3_LOCKUP_INTR_ENABLE);
/*
+ * Register a periodic function to be called every 15 seconds.
+ */
+ cpqary3p->cpq_periodic = ddi_periodic_add(cpqary3_periodic, cpqary3p,
+ 15 * NANOSEC, DDI_IPL_0);
+ cleanstatus |= CPQARY3_TICK_TMOUT_REGD;
+
+ /*
* We have come with hmaeventd - which logs the storage events on
* console as well as in IML. So we are commenting the NOE support in
* the driver
@@ -661,17 +663,16 @@ cpqary3_cleanup(cpqary3_t *cpqary3p, uint32_t status)
* any register/memory mapping
*/
+ if ((status & CPQARY3_TICK_TMOUT_REGD) && cpqary3p->cpq_periodic) {
+ ddi_periodic_delete(cpqary3p->cpq_periodic);
+ }
+
if (status & CPQARY3_INTR_HDLR_SET)
ddi_remove_intr(cpqary3p->dip, 0, cpqary3p->hw_iblock_cookie);
if (status & CPQARY3_SW_INTR_HDLR_SET)
ddi_remove_softintr(cpqary3p->cpqary3_softintr_id);
- if ((status & CPQARY3_TICK_TMOUT_REGD) && cpqary3p->tick_tmout_id) {
- VERIFY(untimeout(cpqary3p->tick_tmout_id) >= 0);
- cpqary3p->tick_tmout_id = NULL;
- }
-
if (status & CPQARY3_CREATE_MINOR_NODE) {
(void) sprintf(node_name, "cpqary3%d", cpqary3p->instance);
ddi_remove_minor_node(cpqary3p->dip, node_name);
diff --git a/usr/src/uts/common/io/cpqary3/cpqary3.h b/usr/src/uts/common/io/cpqary3/cpqary3.h
index f7c229eb69..3f35d34ca4 100644
--- a/usr/src/uts/common/io/cpqary3/cpqary3.h
+++ b/usr/src/uts/common/io/cpqary3/cpqary3.h
@@ -84,8 +84,6 @@ extern "C" {
#define CPQARY3_CLEAN_ALL 0x0FFF
-#define CPQARY3_TICKTMOUT_VALUE 180000000 /* 180 seconds */
-
/*
* Defines for Maximum and Default Settings.
*/
@@ -155,6 +153,7 @@ extern "C" {
#define RETURN_VOID_IF_NULL(x) if (NULL == x) return
#define RETURN_NULL_IF_NULL(x) if (NULL == x) return (NULL)
#define RETURN_FAILURE_IF_NULL(x) if (NULL == x) return (CPQARY3_FAILURE)
+#define CPQARY3_SEC2HZ(x) drv_usectohz((x) * 1000000)
/*
* Macros for memory allocation/deallocations
@@ -283,10 +282,12 @@ typedef struct cpqary3_per_controller {
/* Controller Specific Information */
int8_t hba_name[38];
ulong_t num_of_targets;
- uint32_t heartbeat;
uint32_t board_id;
cpqary3_bd_t *bddef;
+ uint32_t cpq_last_heartbeat;
+ clock_t cpq_last_heartbeat_lbolt;
+
/* Condition Variables used */
kcondvar_t cv_immediate_wait;
kcondvar_t cv_noe_wait;
@@ -309,7 +310,7 @@ typedef struct cpqary3_per_controller {
kmutex_t sw_mutex; /* s/w mutex */
ddi_softintr_t cpqary3_softintr_id; /* s/w intr identifier */
uint8_t swintr_flag;
- timeout_id_t tick_tmout_id; /* timeout identifier */
+ ddi_periodic_t cpq_periodic;
uint8_t cpqary3_tick_hdlr;
scsi_hba_tran_t *hba_tran; /* transport structure */
cpqary3_cmdmemlist_t *cmdmemlistp; /* database - Memory Pool */
@@ -452,7 +453,7 @@ typedef struct cpqary3_ioctlreq {
void cpqary3_init_hbatran(cpqary3_t *);
void cpqary3_read_conf_file(dev_info_t *, cpqary3_t *);
-void cpqary3_tick_hdlr(void *);
+void cpqary3_periodic(void *);
void cpqary3_flush_cache(cpqary3_t *);
void cpqary3_intr_onoff(cpqary3_t *, uint8_t);
void cpqary3_lockup_intr_onoff(cpqary3_t *, uint8_t);
@@ -487,6 +488,7 @@ void cpqary3_synccmd_free(cpqary3_t *, cpqary3_cmdpvt_t *);
int cpqary3_synccmd_send(cpqary3_t *, cpqary3_cmdpvt_t *, clock_t, int);
uint8_t cpqary3_poll_retrieve(cpqary3_t *cpqary3p, uint32_t poll_tag);
uint8_t cpqary3_build_cmdlist(cpqary3_cmdpvt_t *cpqary3_cmdpvtp, uint32_t tid);
+void cpqary3_lockup_check(cpqary3_t *);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/io/cpqary3/cpqary3_isr.c b/usr/src/uts/common/io/cpqary3/cpqary3_isr.c
index d62e93159d..76e5242c9e 100644
--- a/usr/src/uts/common/io/cpqary3/cpqary3_isr.c
+++ b/usr/src/uts/common/io/cpqary3/cpqary3_isr.c
@@ -50,44 +50,14 @@ cpqary3_hw_isr(caddr_t per_ctlr)
*/
if (cpqary3p->check_ctlr_intr(cpqary3p) != CPQARY3_SUCCESS) {
/*
- * The Outbound Post List FIFO is not empty, so we must
- * service this interrupt.
+ * Check to see if the firmware has come to rest. If it has,
+ * this routine will panic the system.
*/
- goto service;
- }
+ cpqary3_lockup_check(cpqary3p);
- if (CPQARY3_FAILURE == cpqary3p->check_ctlr_intr(cpqary3p)) {
- if (cpqary3p->heartbeat ==
- DDI_GET32(cpqary3p, &ctp->HeartBeat)) {
- if (0x2 & ddi_get32(cpqary3p->odr_handle,
- (uint32_t *)cpqary3p->odr)) {
- spr0 = ddi_get32(cpqary3p->spr0_handle,
- (uint32_t *)cpqary3p->spr0);
- spr0 = spr0 >> 16;
- cmn_err(CE_WARN, "CPQary3 : %s HBA firmware "
- "Locked !!! Lockup Code: 0x%x",
- cpqary3p->hba_name, spr0);
- cmn_err(CE_WARN, "CPQary3 : Please reboot "
- "the system");
- ddi_put32(cpqary3p->odr_cl_handle,
- (uint32_t *)cpqary3p->odr_cl, 0x2);
- cpqary3_intr_onoff(cpqary3p,
- CPQARY3_INTR_DISABLE);
- if (cpqary3p->host_support & 0x4) {
- cpqary3_lockup_intr_onoff(cpqary3p,
- CPQARY3_LOCKUP_INTR_DISABLE);
- }
- cpqary3p->controller_lockup = CPQARY3_TRUE;
- }
- return (DDI_INTR_CLAIMED);
- }
return (DDI_INTR_UNCLAIMED);
}
-service:
-
- /* PERF */
-
/*
* We decided that we will have only one retrieve function for
* both simple and performant mode. To achieve this we have to mimic
diff --git a/usr/src/uts/common/io/cpqary3/cpqary3_talk2ctlr.c b/usr/src/uts/common/io/cpqary3/cpqary3_talk2ctlr.c
index 2382ac22c5..7275c656a1 100644
--- a/usr/src/uts/common/io/cpqary3/cpqary3_talk2ctlr.c
+++ b/usr/src/uts/common/io/cpqary3/cpqary3_talk2ctlr.c
@@ -638,7 +638,6 @@ cpqary3_init_ctlr(cpqary3_t *cpqary3p)
* Zero the Upper 32 Address in the Controller
*/
DDI_PUT32(cpqary3p, &ctp->HostWrite.Upper32Addr, 0x00000000);
- cpqary3p->heartbeat = DDI_GET32(cpqary3p, &ctp->HeartBeat);
/* Set the controller interrupt check routine */
cpqary3p->check_ctlr_intr = cpqary3_check_simple_ctlr_intr;
@@ -815,7 +814,6 @@ cpqary3_init_ctlr(cpqary3_t *cpqary3p)
*/
DDI_PUT32(cpqary3p, &ctp->HostWrite.Upper32Addr, 0x00000000);
- cpqary3p->heartbeat = DDI_GET32(cpqary3p, &ctp->HeartBeat);
/* Set the controller interrupt check routine */
@@ -838,6 +836,14 @@ cpqary3_init_ctlr(cpqary3_t *cpqary3p)
DDI_GET32(cpqary3p, &ctp->HostDrvrSupport);
}
+ /*
+ * Read initial controller heartbeat value and mark the current
+ * reading time.
+ */
+ cpqary3p->cpq_last_heartbeat = ddi_get32(cpqary3p->ct_handle,
+ &ctp->HeartBeat);
+ cpqary3p->cpq_last_heartbeat_lbolt = ddi_get_lbolt();
+
return (CPQARY3_SUCCESS);
}
diff --git a/usr/src/uts/common/io/cpqary3/cpqary3_util.c b/usr/src/uts/common/io/cpqary3/cpqary3_util.c
index b4a017d5ce..ed8323e387 100644
--- a/usr/src/uts/common/io/cpqary3/cpqary3_util.c
+++ b/usr/src/uts/common/io/cpqary3/cpqary3_util.c
@@ -86,111 +86,112 @@ cpqary3_read_conf_file(dev_info_t *dip, cpqary3_t *cpqary3p)
void
cpqary3_lockup_check(cpqary3_t *cpq)
{
+ /*
+ * Read the current controller heartbeat value.
+ */
+ uint32_t heartbeat = ddi_get32(cpq->ct_handle, &cpq->ct->HeartBeat);
+
+ /*
+ * Check to see if the value is the same as last time we looked:
+ */
+ if (heartbeat != cpq->cpq_last_heartbeat) {
+ /*
+ * The heartbeat value has changed, which suggests that the
+ * firmware in the controller has not yet come to a complete
+ * stop. Record the new value, as well as the current time.
+ */
+ cpq->cpq_last_heartbeat = heartbeat;
+ cpq->cpq_last_heartbeat_lbolt = ddi_get_lbolt();
+ return;
+ }
+
+ /*
+ * The controller _might_ have been able to signal to us that it
+ * has locked up. This is a truly unfathomable state of affairs:
+ * If the firmware can tell it has flown off the rails, why not
+ * simply reset the controller?
+ */
+ uint32_t odr = ddi_get32(cpq->odr_handle, cpq->odr);
+ uint32_t spr = ddi_get32(cpq->spr0_handle, cpq->spr0);
+ if ((odr & CISS_ODR_BIT_LOCKUP) != 0) {
+ dev_err(cpq->dip, CE_PANIC, "HP SmartArray firmware has "
+ "reported a critical fault (odr %08x spr %08x)",
+ odr, spr);
+ }
+
+ clock_t expiry = cpq->cpq_last_heartbeat_lbolt + CPQARY3_SEC2HZ(60);
+ if (ddi_get_lbolt() >= expiry) {
+ dev_err(cpq->dip, CE_PANIC, "HP SmartArray firmware has "
+ "stopped responding (odr %08x spr %08x)",
+ odr, spr);
+ }
}
/*
- * Function : cpqary3_tick_hdlr
- * Description : This routine is called once in 60 seconds to detect any
+ * Function : cpqary3_periodic
+ * Description : This routine is called every 15 seconds to detect any
* command that is pending with the controller and has
* timed out.
- * Once invoked, it re-initializes itself such that it is
- * invoked after an interval of 60 seconds.
* Called By : kernel
* Parameters : per_controller
* Calls : None
* Return Values: None
*/
void
-cpqary3_tick_hdlr(void *arg)
+cpqary3_periodic(void *arg)
{
- clock_t cpqary3_lbolt;
- clock_t cpqary3_ticks;
- cpqary3_t *ctlr;
- cpqary3_pkt_t *pktp;
- struct scsi_pkt *scsi_pktp;
- cpqary3_cmdpvt_t *local;
- volatile CfgTable_t *ctp;
- uint32_t i;
- uint32_t no_cmds = 0;
+ cpqary3_t *cpq = arg;
+ uint32_t no_cmds;
- /*
- * The per-controller shall be passed as argument.
- * Read the HeartBeat of the controller.
- * if the current heartbeat is the same as the one recorded earlier,
- * the f/w has locked up!!!
- */
+ cpqary3_lockup_check(cpq);
- if (NULL == (ctlr = (cpqary3_t *)arg))
- return;
-
- ctp = (CfgTable_t *)ctlr->ct;
-
- /* CONTROLLER_LOCKUP */
- if (ctlr->heartbeat == DDI_GET32(ctlr, &ctp->HeartBeat)) {
- if (ctlr->lockup_logged == CPQARY3_FALSE) {
- cmn_err(CE_WARN, "CPQary3 : "
- "%s HBA firmware Locked !!!", ctlr->hba_name);
- cmn_err(CE_WARN, "CPQary3 : "
- "Please reboot the system");
- cpqary3_intr_onoff(ctlr, CPQARY3_INTR_DISABLE);
- if (ctlr->host_support & 0x4)
- cpqary3_lockup_intr_onoff(ctlr,
- CPQARY3_LOCKUP_INTR_DISABLE);
- ctlr->controller_lockup = CPQARY3_TRUE;
- ctlr->lockup_logged = CPQARY3_TRUE;
- }
- }
- /* CONTROLLER_LOCKUP */
- no_cmds = (uint32_t)((ctlr->ctlr_maxcmds / 3) *
- NO_OF_CMDLIST_IN_A_BLK);
- mutex_enter(&ctlr->sw_mutex);
+ mutex_enter(&cpq->sw_mutex);
+ no_cmds = (uint32_t)((cpq->ctlr_maxcmds / 3) * NO_OF_CMDLIST_IN_A_BLK);
+ for (uint32_t i = 0; i < no_cmds; i++) {
+ cpqary3_cmdpvt_t *local = &cpq->cmdmemlistp->pool[i];
+ cpqary3_pkt_t *pktp;
+ struct scsi_pkt *scsi_pktp;
+ clock_t cpqary3_lbolt;
- for (i = 0; i < no_cmds; i++) {
- local = &ctlr->cmdmemlistp->pool[i];
ASSERT(local != NULL);
- pktp = MEM2PVTPKT(local);
-
- if (!pktp)
+ if ((pktp = MEM2PVTPKT(local)) == NULL) {
continue;
+ }
if ((local->cmdpvt_flag == CPQARY3_TIMEOUT) ||
(local->cmdpvt_flag == CPQARY3_RESET)) {
continue;
}
- if (local->occupied == CPQARY3_OCCUPIED) {
- scsi_pktp = pktp->scsi_cmd_pkt;
- cpqary3_lbolt = ddi_get_lbolt();
- if ((scsi_pktp) && (scsi_pktp->pkt_time)) {
- cpqary3_ticks = cpqary3_lbolt -
- pktp->cmd_start_time;
-
- if ((drv_hztousec(cpqary3_ticks)/1000000) >
- scsi_pktp->pkt_time) {
- scsi_pktp->pkt_reason = CMD_TIMEOUT;
- scsi_pktp->pkt_statistics =
- STAT_TIMEOUT;
- scsi_pktp->pkt_state = STATE_GOT_BUS |
- STATE_GOT_TARGET | STATE_SENT_CMD;
- local->cmdpvt_flag = CPQARY3_TIMEOUT;
-
- /* This should always be the case */
- if (scsi_pktp->pkt_comp) {
- mutex_exit(&ctlr->sw_mutex);
- (*scsi_pktp->pkt_comp)
- (scsi_pktp);
- mutex_enter(&ctlr->sw_mutex);
- continue;
- }
+ if (local->occupied != CPQARY3_OCCUPIED) {
+ continue;
+ }
+
+ scsi_pktp = pktp->scsi_cmd_pkt;
+ cpqary3_lbolt = ddi_get_lbolt();
+ if ((scsi_pktp) && (scsi_pktp->pkt_time)) {
+ clock_t cpqary3_ticks = cpqary3_lbolt -
+ pktp->cmd_start_time;
+
+ if ((drv_hztousec(cpqary3_ticks) / 1000000) >
+ scsi_pktp->pkt_time) {
+ scsi_pktp->pkt_reason = CMD_TIMEOUT;
+ scsi_pktp->pkt_statistics = STAT_TIMEOUT;
+ scsi_pktp->pkt_state = STATE_GOT_BUS |
+ STATE_GOT_TARGET | STATE_SENT_CMD;
+ local->cmdpvt_flag = CPQARY3_TIMEOUT;
+
+ /* This should always be the case */
+ if (scsi_pktp->pkt_comp != NULL) {
+ mutex_exit(&cpq->sw_mutex);
+ (*scsi_pktp->pkt_comp)(scsi_pktp);
+ mutex_enter(&cpq->sw_mutex);
+ continue;
}
}
}
}
-
- ctlr->heartbeat = DDI_GET32(ctlr, &ctp->HeartBeat);
- mutex_exit(&ctlr->sw_mutex);
- ctlr->tick_tmout_id = timeout(cpqary3_tick_hdlr,
- (caddr_t)ctlr, drv_usectohz(CPQARY3_TICKTMOUT_VALUE));
+ mutex_exit(&cpq->sw_mutex);
}
/*