summaryrefslogtreecommitdiff
path: root/usr/src/lib/fm
diff options
context:
space:
mode:
authorAlek Pinchuk <apinchuk@datto.com>2017-10-14 18:19:05 -0700
committerDan McDonald <danmcd@joyent.com>2017-10-31 20:55:33 -0400
commit0244979b1714a04f23ac9fa8367e59f6fb75d8f3 (patch)
treec62ded5775f0e0efe95f509a65dba5e386e1367e /usr/src/lib/fm
parent11c2233db8e23e91bc416bff8a4f6f3a0f84b12c (diff)
downloadillumos-gate-0244979b1714a04f23ac9fa8367e59f6fb75d8f3.tar.gz
8074 need to add FMA event for SSD wearout
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com> Reviewed by: Robert Mustacchi <rm@joyent.com> Reviewed by: Ken Mays <maybird1776@yahoo.com> Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src/lib/fm')
-rw-r--r--usr/src/lib/fm/libdiskstatus/common/ds_impl.h5
-rw-r--r--usr/src/lib/fm/libdiskstatus/common/ds_scsi.c116
-rw-r--r--usr/src/lib/fm/libdiskstatus/common/ds_scsi.h28
-rw-r--r--usr/src/lib/fm/libdiskstatus/common/libdiskstatus.c22
4 files changed, 156 insertions, 15 deletions
diff --git a/usr/src/lib/fm/libdiskstatus/common/ds_impl.h b/usr/src/lib/fm/libdiskstatus/common/ds_impl.h
index 34f8b15d75..8b8132791d 100644
--- a/usr/src/lib/fm/libdiskstatus/common/ds_impl.h
+++ b/usr/src/lib/fm/libdiskstatus/common/ds_impl.h
@@ -21,13 +21,12 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _DS_IMPL_H
#define _DS_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <dlfcn.h>
#include <libnvpair.h>
#include <sys/types.h>
@@ -53,6 +52,7 @@ struct disk_status {
nvlist_t *ds_overtemp; /* overtemp */
nvlist_t *ds_predfail; /* predict fail */
nvlist_t *ds_testfail; /* self test fail */
+ nvlist_t *ds_ssmwearout; /* SSM wearout fail */
int ds_error; /* last error */
nvlist_t *ds_state; /* protocol state */
};
@@ -60,6 +60,7 @@ struct disk_status {
#define DS_FAULT_OVERTEMP 0x1
#define DS_FAULT_PREDFAIL 0x2
#define DS_FAULT_TESTFAIL 0x4
+#define DS_FAULT_SSMWEAROUT 0x8
extern void dprintf(const char *, ...);
extern void ddump(const char *, const void *, size_t);
diff --git a/usr/src/lib/fm/libdiskstatus/common/ds_scsi.c b/usr/src/lib/fm/libdiskstatus/common/ds_scsi.c
index 0b80f4d4c2..081f441dd9 100644
--- a/usr/src/lib/fm/libdiskstatus/common/ds_scsi.c
+++ b/usr/src/lib/fm/libdiskstatus/common/ds_scsi.c
@@ -21,10 +21,9 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <assert.h>
#include <errno.h>
#include <libdiskstatus.h>
@@ -77,6 +76,8 @@ static int logpage_temp_verify(ds_scsi_info_t *,
scsi_log_parameter_header_t *, int, nvlist_t *);
static int logpage_selftest_verify(ds_scsi_info_t *,
scsi_log_parameter_header_t *, int, nvlist_t *);
+static int logpage_ssm_verify(ds_scsi_info_t *,
+ scsi_log_parameter_header_t *, int, nvlist_t *);
static int logpage_ie_analyze(ds_scsi_info_t *,
scsi_log_parameter_header_t *, int);
@@ -84,6 +85,8 @@ static int logpage_temp_analyze(ds_scsi_info_t *,
scsi_log_parameter_header_t *, int);
static int logpage_selftest_analyze(ds_scsi_info_t *,
scsi_log_parameter_header_t *, int);
+static int logpage_ssm_analyze(ds_scsi_info_t *,
+ scsi_log_parameter_header_t *, int);
static struct logpage_validation_entry log_validation[] = {
{ LOGPAGE_IE, LOGPAGE_SUPP_IE,
@@ -94,7 +97,10 @@ static struct logpage_validation_entry log_validation[] = {
logpage_temp_verify, logpage_temp_analyze },
{ LOGPAGE_SELFTEST, LOGPAGE_SUPP_SELFTEST,
"self-test",
- logpage_selftest_verify, logpage_selftest_analyze }
+ logpage_selftest_verify, logpage_selftest_analyze },
+ { LOGPAGE_SSM, LOGPAGE_SUPP_SSM,
+ FM_EREPORT_SCSI_SSMWEAROUT,
+ logpage_ssm_verify, logpage_ssm_analyze }
};
#define NLOG_VALIDATION (sizeof (log_validation) / sizeof (log_validation[0]))
@@ -757,6 +763,51 @@ logpage_selftest_verify(ds_scsi_info_t *sip,
}
/*
+ * Verify the contents of the Solid State Media (SSM) log page.
+ * As of SBC3r36 SSM log page contains one log parameter:
+ * "Percentage Used Endurance Indicator" which is mandatory.
+ * For the verification phase, we sanity check this parameter
+ * by making sure it's present and its length is set to 0x04.
+ */
+static int
+logpage_ssm_verify(ds_scsi_info_t *sip,
+    scsi_log_parameter_header_t *lphp, int log_length, nvlist_t *nvl)
+{
+	uint8_t *page = (uint8_t *)lphp;
+	int offset = 0;
+
+	/*
+	 * Walk the log parameters one by one.  Each parameter occupies its
+	 * header plus lph_length bytes of payload.
+	 */
+	while (offset < log_length) {
+		scsi_log_parameter_header_t *hdr =
+		    (scsi_log_parameter_header_t *)(page + offset);
+
+		if (BE_16(hdr->lph_param) == LOGPARAM_PRCNT_USED) {
+			/* Note that the parameter was seen at all. */
+			if (nvlist_add_boolean_value(nvl,
+			    FM_EREPORT_SCSI_SSMWEAROUT, B_TRUE) != 0)
+				return (scsi_set_errno(sip, EDS_NOMEM));
+
+			/* A well-formed parameter ends the verification. */
+			if (hdr->lph_length == LOGPARAM_PRCNT_USED_PARAM_LEN)
+				return (0);
+
+			/* Record the unexpected length and keep scanning. */
+			if (nvlist_add_uint8(nvl,
+			    "invalid-length", hdr->lph_length) != 0)
+				return (scsi_set_errno(sip, EDS_NOMEM));
+
+			dprintf("solid state media logpage bad len\n");
+		}
+
+		offset += hdr->lph_length +
+		    sizeof (scsi_log_parameter_header_t);
+	}
+
+	/*
+	 * No valid "Percentage Used" parameter was found: stop treating
+	 * the SSM log page as supported.  This is not reported as an error.
+	 */
+	sip->si_supp_log &= ~LOGPAGE_SUPP_SSM;
+	return (0);
+}
+
+/*
* Load the current IE mode pages
*/
static int
@@ -1145,6 +1196,65 @@ logpage_selftest_analyze(ds_scsi_info_t *sip, scsi_log_parameter_header_t *lphp,
}
/*
+ * Analyze the contents of the Solid State Media (SSM) log page's
+ * "Percentage Used Endurance Indicator" log parameter.
+ * We generate a fault if the percentage used is equal to or over
+ * PRCNT_USED_FAULT_THRSH
+ */
+static int
+logpage_ssm_analyze(ds_scsi_info_t *sip, scsi_log_parameter_header_t *lphp,
+    int log_length)
+{
+	uint16_t param_code;
+	scsi_ssm_log_param_t *ssm;
+	nvlist_t *nvl;
+	int i, plen = 0;
+
+	/*
+	 * The wearout payload goes into its own nvlist, ds_ssmwearout.
+	 * That is the list disk_status_get() publishes under
+	 * FM_EREPORT_SCSI_SSMWEAROUT and the one released by
+	 * disk_status_close().  ds_overtemp must not be used here: it
+	 * carries the overtemp payload, and reusing it would both clobber
+	 * that data and leave ds_ssmwearout NULL so that the wearout fault
+	 * is never reported.
+	 */
+	assert(sip->si_dsp->ds_ssmwearout == NULL);
+	if (nvlist_alloc(&sip->si_dsp->ds_ssmwearout, NV_UNIQUE_NAME, 0) != 0)
+		return (scsi_set_errno(sip, EDS_NOMEM));
+	nvl = sip->si_dsp->ds_ssmwearout;
+
+	/*
+	 * Walk the log parameters; each one is a header followed by
+	 * lph_length bytes of payload.
+	 */
+	for (i = 0; i < log_length; i += plen) {
+		lphp = (scsi_log_parameter_header_t *)((uint8_t *)lphp + plen);
+		param_code = BE_16(lphp->lph_param);
+		ssm = (scsi_ssm_log_param_t *)lphp;
+
+		switch (param_code) {
+		case LOGPARAM_PRCNT_USED:
+			/* Skip a parameter whose length is not the expected one. */
+			if (lphp->lph_length != LOGPARAM_PRCNT_USED_PARAM_LEN)
+				break;
+
+			/* Report both the current value and the threshold. */
+			if ((nvlist_add_uint8(nvl,
+			    FM_EREPORT_PAYLOAD_SCSI_CURSSMWEAROUT,
+			    ssm->ssm_prcnt_used) != 0) ||
+			    (nvlist_add_uint8(nvl,
+			    FM_EREPORT_PAYLOAD_SCSI_THRSHSSMWEAROUT,
+			    PRCNT_USED_FAULT_THRSH) != 0))
+				return (scsi_set_errno(sip, EDS_NOMEM));
+
+			/* Fault once the percentage used reaches the threshold. */
+			if (ssm->ssm_prcnt_used >= PRCNT_USED_FAULT_THRSH)
+				sip->si_dsp->ds_faults |= DS_FAULT_SSMWEAROUT;
+
+			return (0);
+		}
+
+		plen = lphp->lph_length +
+		    sizeof (scsi_log_parameter_header_t);
+	}
+
+	/*
+	 * If we got this far we didn't see LOGPARAM_PRCNT_USED
+	 * which is strange since we verified that it's there
+	 */
+	dprintf("solid state media logpage analyze failed\n");
+#if DEBUG
+	abort();
+#endif
+	return (scsi_set_errno(sip, EDS_NOT_SUPPORTED));
+}
+
+/*
* Analyze the IE mode sense page explicitly. This is only needed if the IE log
* page is not supported.
*/
diff --git a/usr/src/lib/fm/libdiskstatus/common/ds_scsi.h b/usr/src/lib/fm/libdiskstatus/common/ds_scsi.h
index 6d2648f06b..db4ee6d591 100644
--- a/usr/src/lib/fm/libdiskstatus/common/ds_scsi.h
+++ b/usr/src/lib/fm/libdiskstatus/common/ds_scsi.h
@@ -21,13 +21,12 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _DS_SCSI_H
#define _DS_SCSI_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/byteorder.h>
#include <sys/scsi/scsi.h>
@@ -50,12 +49,14 @@ extern "C" {
typedef struct scsi_log_header {
#if defined(_BIT_FIELDS_LTOH)
uint8_t lh_code : 6,
- __reserved : 2;
+ lh_spf : 1,
+ lh_ds : 1;
#else
- uint8_t __reserved : 2,
+ uint8_t lh_ds : 1,
+ lh_spf : 1,
lh_code : 6;
#endif
- uint8_t __reserved2;
+ uint8_t lh_subpage;
uint16_t lh_length;
} scsi_log_header_t;
@@ -158,6 +159,20 @@ typedef struct scsi_selftest_log_param {
sizeof (scsi_log_parameter_header_t))
/*
+ * Described in SBC3
+ */
+typedef struct scsi_ssm_log_param { /* SSM log parameter; logpage_ssm_analyze() casts a parameter header pointer to this */
+ scsi_log_parameter_header_t ssm_hdr; /* common log parameter header (parameter code and length) */
+ uint16_t __reserved2; /* not read by the code in this change; presumably reserved bytes -- confirm against SBC-3 */
+ uint8_t __reserved1; /* not read by the code in this change; presumably a reserved byte -- confirm against SBC-3 */
+ uchar_t ssm_prcnt_used; /* "Percentage Used Endurance Indicator"; compared against PRCNT_USED_FAULT_THRSH */
+} scsi_ssm_log_param_t;
+
+#define LOGPARAM_PRCNT_USED 0x0001
+#define LOGPARAM_PRCNT_USED_PARAM_LEN 0x04
+#define PRCNT_USED_FAULT_THRSH 90
+
+/*
* Mode sense/select page header information
*/
typedef struct scsi_ms_header {
@@ -278,6 +293,8 @@ typedef struct scsi_ie_page {
#define LOGPAGE_TEMP 0x0d
#define LOGPAGE_SELFTEST 0x10
#define LOGPAGE_IE 0x2f
+/* Solid State Media log page code */
+#define LOGPAGE_SSM 0x11
/* ASC constants */
#define ASC_INVALID_OPCODE 0x20
@@ -307,6 +324,7 @@ typedef struct scsi_ie_page {
#define LOGPAGE_SUPP_IE 0x1
#define LOGPAGE_SUPP_TEMP 0x2
#define LOGPAGE_SUPP_SELFTEST 0x4
+#define LOGPAGE_SUPP_SSM 0x8
#define MSG_BUFLEN 256
diff --git a/usr/src/lib/fm/libdiskstatus/common/libdiskstatus.c b/usr/src/lib/fm/libdiskstatus/common/libdiskstatus.c
index e2cf5ad7a2..18468289a7 100644
--- a/usr/src/lib/fm/libdiskstatus/common/libdiskstatus.c
+++ b/usr/src/lib/fm/libdiskstatus/common/libdiskstatus.c
@@ -21,10 +21,9 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Disk status library
*
@@ -33,9 +32,10 @@
* SCSI (and therefore SATA) disks are currently supported. The library is
* capable of detecting the following status conditions:
*
- * - Predictive failure
- * - Overtemp
- * - Self-test failure
+ * - Predictive failure
+ * - Overtemp
+ * - Self-test failure
+ * - Solid State Media wearout
*/
#include <assert.h>
@@ -131,6 +131,7 @@ disk_status_close(disk_status_t *dsp)
nvlist_free(dsp->ds_predfail);
nvlist_free(dsp->ds_overtemp);
nvlist_free(dsp->ds_testfail);
+ nvlist_free(dsp->ds_ssmwearout);
if (dsp->ds_data)
dsp->ds_transport->dt_close(dsp->ds_data);
(void) close(dsp->ds_fd);
@@ -172,6 +173,8 @@ disk_status_get(disk_status_t *dsp)
nvlist_free(dsp->ds_testfail);
nvlist_free(dsp->ds_predfail);
nvlist_free(dsp->ds_overtemp);
+ nvlist_free(dsp->ds_ssmwearout);
+ dsp->ds_ssmwearout = NULL;
dsp->ds_testfail = dsp->ds_overtemp = dsp->ds_predfail = NULL;
dsp->ds_faults = 0;
@@ -223,6 +226,15 @@ disk_status_get(disk_status_t *dsp)
goto nverror;
}
+ if (dsp->ds_ssmwearout != NULL) {
+ if ((err = nvlist_add_boolean_value(faults,
+ FM_EREPORT_SCSI_SSMWEAROUT,
+ (dsp->ds_faults & DS_FAULT_SSMWEAROUT) != 0)) != 0 ||
+ (err = nvlist_add_nvlist(nvl, FM_EREPORT_SCSI_SSMWEAROUT,
+ dsp->ds_ssmwearout)) != 0)
+ goto nverror;
+ }
+
if ((err = nvlist_add_nvlist(nvl, "faults", faults)) != 0)
goto nverror;