diff options
author | Alek Pinchuk <apinchuk@datto.com> | 2017-10-14 18:19:05 -0700 |
---|---|---|
committer | Dan McDonald <danmcd@joyent.com> | 2017-10-31 20:55:33 -0400 |
commit | 0244979b1714a04f23ac9fa8367e59f6fb75d8f3 (patch) | |
tree | c62ded5775f0e0efe95f509a65dba5e386e1367e /usr/src/lib/fm | |
parent | 11c2233db8e23e91bc416bff8a4f6f3a0f84b12c (diff) | |
download | illumos-gate-0244979b1714a04f23ac9fa8367e59f6fb75d8f3.tar.gz |
8074 need to add FMA event for SSD wearout
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Ken Mays <maybird1776@yahoo.com>
Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src/lib/fm')
-rw-r--r-- | usr/src/lib/fm/libdiskstatus/common/ds_impl.h | 5 | ||||
-rw-r--r-- | usr/src/lib/fm/libdiskstatus/common/ds_scsi.c | 116 | ||||
-rw-r--r-- | usr/src/lib/fm/libdiskstatus/common/ds_scsi.h | 28 | ||||
-rw-r--r-- | usr/src/lib/fm/libdiskstatus/common/libdiskstatus.c | 22 |
4 files changed, 156 insertions, 15 deletions
diff --git a/usr/src/lib/fm/libdiskstatus/common/ds_impl.h b/usr/src/lib/fm/libdiskstatus/common/ds_impl.h index 34f8b15d75..8b8132791d 100644 --- a/usr/src/lib/fm/libdiskstatus/common/ds_impl.h +++ b/usr/src/lib/fm/libdiskstatus/common/ds_impl.h @@ -21,13 +21,12 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2016 Nexenta Systems, Inc. All rights reserved. */ #ifndef _DS_IMPL_H #define _DS_IMPL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <dlfcn.h> #include <libnvpair.h> #include <sys/types.h> @@ -53,6 +52,7 @@ struct disk_status { nvlist_t *ds_overtemp; /* overtemp */ nvlist_t *ds_predfail; /* predict fail */ nvlist_t *ds_testfail; /* self test fail */ + nvlist_t *ds_ssmwearout; /* SSM wearout fail */ int ds_error; /* last error */ nvlist_t *ds_state; /* protocol state */ }; @@ -60,6 +60,7 @@ struct disk_status { #define DS_FAULT_OVERTEMP 0x1 #define DS_FAULT_PREDFAIL 0x2 #define DS_FAULT_TESTFAIL 0x4 +#define DS_FAULT_SSMWEAROUT 0x8 extern void dprintf(const char *, ...); extern void ddump(const char *, const void *, size_t); diff --git a/usr/src/lib/fm/libdiskstatus/common/ds_scsi.c b/usr/src/lib/fm/libdiskstatus/common/ds_scsi.c index 0b80f4d4c2..081f441dd9 100644 --- a/usr/src/lib/fm/libdiskstatus/common/ds_scsi.c +++ b/usr/src/lib/fm/libdiskstatus/common/ds_scsi.c @@ -21,10 +21,9 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <assert.h> #include <errno.h> #include <libdiskstatus.h> @@ -77,6 +76,8 @@ static int logpage_temp_verify(ds_scsi_info_t *, scsi_log_parameter_header_t *, int, nvlist_t *); static int logpage_selftest_verify(ds_scsi_info_t *, scsi_log_parameter_header_t *, int, nvlist_t *); +static int logpage_ssm_verify(ds_scsi_info_t *, + scsi_log_parameter_header_t *, int, nvlist_t *); static int logpage_ie_analyze(ds_scsi_info_t *, scsi_log_parameter_header_t *, int); @@ -84,6 +85,8 @@ static int logpage_temp_analyze(ds_scsi_info_t *, scsi_log_parameter_header_t *, int); static int logpage_selftest_analyze(ds_scsi_info_t *, scsi_log_parameter_header_t *, int); +static int logpage_ssm_analyze(ds_scsi_info_t *, + scsi_log_parameter_header_t *, int); static struct logpage_validation_entry log_validation[] = { { LOGPAGE_IE, LOGPAGE_SUPP_IE, @@ -94,7 +97,10 @@ static struct logpage_validation_entry log_validation[] = { logpage_temp_verify, logpage_temp_analyze }, { LOGPAGE_SELFTEST, LOGPAGE_SUPP_SELFTEST, "self-test", - logpage_selftest_verify, logpage_selftest_analyze } + logpage_selftest_verify, logpage_selftest_analyze }, + { LOGPAGE_SSM, LOGPAGE_SUPP_SSM, + FM_EREPORT_SCSI_SSMWEAROUT, + logpage_ssm_verify, logpage_ssm_analyze } }; #define NLOG_VALIDATION (sizeof (log_validation) / sizeof (log_validation[0])) @@ -757,6 +763,51 @@ logpage_selftest_verify(ds_scsi_info_t *sip, } /* + * Verify the contents of the Solid State Media (SSM) log page. + * As of SBC3r36 SSM log page contains one log parameter: + * "Percentage Used Endurance Indicator" which is mandatory. + * For the verification phase, we sanity check this parameter + * by making sure it's present and its length is set to 0x04. 
+ */
+static int
+logpage_ssm_verify(ds_scsi_info_t *sip,
+    scsi_log_parameter_header_t *lphp, int log_length, nvlist_t *nvl)
+{
+	ushort_t code;
+	int off = 0, step = 0;
+
+	while (off < log_length) {
+		lphp = (scsi_log_parameter_header_t *)((char *)lphp + step);
+		code = BE_16(lphp->lph_param);
+		if (code == LOGPARAM_PRCNT_USED) {
+			/* record that the parameter is present */
+			if (nvlist_add_boolean_value(nvl,
+			    FM_EREPORT_SCSI_SSMWEAROUT, B_TRUE) != 0)
+				return (scsi_set_errno(sip, EDS_NOMEM));
+
+			/* a parameter of the expected length passes */
+			if (lphp->lph_length == LOGPARAM_PRCNT_USED_PARAM_LEN)
+				return (0);
+
+			if (nvlist_add_uint8(nvl,
+			    "invalid-length", lphp->lph_length) != 0)
+				return (scsi_set_errno(sip, EDS_NOMEM));
+			dprintf("solid state media logpage bad len\n");
+		}
+
+		/* step over this parameter: header plus lph_length bytes */
+		step = lphp->lph_length +
+		    sizeof (scsi_log_parameter_header_t);
+		off += step;
+	}
+
+	/* verification failed: clear the SSM bit in si_supp_log */
+	sip->si_supp_log &= ~LOGPAGE_SUPP_SSM;
+	return (0);
+}
+
+/*
  * Load the current IE mode pages
  */
 static int
@@ -1145,6 +1196,65 @@ logpage_selftest_analyze(ds_scsi_info_t *sip, scsi_log_parameter_header_t *lphp,
 }
 
 /*
+ * Analyze the contents of the Solid State Media (SSM) log page's
+ * "Percentage Used Endurance Indicator" log parameter.
+ * The reading and the threshold are recorded in ds_ssmwearout, and a fault
+ * is flagged once the percentage used reaches PRCNT_USED_FAULT_THRSH.
+ * Returns 0 when the parameter was processed; otherwise the result of
+ * scsi_set_errno() for EDS_NOMEM or EDS_NOT_SUPPORTED.
+ */
+static int
+logpage_ssm_analyze(ds_scsi_info_t *sip, scsi_log_parameter_header_t *lphp,
+    int log_length)
+{
+	uint16_t param_code;
+	scsi_ssm_log_param_t *ssm;
+	nvlist_t *nvl;
+	int i, plen = 0;
+
+	/* wearout data lives in its own nvlist, which must not exist yet */
+	assert(sip->si_dsp->ds_ssmwearout == NULL);
+	if (nvlist_alloc(&sip->si_dsp->ds_ssmwearout, NV_UNIQUE_NAME, 0) != 0)
+		return (scsi_set_errno(sip, EDS_NOMEM));
+	nvl = sip->si_dsp->ds_ssmwearout;
+
+	for (i = 0; i < log_length; i += plen) {
+		lphp = (scsi_log_parameter_header_t *)((uint8_t *)lphp + plen);
+		param_code = BE_16(lphp->lph_param);
+		ssm = (scsi_ssm_log_param_t *)lphp;
+		switch (param_code) {
+		case LOGPARAM_PRCNT_USED:
+			if (lphp->lph_length != LOGPARAM_PRCNT_USED_PARAM_LEN)
+				break;
+
+			if ((nvlist_add_uint8(nvl,
+			    FM_EREPORT_PAYLOAD_SCSI_CURSSMWEAROUT,
+			    ssm->ssm_prcnt_used) != 0) ||
+			    (nvlist_add_uint8(nvl,
+			    FM_EREPORT_PAYLOAD_SCSI_THRSHSSMWEAROUT,
+			    PRCNT_USED_FAULT_THRSH) != 0))
+				return (scsi_set_errno(sip, EDS_NOMEM));
+			if (ssm->ssm_prcnt_used >= PRCNT_USED_FAULT_THRSH)
+				sip->si_dsp->ds_faults |= DS_FAULT_SSMWEAROUT;
+
+			return (0);
+		}
+		plen = lphp->lph_length +
+		    sizeof (scsi_log_parameter_header_t);
+	}
+
+	/*
+	 * If we got this far we didn't see LOGPARAM_PRCNT_USED
+	 * which is strange since we verified that it's there
+	 */
+	dprintf("solid state media logpage analyze failed\n");
+#if DEBUG
+	abort();
+#endif
+	return (scsi_set_errno(sip, EDS_NOT_SUPPORTED));
+}
+
+/*
  * Analyze the IE mode sense page explicitly. This is only needed if the IE log
  * page is not supported.
  */
diff --git a/usr/src/lib/fm/libdiskstatus/common/ds_scsi.h b/usr/src/lib/fm/libdiskstatus/common/ds_scsi.h
index 6d2648f06b..db4ee6d591 100644
--- a/usr/src/lib/fm/libdiskstatus/common/ds_scsi.h
+++ b/usr/src/lib/fm/libdiskstatus/common/ds_scsi.h
@@ -21,13 +21,12 @@
 /*
  * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved. */ #ifndef _DS_SCSI_H #define _DS_SCSI_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/byteorder.h> #include <sys/scsi/scsi.h> @@ -50,12 +49,14 @@ extern "C" { typedef struct scsi_log_header { #if defined(_BIT_FIELDS_LTOH) uint8_t lh_code : 6, - __reserved : 2; + lh_spf : 1, + lh_ds : 1; #else - uint8_t __reserved : 2, + uint8_t lh_ds : 1, + lh_spf : 1, lh_code : 6; #endif - uint8_t __reserved2; + uint8_t lh_subpage; uint16_t lh_length; } scsi_log_header_t; @@ -158,6 +159,20 @@ typedef struct scsi_selftest_log_param { sizeof (scsi_log_parameter_header_t)) /* + * Solid State Media (SSM) log parameter layout, described in SBC3 + */ +typedef struct scsi_ssm_log_param { + scsi_log_parameter_header_t ssm_hdr; /* common parameter header */ + uint16_t __reserved2; + uint8_t __reserved1; + uchar_t ssm_prcnt_used; /* value checked against PRCNT_USED_FAULT_THRSH */ +} scsi_ssm_log_param_t; + +#define LOGPARAM_PRCNT_USED 0x0001 /* parameter code matched in the SSM log page */ +#define LOGPARAM_PRCNT_USED_PARAM_LEN 0x04 /* lph_length expected for that parameter */ +#define PRCNT_USED_FAULT_THRSH 90 /* wearout fault at or above this percentage used */ + +/* * Mode sense/select page header information */ typedef struct scsi_ms_header { @@ -278,6 +293,8 @@ typedef struct scsi_ie_page { #define LOGPAGE_TEMP 0x0d #define LOGPAGE_SELFTEST 0x10 #define LOGPAGE_IE 0x2f +/* Solid State Media log page code */ +#define LOGPAGE_SSM 0x11 /* ASC constants */ #define ASC_INVALID_OPCODE 0x20 @@ -307,6 +324,7 @@ typedef struct scsi_ie_page { #define LOGPAGE_SUPP_IE 0x1 #define LOGPAGE_SUPP_TEMP 0x2 #define LOGPAGE_SUPP_SELFTEST 0x4 +#define LOGPAGE_SUPP_SSM 0x8 #define MSG_BUFLEN 256 diff --git a/usr/src/lib/fm/libdiskstatus/common/libdiskstatus.c b/usr/src/lib/fm/libdiskstatus/common/libdiskstatus.c index e2cf5ad7a2..18468289a7 100644 --- a/usr/src/lib/fm/libdiskstatus/common/libdiskstatus.c +++ b/usr/src/lib/fm/libdiskstatus/common/libdiskstatus.c @@ -21,10 +21,9 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Disk status library * @@ -33,9 +32,10 @@ * SCSI (and therefore SATA) disks are currently supported. The library is * capable of detecting the following status conditions: * - * - Predictive failure - * - Overtemp - * - Self-test failure + * - Predictive failure + * - Overtemp + * - Self-test failure + * - Solid State Media wearout */ #include <assert.h> @@ -131,6 +131,7 @@ disk_status_close(disk_status_t *dsp) nvlist_free(dsp->ds_predfail); nvlist_free(dsp->ds_overtemp); nvlist_free(dsp->ds_testfail); + nvlist_free(dsp->ds_ssmwearout); if (dsp->ds_data) dsp->ds_transport->dt_close(dsp->ds_data); (void) close(dsp->ds_fd); @@ -172,6 +173,8 @@ disk_status_get(disk_status_t *dsp) nvlist_free(dsp->ds_testfail); nvlist_free(dsp->ds_predfail); nvlist_free(dsp->ds_overtemp); + nvlist_free(dsp->ds_ssmwearout); + dsp->ds_ssmwearout = NULL; dsp->ds_testfail = dsp->ds_overtemp = dsp->ds_predfail = NULL; dsp->ds_faults = 0; @@ -223,6 +226,15 @@ disk_status_get(disk_status_t *dsp) goto nverror; } + if (dsp->ds_ssmwearout != NULL) { + if ((err = nvlist_add_boolean_value(faults, + FM_EREPORT_SCSI_SSMWEAROUT, + (dsp->ds_faults & DS_FAULT_SSMWEAROUT) != 0)) != 0 || + (err = nvlist_add_nvlist(nvl, FM_EREPORT_SCSI_SSMWEAROUT, + dsp->ds_ssmwearout)) != 0) + goto nverror; + } + if ((err = nvlist_add_nvlist(nvl, "faults", faults)) != 0) goto nverror; |