backout: 8074 need to add FMA event for SSD wearout (needs work)

Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
author: Robert Mustacchi <rm@joyent.com> 2017-11-02 16:30:37 +0000
committer: Robert Mustacchi <rm@joyent.com> 2017-11-02 16:42:58 +0000
commit: 85d8f2394fabc93b4699fbf13bad4d86221b4c8f (patch)
tree: e8b6a6dff5fdda0acdc6f6f706a02fdca9e60fb3 /usr/src/cmd/fm
parent: 3942697fd5a28d8766c93571074d5704d56fc016 (diff)
download: illumos-joyent-85d8f2394fabc93b4699fbf13bad4d86221b4c8f.tar.gz
9 files changed, 19 insertions, 74 deletions
diff --git a/usr/src/cmd/fm/dicts/DISK.dict b/usr/src/cmd/fm/dicts/DISK.dict
index a7f6695193..93df523b9a 100644
--- a/usr/src/cmd/fm/dicts/DISK.dict
+++ b/usr/src/cmd/fm/dicts/DISK.dict
@@ -1,7 +1,6 @@
 #
 # Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
-# Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
 #
 # CDDL HEADER START
 #
@@ -33,4 +32,3 @@ fault.io.disk.over-temperature=1
 fault.io.disk.self-test-failure=2
 fault.io.scsi.cmd.disk.dev.rqs.derr=3
 fault.io.scsi.cmd.disk.dev.rqs.merr=4
-fault.io.disk.ssm-wearout=8
diff --git a/usr/src/cmd/fm/dicts/DISK.po b/usr/src/cmd/fm/dicts/DISK.po
index 11439f7404..504285915b 100644
--- a/usr/src/cmd/fm/dicts/DISK.po
+++ b/usr/src/cmd/fm/dicts/DISK.po
@@ -1,7 +1,6 @@
 #
 # Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
-# Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
 #
 # CDDL HEADER START
 #
@@ -105,19 +104,3 @@ msgid "DISK-8000-4Q.impact"
 msgstr "It is likely that continued operation will result in data corruption, which may eventually cause the loss of service or the service degradation.\n"
 msgid "DISK-8000-4Q.action"
 msgstr "Schedule a repair procedure to replace the affected device. Use 'fmadm faulty' to find the affected disk.\n"
-#
-# code: DISK-8000-8D
-# keys: fault.io.disk.ssm-wearout
-#
-msgid "DISK-8000-8D.type"
-msgstr "Fault"
-msgid "DISK-8000-8D.severity"
-msgstr "Major"
-msgid "DISK-8000-8D.description"
-msgstr "A solid state media device is nearing end of life as projected by the manufacturer."
-msgid "DISK-8000-8D.response"
-msgstr "None."
-msgid "DISK-8000-8D.impact"
-msgstr "Performance degradation is likely and continued operation of this device will cause drive failure and potential data loss."
-msgid "DISK-8000-8D.action"
-msgstr "Schedule a repair procedure to replace the affected drive.\nUse fmdump -V -u <EVENT_ID> to identify the drive."
diff --git a/usr/src/cmd/fm/eversholt/files/common/disk.esc b/usr/src/cmd/fm/eversholt/files/common/disk.esc
index c94a0b675d..f8b04a305e 100644
--- a/usr/src/cmd/fm/eversholt/files/common/disk.esc
+++ b/usr/src/cmd/fm/eversholt/files/common/disk.esc
@@ -20,7 +20,6 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  */
 
 #pragma dictionary "DISK"
@@ -45,8 +44,7 @@ asru P;
  * to trigger a fault when recovery/relocation is not possible.
  *
  * We let the engine propagate one error only once every 1 minute and then if we
- * still get 2 or more errors within 24 hours for the same LBA,
- * there is a fault.
+ * still get 2 or more errors within 24 hours for the same LBA, there is a fault.
  */
 engine serd.io.scsi.cmd.disk.dev.rqs.merr@P, N=1, T=24h;
 
@@ -187,7 +185,6 @@ event fault.io.disk.predictive-failure@P, FITrate=10,
     FITrate=10, FRU=P, ASRU=P;
 event fault.io.disk.self-test-failure@P, FITrate=10,
     FITrate=10, FRU=P, ASRU=P;
-event fault.io.disk.ssm-wearout@P;
 
 /*
  * ereports.
@@ -195,7 +192,6 @@ event fault.io.disk.ssm-wearout@P;
 event ereport.io.scsi.disk.over-temperature@P;
 event ereport.io.scsi.disk.predictive-failure@P;
 event ereport.io.scsi.disk.self-test-failure@P;
-event ereport.io.scsi.disk.ssm-wearout@P;
 
 /*
  * Propagations.
@@ -210,10 +206,3 @@ prop fault.io.disk.predictive-failure@P ->
     ereport.io.scsi.disk.predictive-failure@P {
     setpayloadprop("asc", payloadprop("additional-sense-code")) &&
     setpayloadprop("ascq", payloadprop("additional-sense-code-qualifier")) };
-
-prop fault.io.disk.ssm-wearout@P ->
-    ereport.io.scsi.disk.ssm-wearout@P {
-    setpayloadprop("current-wearout-percentage",
-    payloadprop("current-ssm-wearout"))
-    && setpayloadprop("threshold-wearout-percentage",
-    payloadprop("threshold-ssm-wearout")) };
diff --git a/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c b/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c
index eb0abde752..167873cd8b 100644
--- a/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c
+++ b/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c
@@ -22,7 +22,6 @@
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
- * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  */
 
 /*
@@ -159,10 +158,6 @@ dm_fault_execute_actions(fmd_hdl_t *hdl, diskmon_t *diskp, nvlist_t *nvl)
 	    DISK_ERROR_CLASS "." FM_FAULT_DISK_TESTFAIL))
 		action_prop = DISK_PROP_STFAILACTION;
 
-	if (fmd_nvl_class_match(hdl, nvl,
-	    DISK_ERROR_CLASS "." FM_FAULT_SSM_WEAROUT))
-		action_prop = DISK_PROP_SSMWEAROUTACTION;
-
 	dm_fault_indicator_set(diskp, INDICATOR_ON);
 
 	if (action_prop != NULL &&
diff --git a/usr/src/cmd/fm/modules/common/disk-monitor/diskmon_conf.h b/usr/src/cmd/fm/modules/common/disk-monitor/diskmon_conf.h
index 93a2655649..4c0a9bec83 100644
--- a/usr/src/cmd/fm/modules/common/disk-monitor/diskmon_conf.h
+++ b/usr/src/cmd/fm/modules/common/disk-monitor/diskmon_conf.h
@@ -22,12 +22,13 @@
 /*
  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
- * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  */
 
 #ifndef _DISKMOND_CONF_H
 #define	_DISKMOND_CONF_H
 
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
 /*
  * Configuration File data
  */
@@ -61,7 +62,6 @@ extern "C" {
 #define	DISK_PROP_FRUACTION		"fru-update-action"
 #define	DISK_PROP_OTEMPACTION		"overtemp-action"
 #define	DISK_PROP_STFAILACTION		"selftest-fail-action"
-#define	DISK_PROP_SSMWEAROUTACTION	"ssm-wearout-action"
 
 /* Properties for the "ap" subentity */
 #define	DISK_AP_PROP_APID "path"
diff --git a/usr/src/cmd/fm/modules/common/disk-transport/disk-transport.conf b/usr/src/cmd/fm/modules/common/disk-transport/disk-transport.conf
index 8872aeeeb5..c8396be904 100644
--- a/usr/src/cmd/fm/modules/common/disk-transport/disk-transport.conf
+++ b/usr/src/cmd/fm/modules/common/disk-transport/disk-transport.conf
@@ -21,8 +21,5 @@
 #
 # Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
-# Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
 #
-#
-# To disable transport of "high solid state media used %" faults uncomment:
-# setprop ignore-ssm-wearout true
+#ident	"%Z%%M%	%I%	%E% SMI"
diff --git a/usr/src/cmd/fm/modules/common/disk-transport/disk_transport.c b/usr/src/cmd/fm/modules/common/disk-transport/disk_transport.c
index 6fdb97c497..e586f714cc 100644
--- a/usr/src/cmd/fm/modules/common/disk-transport/disk_transport.c
+++ b/usr/src/cmd/fm/modules/common/disk-transport/disk_transport.c
@@ -21,9 +21,10 @@
 /*
  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
- * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  */
 
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
 /*
  * Disk error transport module
  *
@@ -31,18 +32,17 @@
  * and FMA ereports.  It is a read-only transport module, and checks for the
  * following failures:
  *
- *	- overtemp
- *	- predictive failure
- *	- self-test failure
- *	- solid state media wearout
+ * 	- overtemp
+ * 	- predictive failure
+ * 	- self-test failure
  *
  * These failures are detected via the TOPO_METH_DISK_STATUS method, which
  * leverages libdiskstatus to do the actual analysis.  This transport module is
  * in charge of the following tasks:
  *
- *	- discovering available devices
- *	- periodically checking devices
- *	- managing device addition/removal
+ * 	- discovering available devices
+ * 	- periodically checking devices
+ * 	- managing device addition/removal
  */
 
 #include <ctype.h>
@@ -113,6 +113,7 @@ dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg)
 	char *protocol;
 	int err;
 	disk_monitor_t *dmp = arg;
+	uint64_t ena;
 	nvpair_t *elem;
 	boolean_t fault;
 	nvlist_t *details;
@@ -151,8 +152,10 @@ dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg)
 
 	nvlist_free(in);
 
+	ena = fmd_event_ena_create(dmp->dm_hdl);
+
 	/*
-	 * Check for faults and post ereport(s) if needed
+	 * Add any faults.
 	 */
 	if (nvlist_lookup_nvlist(result, "faults", &faults) == 0 &&
 	    nvlist_lookup_string(result, "protocol", &protocol) == 0) {
@@ -167,15 +170,8 @@ dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg)
 			    &details) != 0)
 				continue;
 
-			if (strcmp(nvpair_name(elem),
-			    FM_EREPORT_SCSI_SSMWEAROUT) == 0 &&
-			    fmd_prop_get_int32(dmp->dm_hdl,
-			    "ignore-ssm-wearout") == FMD_B_TRUE)
-				continue;
-
 			dt_post_ereport(dmp->dm_hdl, dmp->dm_xprt, protocol,
-			    nvpair_name(elem),
-			    fmd_event_ena_create(dmp->dm_hdl), fmri, details);
+			    nvpair_name(elem), ena, fmri, details);
 		}
 	}
 
@@ -252,7 +248,6 @@ static const fmd_prop_t fmd_props[] = {
 	{ "interval", FMD_TYPE_TIME, "1h" },
 	{ "min-interval", FMD_TYPE_TIME, "1min" },
 	{ "simulate", FMD_TYPE_STRING, "" },
-	{ "ignore-ssm-wearout", FMD_TYPE_BOOL, "false"},
 	{ NULL, 0, NULL }
 };
 
@@ -267,7 +262,7 @@ static const fmd_hdl_ops_t fmd_ops = {
 };
 
 static const fmd_hdl_info_t fmd_info = {
-	"Disk Transport Agent", "1.1", &fmd_ops, fmd_props
+	"Disk Transport Agent", "1.0", &fmd_ops, fmd_props
 };
 
 void
@@ -294,7 +289,7 @@ _fmd_init(fmd_hdl_t *hdl)
 	 * the developer to substitute a faulty device based off all or part of
 	 * an FMRI string.  For example, one could do:
 	 *
-	 *	setprop simulate "bay=4/disk=4	/path/to/sim.so"
+	 * 	setprop simulate "bay=4/disk=4	/path/to/sim.so"
 	 *
 	 * When the transport module encounters an FMRI containing the given
 	 * string, then it will open the simulator file instead of the
diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf b/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf
index 250ce99b8b..39cde90f01 100644
--- a/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf
+++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf
@@ -25,9 +25,6 @@
 #
 # fmd configuration file for the zfs retire agent.
 #
-# To enable automated retire for SSM wearout faults uncomment the line below:
-# setprop ssm_wearout_skip_retire false
-#
 subscribe fault.fs.zfs.*
 subscribe fault.io.*
 subscribe resource.fs.zfs.removed
diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c
index 4856676d7d..263b91cc19 100644
--- a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c
+++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c
@@ -427,14 +427,6 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
 		    &retire) == 0 && retire == 0)
 			continue;
 
-		if (fmd_nvl_class_match(hdl, fault,
-		    "fault.io.disk.ssm-wearout") &&
-		    fmd_prop_get_int32(hdl, "ssm_wearout_skip_retire") ==
-		    FMD_B_TRUE) {
-			fmd_hdl_debug(hdl, "zfs-retire: ignoring SSM fault");
-			continue;
-		}
-
 		/*
 		 * While we subscribe to fault.fs.zfs.*, we only take action
 		 * for faults targeting a specific vdev (open failure or SERD
@@ -569,7 +561,6 @@ static const fmd_hdl_ops_t fmd_ops = {
 
 static const fmd_prop_t fmd_props[] = {
 	{ "spare_on_remove", FMD_TYPE_BOOL, "true" },
-	{ "ssm_wearout_skip_retire", FMD_TYPE_BOOL, "true"},
 	{ NULL, 0, NULL }
 };
author	Robert Mustacchi <rm@joyent.com>	2017-11-02 16:30:37 +0000
committer	Robert Mustacchi <rm@joyent.com>	2017-11-02 16:42:58 +0000
commit	85d8f2394fabc93b4699fbf13bad4d86221b4c8f (patch)
tree	e8b6a6dff5fdda0acdc6f6f706a02fdca9e60fb3 /usr/src/cmd/fm
parent	3942697fd5a28d8766c93571074d5704d56fc016 (diff)
download	illumos-joyent-85d8f2394fabc93b4699fbf13bad4d86221b4c8f.tar.gz