diff options
| author | Robert Mustacchi <rm@joyent.com> | 2017-11-02 16:30:37 +0000 |
|---|---|---|
| committer | Robert Mustacchi <rm@joyent.com> | 2017-11-02 16:42:58 +0000 |
| commit | 85d8f2394fabc93b4699fbf13bad4d86221b4c8f (patch) | |
| tree | e8b6a6dff5fdda0acdc6f6f706a02fdca9e60fb3 /usr/src/cmd/fm | |
| parent | 3942697fd5a28d8766c93571074d5704d56fc016 (diff) | |
| download | illumos-joyent-85d8f2394fabc93b4699fbf13bad4d86221b4c8f.tar.gz | |
backout: 8074 need to add FMA event for SSD wearout (needs work)
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
Diffstat (limited to 'usr/src/cmd/fm')
9 files changed, 19 insertions, 74 deletions
diff --git a/usr/src/cmd/fm/dicts/DISK.dict b/usr/src/cmd/fm/dicts/DISK.dict index a7f6695193..93df523b9a 100644 --- a/usr/src/cmd/fm/dicts/DISK.dict +++ b/usr/src/cmd/fm/dicts/DISK.dict @@ -1,7 +1,6 @@ # # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. -# Copyright 2016 Nexenta Systems, Inc. All rights reserved. # # CDDL HEADER START # @@ -33,4 +32,3 @@ fault.io.disk.over-temperature=1 fault.io.disk.self-test-failure=2 fault.io.scsi.cmd.disk.dev.rqs.derr=3 fault.io.scsi.cmd.disk.dev.rqs.merr=4 -fault.io.disk.ssm-wearout=8 diff --git a/usr/src/cmd/fm/dicts/DISK.po b/usr/src/cmd/fm/dicts/DISK.po index 11439f7404..504285915b 100644 --- a/usr/src/cmd/fm/dicts/DISK.po +++ b/usr/src/cmd/fm/dicts/DISK.po @@ -1,7 +1,6 @@ # # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. -# Copyright 2016 Nexenta Systems, Inc. All rights reserved. # # CDDL HEADER START # @@ -105,19 +104,3 @@ msgid "DISK-8000-4Q.impact" msgstr "It is likely that continued operation will result in data corruption, which may eventually cause the loss of service or the service degradation.\n" msgid "DISK-8000-4Q.action" msgstr "Schedule a repair procedure to replace the affected device. Use 'fmadm faulty' to find the affected disk.\n" -# -# code: DISK-8000-8D -# keys: fault.io.disk.ssm-wearout -# -msgid "DISK-8000-8D.type" -msgstr "Fault" -msgid "DISK-8000-8D.severity" -msgstr "Major" -msgid "DISK-8000-8D.description" -msgstr "A solid state media device is nearing end of life as projected by the manufacturer." -msgid "DISK-8000-8D.response" -msgstr "None." -msgid "DISK-8000-8D.impact" -msgstr "Performance degradation is likely and continued operation of this device will cause drive failure and potential data loss." -msgid "DISK-8000-8D.action" -msgstr "Schedule a repair procedure to replace the affected drive.\nUse fmdump -V -u <EVENT_ID> to identify the drive." diff --git a/usr/src/cmd/fm/eversholt/files/common/disk.esc b/usr/src/cmd/fm/eversholt/files/common/disk.esc index c94a0b675d..f8b04a305e 100644 --- a/usr/src/cmd/fm/eversholt/files/common/disk.esc +++ b/usr/src/cmd/fm/eversholt/files/common/disk.esc @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2016 Nexenta Systems, Inc. All rights reserved. */ #pragma dictionary "DISK" @@ -45,8 +44,7 @@ asru P; * to trigger a fault when recovery/relocation is not possible. * * We let the engine propagate one error only once every 1 minute and then if we - * still get 2 or more errors within 24 hours for the same LBA, - * there is a fault. + * still get 2 or more * errors within 24 hours for the same LBA, there is a fault. */ engine serd.io.scsi.cmd.disk.dev.rqs.merr@P, N=1, T=24h; @@ -187,7 +185,6 @@ event fault.io.disk.predictive-failure@P, FITrate=10, FITrate=10, FRU=P, ASRU=P; event fault.io.disk.self-test-failure@P, FITrate=10, FITrate=10, FRU=P, ASRU=P; -event fault.io.disk.ssm-wearout@P; /* * ereports. @@ -195,7 +192,6 @@ event fault.io.disk.ssm-wearout@P; event ereport.io.scsi.disk.over-temperature@P; event ereport.io.scsi.disk.predictive-failure@P; event ereport.io.scsi.disk.self-test-failure@P; -event ereport.io.scsi.disk.ssm-wearout@P; /* * Propagations. @@ -210,10 +206,3 @@ prop fault.io.disk.predictive-failure@P -> ereport.io.scsi.disk.predictive-failure@P { setpayloadprop("asc", payloadprop("additional-sense-code")) && setpayloadprop("ascq", payloadprop("additional-sense-code-qualifier")) }; - -prop fault.io.disk.ssm-wearout@P -> - ereport.io.scsi.disk.ssm-wearout@P { - setpayloadprop("current-wearout-percentage", - payloadprop("current-ssm-wearout")) - && setpayloadprop("threshold-wearout-percentage", - payloadprop("threshold-ssm-wearout")) }; diff --git a/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c b/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c index eb0abde752..167873cd8b 100644 --- a/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c +++ b/usr/src/cmd/fm/modules/common/disk-monitor/disk_monitor.c @@ -22,7 +22,6 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2016 Nexenta Systems, Inc. All rights reserved. */ /* @@ -159,10 +158,6 @@ dm_fault_execute_actions(fmd_hdl_t *hdl, diskmon_t *diskp, nvlist_t *nvl) DISK_ERROR_CLASS "." FM_FAULT_DISK_TESTFAIL)) action_prop = DISK_PROP_STFAILACTION; - if (fmd_nvl_class_match(hdl, nvl, - DISK_ERROR_CLASS "." FM_FAULT_SSM_WEAROUT)) - action_prop = DISK_PROP_SSMWEAROUTACTION; - dm_fault_indicator_set(diskp, INDICATOR_ON); if (action_prop != NULL && diff --git a/usr/src/cmd/fm/modules/common/disk-monitor/diskmon_conf.h b/usr/src/cmd/fm/modules/common/disk-monitor/diskmon_conf.h index 93a2655649..4c0a9bec83 100644 --- a/usr/src/cmd/fm/modules/common/disk-monitor/diskmon_conf.h +++ b/usr/src/cmd/fm/modules/common/disk-monitor/diskmon_conf.h @@ -22,12 +22,13 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2016 Nexenta Systems, Inc. All rights reserved. */ #ifndef _DISKMOND_CONF_H #define _DISKMOND_CONF_H +#pragma ident "%Z%%M% %I% %E% SMI" + /* * Configuration File data */ @@ -61,7 +62,6 @@ extern "C" { #define DISK_PROP_FRUACTION "fru-update-action" #define DISK_PROP_OTEMPACTION "overtemp-action" #define DISK_PROP_STFAILACTION "selftest-fail-action" -#define DISK_PROP_SSMWEAROUTACTION "ssm-wearout-action" /* Properties for the "ap" subentity */ #define DISK_AP_PROP_APID "path" diff --git a/usr/src/cmd/fm/modules/common/disk-transport/disk-transport.conf b/usr/src/cmd/fm/modules/common/disk-transport/disk-transport.conf index 8872aeeeb5..c8396be904 100644 --- a/usr/src/cmd/fm/modules/common/disk-transport/disk-transport.conf +++ b/usr/src/cmd/fm/modules/common/disk-transport/disk-transport.conf @@ -21,8 +21,5 @@ # # Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. -# Copyright 2016 Nexenta Systems, Inc. All rights reserved. # -# -# To disable transport of "high solid state media used %" faults uncomment: -# setprop ignore-ssm-wearout true +#ident "%Z%%M% %I% %E% SMI" diff --git a/usr/src/cmd/fm/modules/common/disk-transport/disk_transport.c b/usr/src/cmd/fm/modules/common/disk-transport/disk_transport.c index 6fdb97c497..e586f714cc 100644 --- a/usr/src/cmd/fm/modules/common/disk-transport/disk_transport.c +++ b/usr/src/cmd/fm/modules/common/disk-transport/disk_transport.c @@ -21,9 +21,10 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2016 Nexenta Systems, Inc. All rights reserved. */ +#pragma ident "%Z%%M% %I% %E% SMI" + /* * Disk error transport module * @@ -31,18 +32,17 @@ * and FMA ereports. It is a read-only transport module, and checks for the * following failures: * - * - overtemp - * - predictive failure - * - self-test failure - * - solid state media wearout + * - overtemp + * - predictive failure + * - self-test failure * * These failures are detected via the TOPO_METH_DISK_STATUS method, which * leverages libdiskstatus to do the actual analysis. This transport module is * in charge of the following tasks: * - * - discovering available devices - * - periodically checking devices - * - managing device addition/removal + * - discovering available devices + * - periodically checking devices + * - managing device addition/removal */ #include <ctype.h> @@ -113,6 +113,7 @@ dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg) char *protocol; int err; disk_monitor_t *dmp = arg; + uint64_t ena; nvpair_t *elem; boolean_t fault; nvlist_t *details; @@ -151,8 +152,10 @@ dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg) nvlist_free(in); + ena = fmd_event_ena_create(dmp->dm_hdl); + /* - * Check for faults and post ereport(s) if needed + * Add any faults. */ if (nvlist_lookup_nvlist(result, "faults", &faults) == 0 && nvlist_lookup_string(result, "protocol", &protocol) == 0) { @@ -167,15 +170,8 @@ dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg) &details) != 0) continue; - if (strcmp(nvpair_name(elem), - FM_EREPORT_SCSI_SSMWEAROUT) == 0 && - fmd_prop_get_int32(dmp->dm_hdl, - "ignore-ssm-wearout") == FMD_B_TRUE) - continue; - dt_post_ereport(dmp->dm_hdl, dmp->dm_xprt, protocol, - nvpair_name(elem), - fmd_event_ena_create(dmp->dm_hdl), fmri, details); + nvpair_name(elem), ena, fmri, details); } } @@ -252,7 +248,6 @@ static const fmd_prop_t fmd_props[] = { { "interval", FMD_TYPE_TIME, "1h" }, { "min-interval", FMD_TYPE_TIME, "1min" }, { "simulate", FMD_TYPE_STRING, "" }, - { "ignore-ssm-wearout", FMD_TYPE_BOOL, "false"}, { NULL, 0, NULL } }; @@ -267,7 +262,7 @@ static const fmd_hdl_ops_t fmd_ops = { }; static const fmd_hdl_info_t fmd_info = { - "Disk Transport Agent", "1.1", &fmd_ops, fmd_props + "Disk Transport Agent", "1.0", &fmd_ops, fmd_props }; void @@ -294,7 +289,7 @@ _fmd_init(fmd_hdl_t *hdl) * the developer to substitute a faulty device based off all or part of * an FMRI string. For example, one could do: * - * setprop simulate "bay=4/disk=4 /path/to/sim.so" + * setprop simulate "bay=4/disk=4 /path/to/sim.so" * * When the transport module encounters an FMRI containing the given * string, then it will open the simulator file instead of the diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf b/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf index 250ce99b8b..39cde90f01 100644 --- a/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf +++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf @@ -25,9 +25,6 @@ # # fmd configuration file for the zfs retire agent. # -# To enable automated retire for SSM wearout faults uncomment the line below: -# setprop ssm_wearout_skip_retire false -# subscribe fault.fs.zfs.* subscribe fault.io.* subscribe resource.fs.zfs.removed diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c index 4856676d7d..263b91cc19 100644 --- a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c +++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c @@ -427,14 +427,6 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, &retire) == 0 && retire == 0) continue; - if (fmd_nvl_class_match(hdl, fault, - "fault.io.disk.ssm-wearout") && - fmd_prop_get_int32(hdl, "ssm_wearout_skip_retire") == - FMD_B_TRUE) { - fmd_hdl_debug(hdl, "zfs-retire: ignoring SSM fault"); - continue; - } - /* * While we subscribe to fault.fs.zfs.*, we only take action * for faults targeting a specific vdev (open failure or SERD @@ -569,7 +561,6 @@ static const fmd_hdl_ops_t fmd_ops = { static const fmd_prop_t fmd_props[] = { { "spare_on_remove", FMD_TYPE_BOOL, "true" }, - { "ssm_wearout_skip_retire", FMD_TYPE_BOOL, "true"}, { NULL, 0, NULL } }; |
