diff options
author | eschrock <none@none> | 2008-06-27 17:05:29 -0700 |
---|---|---|
committer | eschrock <none@none> | 2008-06-27 17:05:29 -0700 |
commit | 51ece83525fa18f5e72627610f480dffc7e492fd (patch) | |
tree | b3973e98ae2a5df5f30c6a801d9cbcf250312d82 /usr/src | |
parent | 68038c2cd7751a1fc2b5c55e38609e137fa80807 (diff) | |
download | illumos-gate-51ece83525fa18f5e72627610f480dffc7e492fd.tar.gz |
PSARC 2008/388 Short circuit for vdev probe failure
6410382 ZFS rename should update ->v_path
6633592 vdev_root_open() faulted logic doesn't work
6674674 zfs needs to handle more than EIO
6705158 failed log device should not be treated like normal toplevel vdev
6705232 vdev_not_present erroneously cleared in vdev_validate()
6705649 libses/libscsi plugins should be included in ABI exception list
6707536 zfs_case_timeout and associated code is unused
6707637 vdev_propagate_state() shouldn't skip log devices
6708095 better FMA integration for vdev probe failure
6708192 typo in fix for 6646106 breaks pool I/O failure handling
6709112 zio_should_retry() should account for vdev_is_failing
6718542 zinject doesn't work for file I/O probe failure
6719105 ses2: incorrect parsing of page 15 (nickname)
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c | 143 | ||||
-rw-r--r-- | usr/src/lib/scsi/plugins/ses/ses2/common/ses2_enclosure.c | 2 | ||||
-rw-r--r-- | usr/src/tools/abi/etc/exceptions | 1 | ||||
-rw-r--r-- | usr/src/uts/common/fs/nfs/nfs3_srv.c | 14 | ||||
-rw-r--r-- | usr/src/uts/common/fs/nfs/nfs4_srv.c | 14 | ||||
-rw-r--r-- | usr/src/uts/common/fs/nfs/nfs_srv.c | 14 | ||||
-rw-r--r-- | usr/src/uts/common/fs/vnode.c | 18 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev.c | 36 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_disk.c | 16 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_file.c | 21 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_label.c | 12 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_root.c | 29 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_fm.c | 10 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_vnops.c | 3 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zio.c | 8 | ||||
-rw-r--r-- | usr/src/uts/common/sys/fm/fs/zfs.h | 1 | ||||
-rw-r--r-- | usr/src/uts/common/sys/vnode.h | 1 |
17 files changed, 190 insertions, 153 deletions
diff --git a/usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c b/usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c index c69893a8e1..71a4b06737 100644 --- a/usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c +++ b/usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c @@ -42,38 +42,49 @@ */ #define MAX_SERDLEN (16 * 2 + sizeof ("zfs___checksum")) +/* + * On-disk case structure. This must maintain backwards compatibility with + * previous versions of the DE. By default, any members appended to the end + * will be filled with zeros if they don't exist in a previous version. + */ typedef struct zfs_case_data { uint64_t zc_version; uint64_t zc_ena; uint64_t zc_pool_guid; uint64_t zc_vdev_guid; - int zc_has_timer; + int zc_has_timer; /* defunct */ int zc_pool_state; char zc_serd_checksum[MAX_SERDLEN]; char zc_serd_io[MAX_SERDLEN]; - int zc_has_serd_timer; + int zc_has_remove_timer; } zfs_case_data_t; +/* + * In-core case structure. + */ typedef struct zfs_case { boolean_t zc_present; uint32_t zc_version; zfs_case_data_t zc_data; fmd_case_t *zc_case; uu_list_node_t zc_node; - id_t zc_timer; - id_t zc_serd_timer; + id_t zc_remove_timer; } zfs_case_t; #define CASE_DATA "data" #define CASE_DATA_VERSION_INITIAL 1 #define CASE_DATA_VERSION_SERD 2 -static hrtime_t zfs_case_timeout; -static hrtime_t zfs_serd_timeout; +static hrtime_t zfs_remove_timeout; uu_list_pool_t *zfs_case_pool; uu_list_t *zfs_cases; +#define ZFS_MAKE_RSRC(type) \ + FM_RSRC_CLASS "." ZFS_ERROR_CLASS "." type +#define ZFS_MAKE_EREPORT(type) \ + FM_EREPORT_CLASS "." ZFS_ERROR_CLASS "." type + /* * Write out the persistent representation of an active case. */ @@ -114,12 +125,9 @@ zfs_case_unserialize(fmd_hdl_t *hdl, fmd_case_t *cp) * doesn't include the SERD engine name. */ - if (zcp->zc_data.zc_has_timer) - zcp->zc_timer = fmd_timer_install(hdl, zcp, - NULL, zfs_case_timeout); - if (zcp->zc_data.zc_has_serd_timer) - zcp->zc_serd_timer = fmd_timer_install(hdl, zcp, - NULL, zfs_serd_timeout); + if (zcp->zc_data.zc_has_remove_timer) + zcp->zc_remove_timer = fmd_timer_install(hdl, zcp, + NULL, zfs_remove_timeout); (void) uu_list_insert_before(zfs_cases, NULL, zcp); @@ -310,14 +318,9 @@ zfs_case_solve(fmd_hdl_t *hdl, zfs_case_t *zcp, const char *faultname, fmd_case_solve(hdl, zcp->zc_case); serialize = B_FALSE; - if (zcp->zc_data.zc_has_timer) { - fmd_timer_remove(hdl, zcp->zc_timer); - zcp->zc_data.zc_has_timer = 0; - serialize = B_TRUE; - } - if (zcp->zc_data.zc_has_serd_timer) { - fmd_timer_remove(hdl, zcp->zc_serd_timer); - zcp->zc_data.zc_has_serd_timer = 0; + if (zcp->zc_data.zc_has_remove_timer) { + fmd_timer_remove(hdl, zcp->zc_remove_timer); + zcp->zc_data.zc_has_remove_timer = 0; serialize = B_TRUE; } if (serialize) @@ -338,7 +341,7 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) uint64_t ena, pool_guid, vdev_guid; nvlist_t *detector; boolean_t isresource; - const char *serd; + boolean_t checkremove; isresource = fmd_nvl_class_match(hdl, nvl, "resource.fs.zfs.*"); @@ -459,7 +462,7 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) if (isresource) { if (fmd_nvl_class_match(hdl, nvl, - "resource.fs.zfs.autoreplace")) { + ZFS_MAKE_RSRC(FM_RESOURCE_AUTOREPLACE))) { /* * The 'resource.fs.zfs.autoreplace' event indicates * that the pool was loaded with the 'autoreplace' @@ -469,7 +472,7 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) */ fmd_case_close(hdl, zcp->zc_case); } else if (fmd_nvl_class_match(hdl, nvl, - "resource.fs.zfs.removed")) { + ZFS_MAKE_RSRC(FM_RESOURCE_REMOVED))) { /* * The 'resource.fs.zfs.removed' event indicates that * device removal was detected, and the device was @@ -479,9 +482,9 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) * We reset the SERD engine, and cancel any pending * timers. */ - if (zcp->zc_data.zc_has_serd_timer) { - fmd_timer_remove(hdl, zcp->zc_serd_timer); - zcp->zc_data.zc_has_serd_timer = 0; + if (zcp->zc_data.zc_has_remove_timer) { + fmd_timer_remove(hdl, zcp->zc_remove_timer); + zcp->zc_data.zc_has_remove_timer = 0; zfs_case_serialize(hdl, zcp); } if (zcp->zc_data.zc_serd_io[0] != '\0') @@ -519,7 +522,8 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) * succeeded, we associate a timer with the event. When it expires, we * close the case. */ - if (fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.zpool")) { + if (fmd_nvl_class_match(hdl, nvl, + ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_POOL))) { /* * Pool level fault. Before solving the case, go through and * close any open device cases that may be pending. @@ -542,9 +546,14 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) * within the timeout period, then we'll solve the device case. */ zfs_case_solve(hdl, zcp, "fault.fs.zfs.device", B_TRUE); - } else if (fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.io") || - fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.checksum") || - fmd_nvl_class_match(hdl, nvl, "ererpot.fs.zfs.io_failure")) { + } else if (fmd_nvl_class_match(hdl, nvl, + ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO)) || + fmd_nvl_class_match(hdl, nvl, + ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM)) || + fmd_nvl_class_match(hdl, nvl, + ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO_FAILURE)) || + fmd_nvl_class_match(hdl, nvl, + ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_PROBE_FAILURE))) { char *failmode = NULL; /* @@ -554,8 +563,8 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) * (persistent errors for a single data block, etc). For now, * a single SERD engine is sufficient. */ - serd = NULL; - if (fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.io")) { + if (fmd_nvl_class_match(hdl, nvl, + ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO))) { if (zcp->zc_data.zc_serd_io[0] == '\0') { zfs_serd_name(zcp->zc_data.zc_serd_io, pool_guid, vdev_guid, "io"); @@ -564,9 +573,10 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) fmd_prop_get_int64(hdl, "io_T")); zfs_case_serialize(hdl, zcp); } - serd = zcp->zc_data.zc_serd_io; + if (fmd_serd_record(hdl, zcp->zc_data.zc_serd_io, ep)) + checkremove = B_TRUE; } else if (fmd_nvl_class_match(hdl, nvl, - "ereport.fs.zfs.checksum")) { + ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM))) { if (zcp->zc_data.zc_serd_checksum[0] == '\0') { zfs_serd_name(zcp->zc_data.zc_serd_checksum, pool_guid, vdev_guid, "checksum"); @@ -576,9 +586,14 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) fmd_prop_get_int64(hdl, "checksum_T")); zfs_case_serialize(hdl, zcp); } - serd = zcp->zc_data.zc_serd_checksum; + if (fmd_serd_record(hdl, + zcp->zc_data.zc_serd_checksum, ep)) { + zfs_case_solve(hdl, zcp, + "fault.fs.zfs.vdev.checksum", B_FALSE); + } } else if (fmd_nvl_class_match(hdl, nvl, - "ereport.fs.zfs.io_failure") && (nvlist_lookup_string(nvl, + ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_IO_FAILURE)) && + (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE, &failmode) == 0) && failmode != NULL) { if (strncmp(failmode, FM_EREPORT_FAILMODE_CONTINUE, @@ -591,6 +606,9 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) zfs_case_solve(hdl, zcp, "fault.fs.zfs.io_failure_wait", B_FALSE); } + } else if (fmd_nvl_class_match(hdl, nvl, + ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_PROBE_FAILURE))) { + checkremove = B_TRUE; } /* @@ -598,13 +616,13 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) * any diagnosis until we're sure that we aren't about to * receive a 'resource.fs.zfs.removed' event. */ - if (serd && fmd_serd_record(hdl, serd, ep)) { - if (zcp->zc_data.zc_has_serd_timer) - fmd_timer_remove(hdl, zcp->zc_serd_timer); - zcp->zc_serd_timer = fmd_timer_install(hdl, zcp, NULL, - zfs_serd_timeout); - if (!zcp->zc_data.zc_has_serd_timer) { - zcp->zc_data.zc_has_serd_timer = 1; + if (checkremove) { + if (zcp->zc_data.zc_has_remove_timer) + fmd_timer_remove(hdl, zcp->zc_remove_timer); + zcp->zc_remove_timer = fmd_timer_install(hdl, zcp, NULL, + zfs_remove_timeout); + if (!zcp->zc_data.zc_has_remove_timer) { + zcp->zc_data.zc_has_remove_timer = 1; zfs_case_serialize(hdl, zcp); } } @@ -612,37 +630,17 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) } /* - * Timeout indicates one of two scenarios: - * - * - A device could not be opened while opening a pool, but the pool - * itself was opened successfully. - * - * - We diagnosed an I/O error, and it was not due to device removal (which - * would cause the timeout to be cancelled). + * The timeout is fired when we diagnosed an I/O error, and it was not due to + * device removal (which would cause the timeout to be cancelled). */ /* ARGSUSED */ static void zfs_fm_timeout(fmd_hdl_t *hdl, id_t id, void *data) { zfs_case_t *zcp = data; - const char *faultname; - if (id == zcp->zc_timer) { - zcp->zc_data.zc_has_timer = 0; - zfs_case_solve(hdl, zcp, "fault.fs.zfs.device", B_TRUE); - } - - if (id == zcp->zc_serd_timer) { - if (zcp->zc_data.zc_serd_io[0] != '\0' && - fmd_serd_fired(hdl, zcp->zc_data.zc_serd_io)) { - faultname = "fault.fs.zfs.vdev.io"; - } else { - assert(fmd_serd_fired(hdl, - zcp->zc_data.zc_serd_checksum)); - faultname = "fault.fs.zfs.vdev.checksum"; - } - zfs_case_solve(hdl, zcp, faultname, B_FALSE); - } + if (id == zcp->zc_remove_timer) + zfs_case_solve(hdl, zcp, "fault.fs.zfs.vdev.io", B_FALSE); } static void @@ -654,10 +652,8 @@ zfs_fm_close(fmd_hdl_t *hdl, fmd_case_t *cs) fmd_serd_destroy(hdl, zcp->zc_data.zc_serd_checksum); if (zcp->zc_data.zc_serd_io[0] != '\0') fmd_serd_destroy(hdl, zcp->zc_data.zc_serd_io); - if (zcp->zc_data.zc_has_timer) - fmd_timer_remove(hdl, zcp->zc_timer); - if (zcp->zc_data.zc_has_serd_timer) - fmd_timer_remove(hdl, zcp->zc_serd_timer); + if (zcp->zc_data.zc_has_remove_timer) + fmd_timer_remove(hdl, zcp->zc_remove_timer); uu_list_remove(zfs_cases, zcp); fmd_hdl_free(hdl, zcp, sizeof (zfs_case_t)); } @@ -686,7 +682,7 @@ static const fmd_prop_t fmd_props[] = { { "checksum_T", FMD_TYPE_TIME, "10min" }, { "io_N", FMD_TYPE_UINT32, "10" }, { "io_T", FMD_TYPE_TIME, "10min" }, - { "serd_timeout", FMD_TYPE_TIME, "5sec" }, + { "remove_timeout", FMD_TYPE_TIME, "5sec" }, { NULL, 0, NULL } }; @@ -738,8 +734,7 @@ _fmd_init(fmd_hdl_t *hdl) */ zfs_purge_cases(hdl); - zfs_case_timeout = fmd_prop_get_int64(hdl, "case_timeout"); - zfs_serd_timeout = fmd_prop_get_int64(hdl, "serd_timeout"); + zfs_remove_timeout = fmd_prop_get_int64(hdl, "remove_timeout"); } void diff --git a/usr/src/lib/scsi/plugins/ses/ses2/common/ses2_enclosure.c b/usr/src/lib/scsi/plugins/ses/ses2/common/ses2_enclosure.c index 099d1a5777..f6bcf8b46d 100644 --- a/usr/src/lib/scsi/plugins/ses/ses2/common/ses2_enclosure.c +++ b/usr/src/lib/scsi/plugins/ses/ses2/common/ses2_enclosure.c @@ -273,7 +273,7 @@ enc_parse_subnick(ses_plugin_t *sp, ses_node_t *np) return (0); for (dip = &spip->sspci_subnicks[0], i = 0; - i < spip->sspci_n_subenclosures; + i <= spip->sspci_n_subenclosures; i++, dip++) { if (!SES_WITHIN_PAGE_STRUCT(dip, spip, len)) break; diff --git a/usr/src/tools/abi/etc/exceptions b/usr/src/tools/abi/etc/exceptions index 0df1e40555..6bbc04c1ee 100644 --- a/usr/src/tools/abi/etc/exceptions +++ b/usr/src/tools/abi/etc/exceptions @@ -239,6 +239,7 @@ PSARC 2004/619: RULE W1: usr/lib/iconv/amd64/geniconvtbl.so 4773624: RULE W1: usr/lib/locale/iso_8859_1/ 4773624: RULE W1: usr/lib/fm/topo/ 6233613: RULE W1: usr/platform/sun4v/lib/mdb/ +6705649: RULE W1: usr/lib/scsi/plugins/ ############################################# # WARNINGs exempted from RULE W2 (See RULES section of intf_check manpage) diff --git a/usr/src/uts/common/fs/nfs/nfs3_srv.c b/usr/src/uts/common/fs/nfs/nfs3_srv.c index e0669e8a67..fcd024b45d 100644 --- a/usr/src/uts/common/fs/nfs/nfs3_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs3_srv.c @@ -2819,19 +2819,9 @@ rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi, } nbl_end_crit(srcvp); } - if (error == 0) { - char *tmp; - - /* fix the path name for the renamed file */ - mutex_enter(&srcvp->v_lock); - tmp = srcvp->v_path; - srcvp->v_path = NULL; - mutex_exit(&srcvp->v_lock); - vn_setpath(rootdir, tvp, srcvp, args->to.name, + if (error == 0) + vn_renamepath(tvp, srcvp, args->to.name, strlen(args->to.name)); - if (tmp != NULL) - kmem_free(tmp, strlen(tmp) + 1); - } VN_RELE(srcvp); srcvp = NULL; diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv.c b/usr/src/uts/common/fs/nfs/nfs4_srv.c index 3d70b771a3..ec2a40ac9a 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv.c @@ -4325,18 +4325,8 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, VN_RELE(tvp); } } - if (error == 0) { - char *tmp; - - /* fix the path name for the renamed file */ - mutex_enter(&srcvp->v_lock); - tmp = srcvp->v_path; - srcvp->v_path = NULL; - mutex_exit(&srcvp->v_lock); - vn_setpath(rootdir, ndvp, srcvp, nnm, nlen - 1); - if (tmp != NULL) - kmem_free(tmp, strlen(tmp) + 1); - } + if (error == 0) + vn_renamepath(ndvp, srcvp, nnm, nlen - 1); if (in_crit_src) nbl_end_crit(srcvp); diff --git a/usr/src/uts/common/fs/nfs/nfs_srv.c b/usr/src/uts/common/fs/nfs/nfs_srv.c index e2a97f813d..ab185eefca 100644 --- a/usr/src/uts/common/fs/nfs/nfs_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs_srv.c @@ -2236,19 +2236,9 @@ rfs_rename(struct nfsrnmargs *args, enum nfsstat *status, tovp, args->rna_to.da_name, cr, NULL, 0); TRACE_0(TR_FAC_NFS, TR_VOP_RENAME_END, "vop_rename_end:"); - if (error == 0) { - char *tmp; - - /* fix the path name for the renamed file */ - mutex_enter(&srcvp->v_lock); - tmp = srcvp->v_path; - srcvp->v_path = NULL; - mutex_exit(&srcvp->v_lock); - vn_setpath(rootdir, tovp, srcvp, args->rna_to.da_name, + if (error == 0) + vn_renamepath(tovp, srcvp, args->rna_to.da_name, strlen(args->rna_to.da_name)); - if (tmp != NULL) - kmem_free(tmp, strlen(tmp) + 1); - } /* * Force modified data and metadata out to stable storage. diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c index 1a885bf32e..5c1f8a96b6 100644 --- a/usr/src/uts/common/fs/vnode.c +++ b/usr/src/uts/common/fs/vnode.c @@ -2939,6 +2939,24 @@ vn_setpath_str(struct vnode *vp, const char *str, size_t len) } /* + * Called from within filesystem's vop_rename() to handle renames once the + * target vnode is available. + */ +void +vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len) +{ + char *tmp; + + mutex_enter(&vp->v_lock); + tmp = vp->v_path; + vp->v_path = NULL; + mutex_exit(&vp->v_lock); + vn_setpath(rootdir, dvp, vp, nm, len); + if (tmp != NULL) + kmem_free(tmp, strlen(tmp) + 1); +} + +/* * Similar to vn_setpath_str(), this function sets the path of the destination * vnode to the be the same as the source vnode. */ diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index 7d549f3508..f3b3ecd66e 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -1038,14 +1038,15 @@ vdev_validate(vdev_t *vd) if (spa->spa_load_state == SPA_LOAD_OPEN && state != POOL_STATE_ACTIVE) return (EBADF); - } - /* - * If we were able to open and validate a vdev that was previously - * marked permanently unavailable, clear that state now. - */ - if (vd->vdev_not_present) - vd->vdev_not_present = 0; + /* + * If we were able to open and validate a vdev that was + * previously marked permanently unavailable, clear that state + * now. + */ + if (vd->vdev_not_present) + vd->vdev_not_present = 0; + } return (0); } @@ -2102,12 +2103,21 @@ vdev_propagate_state(vdev_t *vd) if (vd->vdev_children > 0) { for (c = 0; c < vd->vdev_children; c++) { child = vd->vdev_child[c]; - if (vdev_is_dead(child) && !vdev_readable(child)) - faulted++; - else if (child->vdev_stat.vs_aux == VDEV_AUX_IO_FAILURE) - faulted++; - else if (child->vdev_state <= VDEV_STATE_DEGRADED) + + if ((vdev_is_dead(child) && !vdev_readable(child)) || + child->vdev_stat.vs_aux == VDEV_AUX_IO_FAILURE) { + /* + * Root special: if there is a top-level log + * device, treat the root vdev as if it were + * degraded. + */ + if (child->vdev_islog && vd == rvd) + degraded++; + else + faulted++; + } else if (child->vdev_state <= VDEV_STATE_DEGRADED) { degraded++; + } if (child->vdev_stat.vs_aux == VDEV_AUX_CORRUPT_DATA) corrupted++; @@ -2127,7 +2137,7 @@ vdev_propagate_state(vdev_t *vd) VDEV_AUX_CORRUPT_DATA); } - if (vd->vdev_parent && !vd->vdev_islog) + if (vd->vdev_parent) vdev_propagate_state(vd->vdev_parent); } diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c index 2b7c00d982..7a55f52380 100644 --- a/usr/src/uts/common/fs/zfs/vdev_disk.c +++ b/usr/src/uts/common/fs/zfs/vdev_disk.c @@ -33,6 +33,7 @@ #include <sys/fs/zfs.h> #include <sys/zio.h> #include <sys/sunldi.h> +#include <sys/fm/fs/zfs.h> /* * Virtual device vector for disks. @@ -405,7 +406,14 @@ vdev_disk_io_intr(buf_t *bp) vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp; zio_t *zio = vdb->vdb_io; - if ((zio->io_error = geterror(bp)) == 0 && bp->b_resid != 0) + /* + * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO. + * Rather than teach the rest of the stack about other error + * possibilities (EFAULT, etc), we normalize the error value here. + */ + zio->io_error = (geterror(bp) != 0 ? EIO : 0); + + if (zio->io_error == 0 && bp->b_resid != 0) zio->io_error = EIO; kmem_free(vdb, sizeof (vdev_disk_buf_t)); @@ -564,7 +572,11 @@ vdev_disk_io_done(zio_t *zio) spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); } else if (vdev_probe(vd) != 0) { ASSERT(vd->vdev_ops->vdev_op_leaf); - vd->vdev_is_failing = B_TRUE; + if (!vd->vdev_is_failing) { + vd->vdev_is_failing = B_TRUE; + zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE, + vd->vdev_spa, vd, zio, 0, 0); + } } } diff --git a/usr/src/uts/common/fs/zfs/vdev_file.c b/usr/src/uts/common/fs/zfs/vdev_file.c index c53112578b..b0ca32429c 100644 --- a/usr/src/uts/common/fs/zfs/vdev_file.c +++ b/usr/src/uts/common/fs/zfs/vdev_file.c @@ -31,6 +31,7 @@ #include <sys/vdev_impl.h> #include <sys/zio.h> #include <sys/fs/zfs.h> +#include <sys/fm/fs/zfs.h> /* * Virtual device vector for files. @@ -144,9 +145,14 @@ vdev_file_probe_io(vdev_t *vd, caddr_t data, size_t size, uint64_t offset, error = vn_rdwr(rw, vf->vf_vnode, data, size, offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); + if (error || resid != 0) return (EIO); - return (0); + + if (zio_injection_enabled) + error = zio_handle_device_injection(vd, EIO); + + return (error); } /* @@ -189,10 +195,10 @@ vdev_file_probe(vdev_t *vd) nvd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP); if (vd->vdev_path) nvd->vdev_path = spa_strdup(vd->vdev_path); + nvd->vdev_guid = vd->vdev_guid; retries++; - error = vdev_file_open_common(nvd); - if (error) + if (vdev_file_open_common(nvd) != 0) break; } @@ -295,8 +301,13 @@ vdev_file_io_done(zio_t *zio) * If an error has been encountered then attempt to probe the device * to determine if it's still accessible. */ - if (zio->io_error == EIO && vdev_probe(vd) != 0) - vd->vdev_is_failing = B_TRUE; + if (zio->io_error == EIO && vdev_probe(vd) != 0) { + if (!vd->vdev_is_failing) { + vd->vdev_is_failing = B_TRUE; + zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE, + vd->vdev_spa, vd, zio, 0, 0); + } + } vdev_queue_io_done(zio); diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c index b163f4f5f0..097fe26723 100644 --- a/usr/src/uts/common/fs/zfs/vdev_label.c +++ b/usr/src/uts/common/fs/zfs/vdev_label.c @@ -913,6 +913,15 @@ vdev_label_sync_top_done(zio_t *zio) } /* + * We ignore errors for log devices, simply free the private data. + */ +static void +vdev_label_sync_log_done(zio_t *zio) +{ + kmem_free(zio->io_private, sizeof (uint64_t)); +} + +/* * Write all even or odd labels to all leaves of the specified vdev. */ static void @@ -984,7 +993,8 @@ vdev_label_sync_list(spa_t *spa, int l, int flags, uint64_t txg) for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) { uint64_t *good_writes = kmem_zalloc(sizeof (uint64_t), KM_SLEEP); - zio_t *vio = zio_null(nio, spa, vdev_label_sync_top_done, + zio_t *vio = zio_null(nio, spa, vd->vdev_islog ? + vdev_label_sync_log_done : vdev_label_sync_top_done, good_writes, flags); vdev_label_sync(vio, vd, l, txg); zio_nowait(vio); diff --git a/usr/src/uts/common/fs/zfs/vdev_root.c b/usr/src/uts/common/fs/zfs/vdev_root.c index 77829c0aa3..a30e6a2876 100644 --- a/usr/src/uts/common/fs/zfs/vdev_root.c +++ b/usr/src/uts/common/fs/zfs/vdev_root.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -48,7 +48,7 @@ static int too_many_errors(vdev_t *vd, int numerrors) { ASSERT3U(numerrors, <=, vd->vdev_children); - return (numerrors == vd->vdev_children); + return (numerrors > 0); } static int @@ -67,22 +67,17 @@ vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) vdev_t *cvd = vd->vdev_child[c]; int error; - if ((error = vdev_open(cvd)) != 0) { + if ((error = vdev_open(cvd)) != 0 && + !cvd->vdev_islog) { lasterror = error; numerrors++; continue; } } - if (numerrors > 0) { - if (!too_many_errors(vd, numerrors)) { - /* XXX - should not be explicitly setting this state */ - vdev_set_state(vd, B_FALSE, VDEV_STATE_FAULTED, - VDEV_AUX_NO_REPLICAS); - } else { - vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; - return (lasterror); - } + if (too_many_errors(vd, numerrors)) { + vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; + return (lasterror); } *asize = 0; @@ -103,13 +98,9 @@ vdev_root_close(vdev_t *vd) static void vdev_root_state_change(vdev_t *vd, int faulted, int degraded) { - if (faulted) { - if (too_many_errors(vd, faulted)) - vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, - VDEV_AUX_NO_REPLICAS); - else - vdev_set_state(vd, B_FALSE, VDEV_STATE_FAULTED, - VDEV_AUX_NO_REPLICAS); + if (too_many_errors(vd, faulted)) { + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_NO_REPLICAS); } else if (degraded) { vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); } else { diff --git a/usr/src/uts/common/fs/zfs/zfs_fm.c b/usr/src/uts/common/fs/zfs/zfs_fm.c index aa27d3d6c1..5d35d0db23 100644 --- a/usr/src/uts/common/fs/zfs/zfs_fm.c +++ b/usr/src/uts/common/fs/zfs/zfs_fm.c @@ -141,6 +141,16 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, */ if (zio->io_flags & ZIO_FLAG_SPECULATIVE) return; + + /* + * If the vdev has already been marked as failing due to a + * failed probe, then ignore any subsequent I/O errors, as the + * DE will automatically fault the vdev on the first such + * failure. + */ + if (vd != NULL && vd->vdev_is_failing && + strcmp(subclass, FM_EREPORT_ZFS_PROBE_FAILURE) != 0) + return; } if ((ereport = fm_nvlist_create(NULL)) == NULL) diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c index 0a44e52d3c..6358d6647e 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c @@ -3140,6 +3140,9 @@ top: zfs_log_rename(zilog, tx, TX_RENAME | (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp); + + /* Update path information for the target vnode */ + vn_renamepath(tdvp, ZTOV(szp), tnm, strlen(tnm)); } } diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index 28eda8dea3..ed7436cb24 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -1834,8 +1834,12 @@ zio_should_retry(zio_t *zio) return (B_FALSE); if (zio->io_delegate_list != NULL) return (B_FALSE); - if (vd && vd != vd->vdev_top) - return (B_FALSE); + if (vd != NULL) { + if (vd != vd->vdev_top) + return (B_FALSE); + if (vd->vdev_is_failing) + return (B_FALSE); + } if (zio->io_flags & ZIO_FLAG_DONT_RETRY) return (B_FALSE); if (zio->io_retries > 0) diff --git a/usr/src/uts/common/sys/fm/fs/zfs.h b/usr/src/uts/common/sys/fm/fs/zfs.h index e82fe7af53..a927ed1f9d 100644 --- a/usr/src/uts/common/sys/fm/fs/zfs.h +++ b/usr/src/uts/common/sys/fm/fs/zfs.h @@ -46,6 +46,7 @@ extern "C" { #define FM_EREPORT_ZFS_DEVICE_TOO_SMALL "vdev.too_small" #define FM_EREPORT_ZFS_DEVICE_BAD_LABEL "vdev.bad_label" #define FM_EREPORT_ZFS_IO_FAILURE "io_failure" +#define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure" #define FM_EREPORT_PAYLOAD_ZFS_POOL "pool" #define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode" diff --git a/usr/src/uts/common/sys/vnode.h b/usr/src/uts/common/sys/vnode.h index c5e458513c..75caf7e189 100644 --- a/usr/src/uts/common/sys/vnode.h +++ b/usr/src/uts/common/sys/vnode.h @@ -1225,6 +1225,7 @@ void vn_copypath(struct vnode *src, struct vnode *dst); void vn_setpath_str(struct vnode *vp, const char *str, size_t len); void vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp, const char *path, size_t plen); +void vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len); /* Vnode event notification */ void vnevent_rename_src(vnode_t *, vnode_t *, char *, caller_context_t *); |