diff options
author | Saso Kiselkov <skiselkov@gmail.com> | 2013-05-23 08:52:46 -0800 |
---|---|---|
committer | Christopher Siden <chris.siden@delphix.com> | 2013-05-23 09:52:46 -0700 |
commit | a49dc89305dba244dc67270b5afddcf7da3e36cf (patch) | |
tree | ce16430dce38ea87ef94c217a97820decb4f2efa | |
parent | 56b2bdd1f04d465cfe4a95b88ae5cba5884154e4 (diff) | |
download | illumos-joyent-a49dc89305dba244dc67270b5afddcf7da3e36cf.tar.gz |
3621 ZFS LU stuck in the offlining state
Reviewed by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed by: Jeff Biseda <jeff.biseda@delphix.com>
Reviewed by: Dan McDonald <danmcd@nexenta.com>
Approved by: Christopher Siden <christopher.siden@delphix.com>
-rw-r--r-- | usr/src/uts/common/io/comstar/stmf/stmf.c | 55 | ||||
-rw-r--r-- | usr/src/uts/common/io/comstar/stmf/stmf_impl.h | 2 |
2 files changed, 55 insertions, 2 deletions
diff --git a/usr/src/uts/common/io/comstar/stmf/stmf.c b/usr/src/uts/common/io/comstar/stmf/stmf.c
index 1422e12daf..2981453cd4 100644
--- a/usr/src/uts/common/io/comstar/stmf/stmf.c
+++ b/usr/src/uts/common/io/comstar/stmf/stmf.c
@@ -23,6 +23,8 @@
  */
 /*
  * Copyright 2012, Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  */
 
 #include <sys/conf.h>
@@ -65,6 +67,15 @@ static uint64_t stmf_proxy_msg_id = 1;
 #define MSG_ID_TM_BIT 0x8000000000000000
 #define ALIGNED_TO_8BYTE_BOUNDARY(i) (((i) + 7) & ~7)
 
+/*
+ * When stmf_io_deadman_enabled is set to B_TRUE, we check that finishing up
+ * I/O operations on an offlining LU doesn't take longer than stmf_io_deadman
+ * seconds. If it does, we trigger a panic to inform the user of hung I/O
+ * blocking us for too long.
+ */
+boolean_t stmf_io_deadman_enabled = B_TRUE;
+int stmf_io_deadman = 1000; /* seconds */
+
 struct stmf_svc_clocks;
 
 static int stmf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
@@ -91,6 +102,7 @@ stmf_xfer_data_t *stmf_prepare_tpgs_data(uint8_t ilu_alua);
 void stmf_svc_init();
 stmf_status_t stmf_svc_fini();
 void stmf_svc(void *arg);
+static void stmf_wait_ilu_tasks_finish(stmf_i_lu_t *ilu);
 void stmf_svc_queue(int cmd, void *obj, stmf_state_change_info_t *info);
 static void stmf_svc_kill_obj_requests(void *obj);
 static void stmf_svc_timeout(struct stmf_svc_clocks *);
@@ -3059,6 +3071,7 @@ stmf_register_lu(stmf_lu_t *lu)
 	}
 	ilu->ilu_cur_task_cntr = &ilu->ilu_task_cntr1;
 	STMF_EVENT_ALLOC_HANDLE(ilu->ilu_event_hdl);
+	cv_init(&ilu->ilu_offline_pending_cv, NULL, CV_DRIVER, NULL);
 	stmf_create_kstat_lu(ilu);
 	/*
 	 * register with proxy module if available and logical unit
@@ -3197,6 +3210,7 @@ stmf_deregister_lu(stmf_lu_t *lu)
 		mutex_destroy(&ilu->ilu_kstat_lock);
 	}
 	stmf_delete_itl_kstat_by_guid(ilu->ilu_ascii_hex_guid);
+	cv_destroy(&ilu->ilu_offline_pending_cv);
 	mutex_exit(&stmf_state.stmf_lock);
 	return (STMF_SUCCESS);
 }
@@ -4447,6 +4461,8 @@ stmf_task_alloc(struct stmf_local_port *lport, stmf_scsi_session_t *ss,
 		rw_exit(iss->iss_lockp);
 		return (NULL);
 	}
+	ASSERT(lu == dlun0 || (ilu->ilu_state != STMF_STATE_OFFLINING &&
+	    ilu->ilu_state != STMF_STATE_OFFLINE));
 	do {
 		if (ilu->ilu_free_tasks == NULL) {
 			new_task = 1;
@@ -4575,6 +4591,8 @@ stmf_task_lu_free(scsi_task_t *task, stmf_i_scsi_session_t *iss)
 	itask->itask_lu_free_next = ilu->ilu_free_tasks;
 	ilu->ilu_free_tasks = itask;
 	ilu->ilu_ntasks_free++;
+	if (ilu->ilu_ntasks == ilu->ilu_ntasks_free)
+		cv_signal(&ilu->ilu_offline_pending_cv);
 	mutex_exit(&ilu->ilu_task_lock);
 	atomic_add_32(itask->itask_ilu_task_cntr, -1);
 }
@@ -7838,8 +7856,7 @@ stmf_svc(void *arg)
 			    STMF_ABORTED);
 			lu = (stmf_lu_t *)req->svc_obj;
 			ilu = (stmf_i_lu_t *)lu->lu_stmf_private;
-			if (ilu->ilu_ntasks != ilu->ilu_ntasks_free)
-				break;
+			stmf_wait_ilu_tasks_finish(ilu);
 			lu->lu_ctl(lu, req->svc_cmd, &req->svc_info);
 			break;
 		default:
@@ -7991,6 +8008,40 @@ stmf_svc_timeout(struct stmf_svc_clocks *clks)
 	stmf_state.stmf_svc_flags |= STMF_SVC_ACTIVE;
 }
 
+/*
+ * Waits for ongoing I/O tasks to finish on an LU in preparation for
+ * the LU's offlining. The LU should already be in an Offlining state
+ * (otherwise I/O to the LU might never end). There is an additional
+ * enforcement of this via a deadman timer check.
+ */
+static void
+stmf_wait_ilu_tasks_finish(stmf_i_lu_t *ilu)
+{
+	clock_t start, now, deadline;
+
+	start = now = ddi_get_lbolt();
+	deadline = start + drv_usectohz(stmf_io_deadman * 1000000llu);
+	mutex_enter(&ilu->ilu_task_lock);
+	while (ilu->ilu_ntasks != ilu->ilu_ntasks_free) {
+		(void) cv_timedwait(&ilu->ilu_offline_pending_cv,
+		    &ilu->ilu_task_lock, deadline);
+		now = ddi_get_lbolt();
+		if (now > deadline) {
+			if (stmf_io_deadman_enabled) {
+				cmn_err(CE_PANIC, "stmf_svc: I/O deadman hit "
+				    "on STMF_CMD_LU_OFFLINE after %d seconds",
+				    stmf_io_deadman);
+			} else {
+				/* keep on spinning */
+				deadline = now + drv_usectohz(stmf_io_deadman *
+				    1000000llu);
+			}
+		}
+	}
+	mutex_exit(&ilu->ilu_task_lock);
+	DTRACE_PROBE1(deadman__timeout__wait, clock_t, now - start);
+}
+
 void
 stmf_svc_queue(int cmd, void *obj, stmf_state_change_info_t *info)
 {
diff --git a/usr/src/uts/common/io/comstar/stmf/stmf_impl.h b/usr/src/uts/common/io/comstar/stmf/stmf_impl.h
index 306193d7fe..e5046ec64d 100644
--- a/usr/src/uts/common/io/comstar/stmf/stmf_impl.h
+++ b/usr/src/uts/common/io/comstar/stmf/stmf_impl.h
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 #ifndef _STMF_IMPL_H
 #define _STMF_IMPL_H
@@ -97,6 +98,7 @@ typedef struct stmf_i_lu {
 	kstat_t *ilu_kstat_info;
 	kstat_t *ilu_kstat_io;
 	kmutex_t ilu_kstat_lock;
+	kcondvar_t ilu_offline_pending_cv;
 
 	/* point to the luid entry in stmf_state.stmf_luid_list */
 	void *ilu_luid;