author	Alexandre Chartre <Alexandre.Chartre@oracle.com>	2010-04-20 17:11:45 -0700
committer	Alexandre Chartre <Alexandre.Chartre@oracle.com>	2010-04-20 17:11:45 -0700
commit	ca6d128098b8e877673c13a3db1a0d3417ce5879 (patch)
tree	1358db2f8099e7f85f30d6f5411acdf8b2c96c03
parent	52056d7b0df4718b60b8de756bee184ba1831169 (diff)
download	illumos-gate-ca6d128098b8e877673c13a3db1a0d3417ce5879.tar.gz
6931950 vdisk does not timeout when backend does not exist
6924052 iostat within guest domains report 100% busy following a control domain reboot
-rw-r--r--	usr/src/uts/sun4v/io/vdc.c	338
-rw-r--r--	usr/src/uts/sun4v/sys/vdc.h	24
-rw-r--r--	usr/src/uts/sun4v/sys/vdsk_common.h	9
3 files changed, 286 insertions, 85 deletions
diff --git a/usr/src/uts/sun4v/io/vdc.c b/usr/src/uts/sun4v/io/vdc.c
index 3479e2469a..77619e38fe 100644
--- a/usr/src/uts/sun4v/io/vdc.c
+++ b/usr/src/uts/sun4v/io/vdc.c
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -238,11 +237,52 @@ static void vdc_eio_thread(void *arg);
*/
/*
+ * Number of handshake retries with the current server before switching to
+ * a different server. These retries are done so that we stick with the same
+ * server if vdc receives an LDC reset event during the initiation of the
+ * handshake. This can happen if vdc resets the LDC channel and then
+ * immediately retries a connection before it has received the LDC reset event.
+ *
+ * If there is only one server then we "switch" to the same server. We also
+ * switch if the handshake has reached the attribute negotiation step,
+ * whatever the number of handshake retries might be.
+ */
+static uint_t vdc_hshake_retries = VDC_HSHAKE_RETRIES;
+
+/*
+ * If the handshake done during the attach fails then the two following
+ * variables will also be used to control the number of retries for the
+ * next handshakes. In that case, when a handshake is done after the
+ * attach (i.e. the vdc lifecycle is VDC_LC_ONLINE_PENDING) then the handshake
+ * will be retried until we have done an attribute negotiation with each
+ * server, with a specified minimum total number of negotiations (the value
+ * of the vdc_hattr_min_initial or vdc_hattr_min variable).
+ *
+ * This prevents new I/Os on a newly used vdisk from blocking forever if the
+ * attribute negotiations cannot be done, and it limits the amount of time
+ * before I/Os will fail. Basically, attribute negotiations will fail when
+ * the service is up but the backend does not exist. In that case, vds will
+ * typically retry accessing the backend for 50 seconds. So I/Os will fail
+ * after the following amount of time:
+ *
+ * 50 seconds x max(number of servers, vdc->hattr_min)
+ *
+ * After the handshake done during the attach has failed, the next
+ * handshake will use vdc_hattr_min_initial. This handshake will correspond to
+ * the very first I/O to the device. If this handshake also fails then
+ * vdc_hattr_min will be used for subsequent handshakes. We typically allow
+ * more retries for the first handshake (VDC_HATTR_MIN_INITIAL = 3) to give more
+ * time for the backend to become available (50s x VDC_HATTR_MIN_INITIAL = 150s)
+ * in case this is a critical vdisk (e.g. vdisk access during boot). Then we use
+ * a smaller value (VDC_HATTR_MIN = 1) to avoid waiting too long for each I/O.
+ */
+static uint_t vdc_hattr_min_initial = VDC_HATTR_MIN_INITIAL;
+static uint_t vdc_hattr_min = VDC_HATTR_MIN;
+
+/*
* Tunable variables to control how long vdc waits before timing out on
* various operations
*/
-static int vdc_hshake_retries = 3;
-
static int vdc_timeout = 0; /* units: seconds */
static int vdc_ldcup_timeout = 1; /* units: seconds */
@@ -457,7 +497,9 @@ vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
mutex_exit(&vdc->ownership_lock);
/* mark instance as detaching */
+ mutex_enter(&vdc->lock);
vdc->lifecycle = VDC_LC_DETACHING;
+ mutex_exit(&vdc->lock);
/*
* Try and disable callbacks to prevent another handshake. We have to
@@ -744,9 +786,9 @@ vdc_do_attach(dev_info_t *dip)
* available when creating the kstat
*/
vdc_set_err_kstats(vdc);
-
ddi_report_dev(dip);
- vdc->lifecycle = VDC_LC_ONLINE;
+ ASSERT(vdc->lifecycle == VDC_LC_ONLINE ||
+ vdc->lifecycle == VDC_LC_ONLINE_PENDING);
DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance);
return_status:
@@ -2433,13 +2475,6 @@ vdc_init_ports(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_nodep)
vdc->num_servers++;
}
- /*
- * Adjust the max number of handshake retries to match
- * the number of vdisk servers.
- */
- if (vdc_hshake_retries < vdc->num_servers)
- vdc_hshake_retries = vdc->num_servers;
-
/* pick first server as current server */
if (vdc->server_list != NULL) {
vdc->curr_server = vdc->server_list;
@@ -2906,7 +2941,7 @@ vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr,
/*
* If this is a block read/write operation we update the I/O statistics
* to indicate that the request is being put on the waitq to be
- * serviced.
+ * serviced. Operations which are resubmitted are already in the waitq.
*
* We do it here (a common routine for both synchronous and strategy
* calls) for performance reasons - we are already holding vdc->lock
@@ -2914,7 +2949,8 @@ vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr,
* grab the 'lock' mutex to update the stats if we were to do this
* higher up the stack in vdc_strategy() et. al.
*/
- if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) {
+ if (((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) &&
+ !(flags & VDC_OP_RESUBMIT)) {
DTRACE_IO1(start, buf_t *, bufp);
VD_KSTAT_WAITQ_ENTER(vdcp);
}
@@ -2968,7 +3004,7 @@ vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr,
}
} while (vdc_populate_descriptor(vdcp, operation, addr,
- nbytes, slice, offset, bufp, dir, flags));
+ nbytes, slice, offset, bufp, dir, flags & ~VDC_OP_RESUBMIT));
done:
/*
@@ -2976,7 +3012,9 @@ done:
* to indicate that this request has been placed on the queue for
* processing (i.e sent to the vDisk server) - iostat(1M) will
* report the time waiting for the vDisk server under the %b column
- * In the case of an error we simply take it off the wait queue.
+ *
+ * In the case of an error we take it off the wait queue only if
+ * the I/O was not resubmitted.
*/
if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) {
if (rv == 0) {
@@ -2984,8 +3022,10 @@ done:
DTRACE_PROBE1(send, buf_t *, bufp);
} else {
VD_UPDATE_ERR_STATS(vdcp, vd_transerrs);
- VD_KSTAT_WAITQ_EXIT(vdcp);
- DTRACE_IO1(done, buf_t *, bufp);
+ if (!(flags & VDC_OP_RESUBMIT)) {
+ VD_KSTAT_WAITQ_EXIT(vdcp);
+ DTRACE_IO1(done, buf_t *, bufp);
+ }
}
}
@@ -3235,24 +3275,31 @@ vdc_do_op(vdc_t *vdc, int op, caddr_t addr, size_t nbytes, int slice,
} else {
/* wait for the response message */
rv = vdc_wait_for_response(vdc, &vio_msg);
+
+ if (rv == 0)
+ rv = vdc_process_data_msg(vdc, &vio_msg);
+
if (rv) {
/*
* If this is a block read/write we update the I/O
* statistics kstat to take it off the run queue.
+ * If it is a resubmit then it needs to stay
+ * in the waitq, and it will be removed when the
+ * I/O is eventually completed or cancelled.
*/
mutex_enter(&vdc->lock);
if (op == VD_OP_BREAD || op == VD_OP_BWRITE) {
- VD_UPDATE_ERR_STATS(vdc, vd_transerrs);
- VD_KSTAT_RUNQ_EXIT(vdc);
- DTRACE_IO1(done, buf_t *, bufp);
+ if (flags & VDC_OP_RESUBMIT) {
+ VD_KSTAT_RUNQ_BACK_TO_WAITQ(vdc);
+ } else {
+ VD_KSTAT_RUNQ_EXIT(vdc);
+ DTRACE_IO1(done, buf_t *, bufp);
+ }
}
mutex_exit(&vdc->lock);
goto done;
}
- rv = vdc_process_data_msg(vdc, &vio_msg);
- if (rv)
- goto done;
}
if (bufp == &buf)
@@ -3927,7 +3974,8 @@ vdc_resubmit_backup_dring(vdc_t *vdcp)
curr_ldep->addr, curr_ldep->nbytes,
curr_ldep->slice, curr_ldep->offset,
curr_ldep->buf, curr_ldep->dir,
- curr_ldep->flags & ~VDC_OP_STATE_RUNNING);
+ (curr_ldep->flags & ~VDC_OP_STATE_RUNNING) |
+ VDC_OP_RESUBMIT);
if (rv) {
DMSG(vdcp, 1, "[%d] resubmit entry %d failed\n",
@@ -4027,7 +4075,7 @@ vdc_cancel_backup_dring(vdc_t *vdcp)
if (ldep->operation == VD_OP_BREAD ||
ldep->operation == VD_OP_BWRITE) {
VD_UPDATE_ERR_STATS(vdcp, vd_softerrs);
- VD_KSTAT_RUNQ_EXIT(vdcp);
+ VD_KSTAT_WAITQ_EXIT(vdcp);
DTRACE_IO1(done, buf_t *, bufp);
}
bioerror(bufp, EIO);
@@ -4057,12 +4105,8 @@ vdc_cancel_backup_dring(vdc_t *vdcp)
* Description:
* This function is invoked if the timeout set to establish the connection
* with vds expires. This will happen if we spend too much time in the
- * VDC_STATE_INIT_WAITING or VDC_STATE_NEGOTIATE states.
- *
- * If the timeout does not expire, it will be cancelled when we reach the
- * VDC_STATE_HANDLE_PENDING, VDC_STATE_FAILED or VDC_STATE_DETACH state.
- * This function can also be invoked while we are in those states, in
- * which case we do nothing because the timeout is being cancelled.
+ * VDC_STATE_INIT_WAITING, VDC_STATE_NEGOTIATE or VDC_STATE_HANDLE_PENDING
+ * states.
*
* Arguments:
* arg - argument of the timeout function actually a soft state
@@ -4078,17 +4122,7 @@ vdc_connection_timeout(void *arg)
mutex_enter(&vdcp->lock);
- if (vdcp->state == VDC_STATE_HANDLE_PENDING ||
- vdcp->state == VDC_STATE_DETACH ||
- vdcp->state == VDC_STATE_FAILED) {
- /*
- * The connection has just been re-established, has failed or
- * we are detaching.
- */
- vdcp->ctimeout_reached = B_FALSE;
- } else {
- vdcp->ctimeout_reached = B_TRUE;
- }
+ vdcp->ctimeout_reached = B_TRUE;
mutex_exit(&vdcp->lock);
}
@@ -4111,7 +4145,8 @@ vdc_connection_timeout(void *arg)
static void
vdc_backup_local_dring(vdc_t *vdcp)
{
- int dring_size;
+ int b_idx, count, dring_size;
+ vdc_local_desc_t *curr_ldep;
ASSERT(MUTEX_HELD(&vdcp->lock));
ASSERT(vdcp->state == VDC_STATE_RESETTING);
@@ -4150,6 +4185,28 @@ vdc_backup_local_dring(vdc_t *vdcp)
vdcp->local_dring_backup_tail = vdcp->dring_curr_idx;
vdcp->local_dring_backup_len = vdcp->dring_len;
+
+ /*
+ * At this point, pending read or write I/Os are recorded in the
+ * runq. We update the I/O statistics to indicate that they are now
+ * back in the waitq.
+ */
+ b_idx = vdcp->local_dring_backup_tail;
+ for (count = 0; count < vdcp->local_dring_backup_len; count++) {
+
+ curr_ldep = &(vdcp->local_dring_backup[b_idx]);
+
+ if (!curr_ldep->is_free &&
+ (curr_ldep->operation == VD_OP_BREAD ||
+ curr_ldep->operation == VD_OP_BWRITE)) {
+ VD_KSTAT_RUNQ_BACK_TO_WAITQ(vdcp);
+ }
+
+ /* get the next element */
+ if (++b_idx >= vdcp->local_dring_backup_len)
+ b_idx = 0;
+ }
+
}
static void
@@ -4238,6 +4295,108 @@ vdc_print_svc_status(vdc_t *vdcp)
vdcp->curr_server->log_state = svc_state;
}
+/*
+ * Function:
+ * vdc_handshake_retry
+ *
+ * Description:
+ * This function indicates if the handshake should be retried or not.
+ * This depends on the lifecycle of the driver:
+ *
+ * VDC_LC_ATTACHING: the handshake is retried until we have tried
+ * a handshake with each server. We don't care how far each handshake
+ * went, the goal is just to try the handshake. We want to minimize
+ * the time spent doing the attach because it locks the device
+ * tree.
+ *
+ * VDC_LC_ONLINE_PENDING: the handshake is retried while we haven't done
+ * consecutive attribute negotiations with each server, and we haven't
+ * reached a minimum total of consecutive negotiations (hattr_min). The
+ * number of attribute negotiations determines the time spent before
+ * failing pending I/Os if the handshake is not successful.
+ *
+ * VDC_LC_ONLINE: the handshake is always retried, until we have a
+ * successful handshake with a server.
+ *
+ * VDC_LC_DETACHING: N/A
+ *
+ * Arguments:
+ * hshake_cnt - number of handshake attempts
+ * hattr_cnt - number of attribute negotiation attempts
+ *
+ * Return Code:
+ * B_TRUE - handshake should be retried
+ * B_FALSE - handshake should not be retried
+ */
+static boolean_t
+vdc_handshake_retry(vdc_t *vdcp, int hshake_cnt, int hattr_cnt)
+{
+ int hattr_total = 0;
+ vdc_server_t *srvr;
+
+ ASSERT(vdcp->lifecycle != VDC_LC_DETACHING);
+
+ /* update handshake counters */
+ vdcp->curr_server->hshake_cnt = hshake_cnt;
+ vdcp->curr_server->hattr_cnt = hattr_cnt;
+
+ /*
+ * If no attribute negotiation was done then we reset the total
+ * number otherwise we cumulate the number.
+ */
+ if (hattr_cnt == 0)
+ vdcp->curr_server->hattr_total = 0;
+ else
+ vdcp->curr_server->hattr_total += hattr_cnt;
+
+ /*
+ * If we are online (i.e. at least one handshake was successfully
+ * completed) then we always retry the handshake.
+ */
+ if (vdcp->lifecycle == VDC_LC_ONLINE)
+ return (B_TRUE);
+
+ /*
+ * If we are attaching then we retry the handshake only if we haven't
+ * tried with all servers.
+ */
+ if (vdcp->lifecycle == VDC_LC_ATTACHING) {
+
+ for (srvr = vdcp->server_list; srvr != NULL;
+ srvr = srvr->next) {
+ if (srvr->hshake_cnt == 0) {
+ return (B_TRUE);
+ }
+ }
+
+ return (B_FALSE);
+ }
+
+ /*
+ * Here we are in the case where we haven't completed any handshake
+ * successfully yet.
+ */
+ ASSERT(vdcp->lifecycle == VDC_LC_ONLINE_PENDING);
+
+ /*
+ * We retry the handshake if we haven't done an attribute negotiation
+ * with each server. This is to handle the case where one service domain
+ * is down.
+ */
+ for (srvr = vdcp->server_list; srvr != NULL; srvr = srvr->next) {
+ if (srvr->hattr_cnt == 0) {
+ return (B_TRUE);
+ }
+ hattr_total += srvr->hattr_total;
+ }
+
+ /*
+ * We retry the handshake if we haven't reached the minimum number of
+ * attribute negotiation.
+ */
+ return (hattr_total < vdcp->hattr_min);
+}
+
/* -------------------------------------------------------------------------- */
/*
@@ -4264,15 +4423,20 @@ vdc_print_svc_status(vdc_t *vdcp)
static void
vdc_process_msg_thread(vdc_t *vdcp)
{
+ boolean_t failure_msg = B_FALSE;
int status;
int ctimeout;
timeout_id_t tmid = 0;
clock_t ldcup_timeout = 0;
vdc_server_t *srvr;
vdc_service_state_t svc_state;
+ int hshake_cnt = 0;
+ int hattr_cnt = 0;
mutex_enter(&vdcp->lock);
+ ASSERT(vdcp->lifecycle == VDC_LC_ATTACHING);
+
for (;;) {
#define Q(_s) (vdcp->state == _s) ? #_s :
@@ -4287,6 +4451,7 @@ vdc_process_msg_thread(vdc_t *vdcp)
Q(VDC_STATE_RESETTING)
Q(VDC_STATE_DETACH)
"UNKNOWN");
+#undef Q
switch (vdcp->state) {
case VDC_STATE_INIT:
@@ -4331,20 +4496,28 @@ vdc_process_msg_thread(vdc_t *vdcp)
break;
}
- /* Check if we are re-initializing repeatedly */
- if (vdcp->hshake_cnt > vdc_hshake_retries &&
- vdcp->lifecycle != VDC_LC_ONLINE) {
+ /*
+ * Switch to another server when we reach the limit of
+ * the number of handshake per server or if we have done
+ * an attribute negotiation.
+ */
+ if (hshake_cnt >= vdc_hshake_retries || hattr_cnt > 0) {
- DMSG(vdcp, 0, "[%d] too many handshakes,cnt=%d",
- vdcp->instance, vdcp->hshake_cnt);
- vdcp->state = VDC_STATE_FAILED;
- break;
- }
+ if (!vdc_handshake_retry(vdcp, hshake_cnt,
+ hattr_cnt)) {
+ DMSG(vdcp, 0, "[%d] too many "
+ "handshakes", vdcp->instance);
+ vdcp->state = VDC_STATE_FAILED;
+ break;
+ }
- /* Switch server */
- if (vdcp->hshake_cnt > 0)
vdc_switch_server(vdcp);
- vdcp->hshake_cnt++;
+
+ hshake_cnt = 0;
+ hattr_cnt = 0;
+ }
+
+ hshake_cnt++;
/* Bring up connection with vds via LDC */
status = vdc_start_ldc_connection(vdcp);
@@ -4401,6 +4574,8 @@ vdc_process_msg_thread(vdc_t *vdcp)
goto reset;
}
+ hattr_cnt++;
+
switch (status = vdc_attr_negotiation(vdcp)) {
case 0:
break;
@@ -4523,8 +4698,17 @@ done:
* reported and we wait for a new I/O before attempting
* another connection.
*/
+
cmn_err(CE_NOTE, "vdisk@%d disk access failed",
vdcp->instance);
+ failure_msg = B_TRUE;
+
+ if (vdcp->lifecycle == VDC_LC_ATTACHING) {
+ vdcp->lifecycle = VDC_LC_ONLINE_PENDING;
+ vdcp->hattr_min = vdc_hattr_min_initial;
+ } else {
+ vdcp->hattr_min = vdc_hattr_min;
+ }
/* cancel any timeout */
if (tmid != 0) {
@@ -4549,10 +4733,14 @@ done:
for (srvr = vdcp->server_list; srvr != NULL;
srvr = srvr->next) {
srvr->svc_state = VDC_SERVICE_OFFLINE;
+ srvr->hshake_cnt = 0;
+ srvr->hattr_cnt = 0;
+ srvr->hattr_total = 0;
}
/* reset variables */
- vdcp->hshake_cnt = 0;
+ hshake_cnt = 0;
+ hattr_cnt = 0;
vdcp->ctimeout_reached = B_FALSE;
vdcp->state = VDC_STATE_RESETTING;
@@ -4561,11 +4749,24 @@ done:
/* enter running state */
case VDC_STATE_RUNNING:
+
+ if (vdcp->lifecycle == VDC_LC_DETACHING) {
+ vdcp->state = VDC_STATE_DETACH;
+ break;
+ }
+
+ vdcp->lifecycle = VDC_LC_ONLINE;
+
+ if (failure_msg) {
+ cmn_err(CE_NOTE, "vdisk@%d disk access "
+ "recovered", vdcp->instance);
+ failure_msg = B_FALSE;
+ }
+
/*
* Signal anyone waiting for the connection
* to come on line.
*/
- vdcp->hshake_cnt = 0;
cv_broadcast(&vdcp->running_cv);
/* backend has to be checked after reset */
@@ -4607,8 +4808,14 @@ done:
srvr = srvr->next) {
srvr->svc_state = VDC_SERVICE_OFFLINE;
srvr->log_state = VDC_SERVICE_NONE;
+ srvr->hshake_cnt = 0;
+ srvr->hattr_cnt = 0;
+ srvr->hattr_total = 0;
}
+ hshake_cnt = 0;
+ hattr_cnt = 0;
+
vdc_print_svc_status(vdcp);
vdcp->state = VDC_STATE_RESETTING;
@@ -4795,20 +5002,9 @@ vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg)
if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) {
/*
* Update the I/O statistics to indicate that an error ocurred.
- *
- * We need to update the run queue if a read or write request
- * is being NACKed - otherwise there will appear to be an
- * indefinite outstanding request and statistics reported by
- * iostat(1M) will be incorrect. The transaction will be
- * resubmitted from the backup DRing following the reset
- * and the wait/run queues will be entered again.
+ * No need to update the wait/run queues; this will be done by
+ * the thread calling this function.
*/
- ldep = &vdcp->local_dring[idx];
- op = ldep->operation;
- if ((op == VD_OP_BREAD) || (op == VD_OP_BWRITE)) {
- DTRACE_IO1(done, buf_t *, ldep->buf);
- VD_KSTAT_RUNQ_EXIT(vdcp);
- }
VD_UPDATE_ERR_STATS(vdcp, vd_softerrs);
VDC_DUMP_DRING_MSG(dring_msg);
DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance);
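
Aside (not part of the commit): the comment added at the top of vdc.c estimates the worst-case time before pending I/Os fail, when the backend does not exist, as 50 seconds x max(number of servers, vdc->hattr_min). The stand-alone sketch below simply evaluates that estimate for the VDC_HATTR_MIN_INITIAL and VDC_HATTR_MIN defaults; the helper name vdc_worst_case_fail_secs is hypothetical and the 50-second per-attempt figure is taken from the comment above.

/*
 * Illustrative sketch only: rough worst-case time, in seconds, before
 * pending I/Os fail when the backend is missing, following the
 * "50 seconds x max(number of servers, vdc->hattr_min)" estimate.
 */
#include <stdio.h>

#define	VDS_BACKEND_RETRY_SECS	50	/* per-attempt vds retry time (from the comment) */
#define	VDC_HATTR_MIN_INITIAL	3	/* first post-attach handshake */
#define	VDC_HATTR_MIN		1	/* subsequent handshakes */

static unsigned int
vdc_worst_case_fail_secs(unsigned int num_servers, unsigned int hattr_min)
{
	unsigned int n = (num_servers > hattr_min) ? num_servers : hattr_min;

	return (VDS_BACKEND_RETRY_SECS * n);
}

int
main(void)
{
	/* one server, first I/O after a failed attach: 50 x 3 = 150s */
	printf("first handshake:       %us\n",
	    vdc_worst_case_fail_secs(1, VDC_HATTR_MIN_INITIAL));
	/* later handshakes on the same configuration: 50 x 1 = 50s */
	printf("subsequent handshakes: %us\n",
	    vdc_worst_case_fail_secs(1, VDC_HATTR_MIN));
	return (0);
}

With a single server, the first post-attach handshake therefore tolerates roughly 150 seconds of backend unavailability (useful for a boot-critical vdisk), while later handshakes give up after about 50 seconds.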
diff --git a/usr/src/uts/sun4v/sys/vdc.h b/usr/src/uts/sun4v/sys/vdc.h
index 54eb5080d7..bd10ee336c 100644
--- a/usr/src/uts/sun4v/sys/vdc.h
+++ b/usr/src/uts/sun4v/sys/vdc.h
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _VDC_H
@@ -100,6 +99,7 @@ extern "C" {
#define VDC_OP_ERRCHK_BACKEND 0x02 /* check backend on error */
#define VDC_OP_ERRCHK_CONFLICT 0x04 /* check resv conflict on error */
#define VDC_OP_DRING_RESERVED 0x08 /* use dring reserved entry */
+#define VDC_OP_RESUBMIT 0x10 /* I/O is being resubmitted */
#define VDC_OP_ERRCHK (VDC_OP_ERRCHK_BACKEND | VDC_OP_ERRCHK_CONFLICT)
#define VDC_OP_NORMAL (VDC_OP_STATE_RUNNING | VDC_OP_ERRCHK)
@@ -123,16 +123,12 @@ extern "C" {
VDSK_EFI_DEV_SET(dev, vdsk, ioctl, \
(vdsk)->vdisk_bsize, (vdsk)->vdisk_size)
-/*
- * variables controlling how long to wait before timing out and how many
- * retries to attempt before giving up when communicating with vds.
- *
- * These values need to be sufficiently large so that a guest can survive
- * the reboot of the service domain.
- */
-#define VDC_RETRIES 10
+/* max number of handshake retries per server */
+#define VDC_HSHAKE_RETRIES 3
-#define VDC_USEC_TIMEOUT_MIN (30 * MICROSEC) /* 30 sec */
+/* minimum number of attribute negotiations before handshake failure */
+#define VDC_HATTR_MIN_INITIAL 3
+#define VDC_HATTR_MIN 1
/*
* This macro returns the number of Hz that the vdc driver should wait before
@@ -216,6 +212,7 @@ typedef enum vdc_service_state {
*/
typedef enum vdc_lc_state {
VDC_LC_ATTACHING, /* driver is attaching */
+ VDC_LC_ONLINE_PENDING, /* driver is attached, handshake pending */
VDC_LC_ONLINE, /* driver is attached and online */
VDC_LC_DETACHING /* driver is detaching */
} vdc_lc_state_t;
@@ -282,6 +279,9 @@ typedef struct vdc_server {
ldc_handle_t ldc_handle; /* Server LDC handle */
ldc_status_t ldc_state; /* Server LDC state */
uint64_t ctimeout; /* conn tmout (secs) */
+ uint_t hshake_cnt; /* handshakes count */
+ uint_t hattr_cnt; /* attr. neg. count */
+ uint_t hattr_total; /* attr. neg. total */
} vdc_server_t;
/*
@@ -300,8 +300,8 @@ typedef struct vdc {
int initialized; /* keeps track of what's init'ed */
vdc_lc_state_t lifecycle; /* Current state of the vdc instance */
+ uint_t hattr_min; /* min. # attribute negotiations */
- int hshake_cnt; /* number of failed handshakes */
uint8_t open[OTYPCNT]; /* mask of opened slices */
uint8_t open_excl; /* mask of exclusively opened slices */
ulong_t open_lyr[V_NUMPAR]; /* number of layered opens */
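
Aside (not part of the commit): the new hshake_cnt/hattr_cnt/hattr_total fields in vdc_server_t and the hattr_min field in vdc_t feed the per-lifecycle retry decision made by vdc_handshake_retry(). The fragment below is a simplified user-space model of that decision; the type and function names are hypothetical, and it sketches the policy rather than reproducing the kernel code.

/*
 * Stand-alone sketch of the handshake retry policy (assumed names).
 */
#include <stdbool.h>
#include <stddef.h>

typedef enum { LC_ATTACHING, LC_ONLINE_PENDING, LC_ONLINE } lc_state_t;

typedef struct server {
	unsigned int	hshake_cnt;	/* handshakes tried with this server */
	unsigned int	hattr_cnt;	/* attr. negotiations, current round */
	unsigned int	hattr_total;	/* cumulative attr. negotiations */
	struct server	*next;
} server_t;

static bool
handshake_should_retry(lc_state_t lc, server_t *servers, unsigned int hattr_min)
{
	unsigned int hattr_total = 0;
	server_t *s;

	if (lc == LC_ONLINE)		/* already worked once: always retry */
		return (true);

	if (lc == LC_ATTACHING) {	/* attach: one try per server is enough */
		for (s = servers; s != NULL; s = s->next) {
			if (s->hshake_cnt == 0)
				return (true);
		}
		return (false);
	}

	/* LC_ONLINE_PENDING: need an attr. negotiation with every server... */
	for (s = servers; s != NULL; s = s->next) {
		if (s->hattr_cnt == 0)
			return (true);
		hattr_total += s->hattr_total;
	}
	/* ...and at least hattr_min negotiations overall */
	return (hattr_total < hattr_min);
}

int
main(void)
{
	server_t s1 = { 0, 0, 0, NULL };

	/* attach-time policy: one untried server left, so retry */
	return (handshake_should_retry(LC_ATTACHING, &s1, 1) ? 0 : 1);
}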
diff --git a/usr/src/uts/sun4v/sys/vdsk_common.h b/usr/src/uts/sun4v/sys/vdsk_common.h
index 0464964847..ca1ae648f4 100644
--- a/usr/src/uts/sun4v/sys/vdsk_common.h
+++ b/usr/src/uts/sun4v/sys/vdsk_common.h
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _VDSK_COMMON_H
@@ -581,6 +580,12 @@ void vd_efi_free(vd_efi_dev_t *dev, efi_gpt_t *gpt, efi_gpe_t *gpe);
kstat_waitq_to_runq(KSTAT_IO_PTR((vd)->io_stats)); \
}
+#define VD_KSTAT_RUNQ_BACK_TO_WAITQ(vd) \
+ if ((vd)->io_stats != NULL) { \
+ ASSERT(MUTEX_HELD(&(vd)->lock)); \
+ kstat_runq_back_to_waitq(KSTAT_IO_PTR((vd)->io_stats)); \
+ }
+
#define VD_KSTAT_RUNQ_ENTER(vd) \
if ((vd)->io_stats != NULL) { \
ASSERT(MUTEX_HELD(&(vd)->lock)); \
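
Aside (not part of the commit): the new VD_KSTAT_RUNQ_BACK_TO_WAITQ() macro wraps kstat_runq_back_to_waitq() so that read/write I/Os pending across an LDC reset are accounted back on the wait queue instead of lingering on the run queue, which is what previously made iostat report 100% busy after a control domain reboot (bug 6924052). The toy simulation below only mimics those queue transitions with plain counters; it is an assumed model, not the illumos kstat API.

/*
 * Toy model of the wait-queue/run-queue accounting touched by this change.
 */
#include <assert.h>
#include <stdio.h>

typedef struct {
	int	wcnt;	/* I/Os on the wait queue */
	int	rcnt;	/* I/Os on the run queue */
} io_stats_t;

static void waitq_enter(io_stats_t *s)		{ s->wcnt++; }
static void waitq_exit(io_stats_t *s)		{ assert(s->wcnt > 0); s->wcnt--; }
static void waitq_to_runq(io_stats_t *s)	{ waitq_exit(s); s->rcnt++; }
static void runq_exit(io_stats_t *s)		{ assert(s->rcnt > 0); s->rcnt--; }
static void runq_back_to_waitq(io_stats_t *s)	{ runq_exit(s); s->wcnt++; }

int
main(void)
{
	io_stats_t st = { 0, 0 };

	waitq_enter(&st);		/* vdc_send_request(): new I/O queued */
	waitq_to_runq(&st);		/* descriptor sent to the vdisk server */
	runq_back_to_waitq(&st);	/* LDC reset: I/O goes back to the waitq */
	waitq_exit(&st);		/* later completed or cancelled */

	/* both queues drain back to zero, so iostat %b returns to idle */
	printf("wcnt=%d rcnt=%d\n", st.wcnt, st.rcnt);
	return (0);
}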