summaryrefslogtreecommitdiff
path: root/usr/src/uts/sun4v/sys/vdc.h
blob: 60d115a18a0f1f0eefa0790427819886366fd982 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_VDC_H
#define	_VDC_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Virtual disk client implementation definitions
 */

#include <sys/sysmacros.h>
#include <sys/note.h>

#include <sys/ldc.h>
#include <sys/vio_mailbox.h>
#include <sys/vdsk_mailbox.h>
#include <sys/vdsk_common.h>

#ifdef	__cplusplus
extern "C" {
#endif

#define	VDC_DRIVER_NAME		"vdc"

/*
 * Flags recording which parts of the vdc driver have been initialised.
 * Every flag occupies a distinct bit, so any combination can be OR-ed
 * together into a single word.
 */

/* General driver resources */
#define	VDC_SOFT_STATE		0x0001
#define	VDC_LOCKS		0x0002
#define	VDC_MINOR		0x0004
#define	VDC_THREAD		0x0008

/* LDC channel resources */
#define	VDC_LDC			0x0010
#define	VDC_LDC_INIT		0x0020
#define	VDC_LDC_CB		0x0040
#define	VDC_LDC_OPEN		0x0080

/* Descriptor ring (DRing) resources */
#define	VDC_DRING_INIT		0x0100	/* DRing was created */
#define	VDC_DRING_BOUND		0x0200	/* DRing was bound to an LDC channel */
#define	VDC_DRING_LOCAL		0x0400	/* local private DRing was allocated */
#define	VDC_DRING_ENTRY		0x0800	/* >= 1 DRing entry was initialised */

/* Mask made up of all four DRing flags above */
#define	VDC_DRING						\
	(VDC_DRING_INIT | VDC_DRING_BOUND | VDC_DRING_LOCAL | VDC_DRING_ENTRY)

/* Handshake tracking */
#define	VDC_HANDSHAKE		0x1000	/* a handshake is in progress */
#define	VDC_HANDSHAKE_STOP	0x2000	/* stop further handshakes */

/*
 * Strings used as the names of the device node properties that are created.
 * (vdc uses the capitalised versions of these properties as they are 64-bit)
 */
#define	VDC_SIZE_PROP_NAME		"Size"
#define	VDC_NBLOCKS_PROP_NAME		"Nblocks"

/*
 * Strings naming the MD nodes and MD properties.
 */
#define	VDC_MD_VDEV_NAME		"virtual-device"
#define	VDC_MD_DISK_NAME		"disk"
#define	VDC_MD_CHAN_NAME		"channel-endpoint"
#define	VDC_MD_CFG_HDL			"cfg-handle"
#define	VDC_ID_PROP			"id"

/*
 * Outcomes of checking the sequence ID of a message received from the
 * vDisk server. The function verifying the sequence number checks the
 * 'seq_num_xxx' fields in the soft state and reports whether the message
 * still has to be processed (VDC_SEQ_NUM_TODO) or whether it was
 * previously processed (VDC_SEQ_NUM_SKIP).
 */
#define	VDC_SEQ_NUM_INVALID		(-1)	/* Error */
#define	VDC_SEQ_NUM_SKIP		0	/* Request already processed */
#define	VDC_SEQ_NUM_TODO		1	/* Request needs processing */

/*
 * Macros to get UNIT and PART number
 *
 * VDCUNIT() yields the bits of the minor number above VDCUNIT_SHIFT and
 * VDCPART() yields the low bits selected by VDCPART_MASK.
 */
#define	VDCUNIT_SHIFT	3
#define	VDCPART_MASK	7

#define	VDCUNIT(dev)	(getminor((dev)) >> VDCUNIT_SHIFT)
#define	VDCPART(dev)	(getminor((dev)) &  VDCPART_MASK)

/*
 * Scheme to store the instance number and the slice number in the minor number.
 * (NOTE: Uses the same format and definitions as the sd(7D) driver)
 *
 * 'instance' is shifted left by VDCUNIT_SHIFT and OR-ed with 'minor'.
 * Both arguments are parenthesised so that expressions containing
 * operators of lower precedence than '<<' or '|' (e.g. 'a | b' or
 * 'c ? x : y') are combined as a whole. 'minor' is not masked here.
 */
#define	VD_MAKE_DEV(instance, minor)	\
	(((instance) << VDCUNIT_SHIFT) | (minor))

/*
 * Tunables governing communication with vds: how many retries are
 * attempted before giving up, and how long to wait before timing out.
 *
 * These values need to be sufficiently large so that a guest can survive
 * the reboot of the service domain.
 */
#define	VDC_RETRIES		10			/* retry attempts */
#define	VDC_USEC_TIMEOUT_MIN	(30 * MICROSEC)		/* 30 sec */

/*
 * Compute the point at which the vdc driver should consider a wait to have
 * timed out: the current ddi_get_lbolt() value plus the wait time.
 *
 * 'timeout' specifies the wait time, in the same units as ddi_get_lbolt().
 * 'mul' is a multiplier applied to 'timeout', which allows a backoff to be
 * implemented (e.g. using the retry number as a multiplier) so that the
 * wait gets longer if there was no response on the previous retry. A 'mul'
 * smaller than 1 is treated as 1.
 */
#define	VD_GET_TIMEOUT_HZ(timeout, mul)		\
	(ddi_get_lbolt() + (MAX(1, (mul)) * (timeout)))

/*
 * Macros to manipulate Descriptor Ring variables in the soft state
 * structure.
 */
/*
 * Yield the current value of the request ID counter in the soft state and
 * then advance the counter by one. 'vdc' is evaluated once.
 */
#define	VDC_GET_NEXT_REQ_ID(vdc)	((vdc)->req_id++)

/*
 * Yield a pointer to the descriptor ring entry at index 'idx', i.e. the
 * address 'idx * dring_entry_size' bytes past the start of the ring memory.
 *
 * 'idx' is parenthesised so that an expression such as 'i + 1' is scaled
 * as a whole, and the complete expansion is parenthesised so that the
 * result can be used directly with '->'. No bounds checking is done and
 * 'vdc' is evaluated twice, so it must not have side effects.
 */
#define	VDC_GET_DRING_ENTRY_PTR(vdc, idx)				\
	((vd_dring_entry_t *)((vdc)->dring_mem_info.vaddr +		\
	    ((idx) * (vdc)->dring_entry_size)))

/*
 * Set the state of the descriptor ring entry at index 'idx' to
 * VIO_DESC_FREE.
 *
 * The arguments are checked with ASSERT() only: 'vdc' and the ring memory
 * address must be non-NULL and 'idx' must lie in [0, dring_len).
 *
 * The expansion is a brace-enclosed block (kept in that form so existing
 * call sites continue to compile), so it must not be used as the unbraced
 * body of an 'if' that has an 'else'. Both arguments are evaluated more
 * than once and therefore must not have side effects.
 */
#define	VDC_MARK_DRING_ENTRY_FREE(vdc, idx)				\
	{								\
		vd_dring_entry_t *_dep = NULL;				\
		ASSERT((vdc) != NULL);					\
		ASSERT(((idx) >= 0) && ((idx) < (vdc)->dring_len));	\
		ASSERT((vdc)->dring_mem_info.vaddr != NULL);		\
		_dep = (vd_dring_entry_t *)((vdc)->dring_mem_info.vaddr + \
		    ((idx) * (vdc)->dring_entry_size));			\
		ASSERT(_dep != NULL);					\
		_dep->hdr.dstate = VIO_DESC_FREE;			\
	}

/*
 * Initialise the Session ID and Sequence Num in the DRing msg
 *
 * Copies 'session_id' and 'seq_num' from the soft state 'vdc' into
 * 'dmsg.tag.vio_sid' and 'dmsg.seq_num'. 'dmsg' is the message itself
 * (not a pointer to it); 'vdc' is a pointer and is checked with ASSERT().
 *
 * The statements are enclosed in a brace block so that they stay together
 * when the macro is the body of an unbraced 'if' or loop. As with any
 * brace-block macro it must not be followed by 'else' without braces.
 */
#define	VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc)			\
	{							\
		ASSERT((vdc) != NULL);				\
		(dmsg).tag.vio_sid = (vdc)->session_id;		\
		(dmsg).seq_num = (vdc)->seq_num;		\
	}

/*
 * Read thread states.
 */
typedef enum vdc_rd_state {
	VDC_READ_IDLE = 0,		/* conn is not up - idling */
	VDC_READ_WAITING = 1,		/* waiting for data to arrive */
	VDC_READ_PENDING = 2,		/* data is available to be read */
	VDC_READ_RESET = 3		/* stop reads - channel was reset */
} vdc_rd_state_t;

/*
 * States of the connection between vdc and vds.
 */
typedef enum vdc_state {
	VDC_STATE_INIT = 0,		/* device is initialized */
	VDC_STATE_INIT_WAITING = 1,	/* waiting for ldc connection */
	VDC_STATE_NEGOTIATE = 2,	/* handshake negotiation under way */
	VDC_STATE_HANDLE_PENDING = 3,	/* handle requests in backup dring */
	VDC_STATE_RUNNING = 4,		/* running and accepting requests */
	VDC_STATE_DETACH = 5,		/* detaching */
	VDC_STATE_RESETTING = 6		/* resetting connection with vds */
} vdc_state_t;

/*
 * Lifecycle states of a vdc instance.
 */
typedef enum vdc_lc_state {
	VDC_LC_ATTACHING = 0,	/* driver is attaching */
	VDC_LC_ONLINE = 1,	/* driver is attached and online */
	VDC_LC_DETACHING = 2	/* driver is detaching */
} vdc_lc_state_t;

/*
 * Local Descriptor Ring entry
 *
 * vdc creates a Local (private) descriptor ring the same size as the
 * public descriptor ring it exports to vds.
 */

/* Direction in which the data of a descriptor is transferred. */
typedef enum {
	VIO_read_dir = 0,	/* data is read from the server */
	VIO_write_dir = 1,	/* data is written to the server */
	VIO_both_dir = 2	/* same buffer is transferred both in and out */
} vio_desc_direction_t;

/* Kind of operation a descriptor belongs to. */
typedef enum {
	CB_STRATEGY = 0,	/* strategy call (non-blocking) */
	CB_SYNC = 1		/* operation is synchronous */
} vio_cb_type_t;

/*
 * One entry of the local (private) descriptor ring. Each entry holds the
 * details of a request together with a pointer ('dep') to a public
 * descriptor ring entry.
 */
typedef struct vdc_local_desc {
	boolean_t		is_free;	/* local state - inuse or not */

	/* Description of the request */
	int			operation;	/* VD_OP_xxx to be performed */
	caddr_t			addr;		/* addr passed in by consumer */
	/* NOTE(review): presumably the disk slice the request targets - confirm */
	int			slice;
	diskaddr_t		offset;		/* disk offset */
	/* NOTE(review): presumably the length of the transfer in bytes - confirm */
	size_t			nbytes;
	vio_cb_type_t		cb_type;	/* operation type blk/nonblk */
	void			*cb_arg;	/* buf passed to strategy() */
	vio_desc_direction_t	dir;		/* direction of transfer */

	/* Buffer and descriptor bookkeeping */
	caddr_t			align_addr;	/* used if addr non-aligned */
	ldc_mem_handle_t	desc_mhdl;	/* Mem handle of buf */
	vd_dring_entry_t	*dep;		/* public Dring Entry Pointer */

} vdc_local_desc_t;

/*
 * vdc soft state structure
 */
/*
 * Per-instance soft state of the vdc driver: locks and condition variables,
 * lifecycle and connection state, message sequencing counters, information
 * about the imported disk, descriptor ring bookkeeping and LDC handles.
 */
typedef struct vdc {

	kmutex_t	lock;		/* protects next 2 sections of vars */
	kcondvar_t	running_cv;	/* signal when upper layers can send */
	kcondvar_t	initwait_cv;	/* signal when ldc conn is up */
	kcondvar_t	dring_free_cv;	/* signal when desc is avail */
	kcondvar_t	membind_cv;	/* signal when mem can be bound */
	/* NOTE(review): purpose of self_reset is not documented here - confirm */
	boolean_t	self_reset;

	/* Initialisation and lifecycle tracking */
	int		initialized;	/* keeps track of what's init'ed */
	vdc_lc_state_t	lifecycle;	/* Current state of the vdc instance */

	int		hshake_cnt;	/* number of failed handshakes */
	int		open_count;	/* count of outstanding opens */
	int		dkio_flush_pending; /* # outstanding DKIO flushes */

	kthread_t	*msg_proc_thr;	/* main msg processing thread */

	/* Read state and its own lock/cv */
	kmutex_t	read_lock;	/* lock to protect read */
	kcondvar_t	read_cv;	/* cv to wait for READ events */
	vdc_rd_state_t	read_state;	/* current read state */

	/* Synchronous operation tracking */
	uint32_t	sync_op_cnt;	/* num of active sync operations */
	boolean_t	sync_op_pending; /* sync operation is pending */
	boolean_t	sync_op_blocked; /* blocked waiting to do sync op */
	uint32_t	sync_op_status;	/* status of sync operation */
	kcondvar_t	sync_pending_cv; /* cv wait for sync op to finish */
	kcondvar_t	sync_blocked_cv; /* cv wait for other syncs to finish */

	/* Message identification and connection state */
	uint64_t	session_id;	/* common ID sent with all messages */
	uint64_t	seq_num;	/* most recent sequence num generated */
	uint64_t	seq_num_reply;	/* Last seq num ACK/NACK'ed by vds */
	uint64_t	req_id;		/* Most recent Request ID generated */
	uint64_t	req_id_proc;	/* Last request ID processed by vdc */
	vdc_state_t	state;		/* Current disk client-server state */

	dev_info_t	*dip;		/* device info pointer */
	int		instance;	/* driver instance number */

	/* Properties of the imported virtual disk */
	vio_ver_t	ver;		/* version number agreed with server */
	vd_disk_type_t	vdisk_type;	/* type of device/disk being imported */
	vd_disk_label_t vdisk_label; 	/* label type of device/disk imported */
	uint64_t	vdisk_size;	/* device size in blocks */
	uint64_t	max_xfer_sz;	/* maximum block size of a descriptor */
	uint64_t	block_size;	/* device block size used */
	struct dk_label	*label;		/* structure to store disk label */
	struct dk_cinfo	*cinfo;		/* structure to store DKIOCINFO data */
	struct dk_minfo	*minfo;		/* structure for DKIOCGMEDIAINFO data */
	struct vtoc	*vtoc;		/* structure to store VTOC data */
	ddi_devid_t	devid;		/* device id */

	/* Public descriptor ring */
	ldc_mem_info_t		dring_mem_info;		/* dring information */
	uint_t			dring_curr_idx;		/* current index */
	uint32_t		dring_len;		/* dring length */
	uint32_t		dring_max_cookies;	/* dring max cookies */
	uint32_t		dring_cookie_count;	/* num cookies */
	uint32_t		dring_entry_size;	/* descriptor size */
	ldc_mem_cookie_t 	*dring_cookie;		/* dring cookies */
	uint64_t		dring_ident;		/* dring ident */

	uint64_t		threads_pending; 	/* num of threads */

	/* Local (private) descriptor ring and its backup */
	vdc_local_desc_t	*local_dring;		/* local dring */
	vdc_local_desc_t	*local_dring_backup;	/* local dring backup */
	int			local_dring_backup_tail; /* backup dring tail */
	int			local_dring_backup_len;	/* backup dring len */

	/* LDC channel */
	uint64_t		ldc_id;			/* LDC channel id */
	ldc_status_t		ldc_state;		/* LDC channel state */
	ldc_handle_t		ldc_handle;		/* LDC handle */
	ldc_dring_handle_t	ldc_dring_hdl;		/* LDC dring handle */
} vdc_t;

/*
 * Debugging macros
 *
 * With DEBUG defined:
 *   DMSG(_vdc, err_level, format, ...) prints via cmn_err(CE_CONT, ...) when
 *	vdc_msglevel is greater than 'err_level' and the bit for the
 *	instance number of '_vdc' is set in vdc_matchinst. The message is
 *	prefixed with the instance number, the current thread and the
 *	calling function name.
 *   DMSGX(err_level, format, ...) is the variant without a soft state
 *	pointer; only vdc_msglevel is consulted.
 *   VDC_DUMP_DRING_MSG(dmsgp) prints the seq_num, start_idx, end_idx and
 *	dring_ident fields of the message 'dmsgp' points to.
 * Without DEBUG all three expand to nothing, so their arguments are never
 * evaluated and must not have required side effects.
 *
 * 'format' must be a string literal (it is concatenated with the prefix)
 * and at least one argument must follow it.
 *
 * NOTE: the DEBUG expansions end in their own ';'. It is retained so that
 * any existing call site written without a terminator keeps compiling; the
 * consequence is that these macros must not be used as the unbraced body of
 * an 'if' that is followed by 'else'.
 */
#ifdef DEBUG
extern int	vdc_msglevel;
extern uint64_t	vdc_matchinst;

#define	DMSG(_vdc, err_level, format, ...)				\
	do {								\
		if (vdc_msglevel > (err_level) &&			\
		    (vdc_matchinst & (1ull << (_vdc)->instance)))	\
			cmn_err(CE_CONT, "?[%d,t@%p] %s: "format,	\
			    (_vdc)->instance, (void *)curthread,	\
			    __func__, __VA_ARGS__);			\
		_NOTE(CONSTANTCONDITION)				\
	} while (0);

#define	DMSGX(err_level, format, ...)					\
	do {								\
		if (vdc_msglevel > (err_level))				\
			cmn_err(CE_CONT, "?%s: "format, __func__,	\
			    __VA_ARGS__);				\
		_NOTE(CONSTANTCONDITION)				\
	} while (0);

#define	VDC_DUMP_DRING_MSG(dmsgp)					\
		DMSGX(0, "sq:%lu start:%d end:%d ident:%lu\n",		\
			(dmsgp)->seq_num, (dmsgp)->start_idx,		\
			(dmsgp)->end_idx, (dmsgp)->dring_ident);

#else	/* !DEBUG */
/* Same parameter lists as the DEBUG versions above; expand to nothing. */
#define	DMSG(_vdc, err_level, format, ...)
#define	DMSGX(err_level, format, ...)
#define	VDC_DUMP_DRING_MSG(dmsgp)

#endif	/* !DEBUG */

#ifdef	__cplusplus
}
#endif

#endif	/* _VDC_H */