1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
|
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_XDF_H
#define _SYS_XDF_H
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cmlb.h>
#include <sys/dkio.h>
#include <sys/gnttab.h>
#include <xen/sys/xendev.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Virtual block device (VBD) transfer sizes.
 *
 * VBDs have standard 512 byte blocks.
 * A single blkif_request can transfer up to 11 pages of data, 1 page/segment.
 */
#define	XB_BSIZE	DEV_BSIZE		/* bytes per VBD block */
#define	XB_BMASK	(XB_BSIZE - 1)		/* low bits: offset in a block */
#define	XB_BSHIFT	9			/* shift for a 512 byte block */

/* Multiply block number bn by the sector size recorded in vdp. */
#define	XB_DTOB(bn, vdp)	((bn) * (vdp)->xdf_xdev_secsize)

#define	XB_MAX_SEGLEN	(8 * XB_BSIZE)		/* bytes in one segment */
#define	XB_SEGOFFSET	(XB_MAX_SEGLEN - 1)	/* last byte offset in a seg */

/* Bytes carried by one full request, and by a ring full of such requests. */
#define	XB_MAX_XFER	(XB_MAX_SEGLEN * BLKIF_MAX_SEGMENTS_PER_REQUEST)
#define	XB_MAXPHYS	(XB_MAX_XFER * BLKIF_RING_SIZE)
/* Number of XB_BSIZE-byte sectors per page-sized segment */
#define	XB_NUM_SECTORS_PER_SEG	(PAGESIZE / XB_BSIZE)

/* Sectors are numbered 0 through XB_NUM_SECTORS_PER_SEG - 1 */
#define	XB_LAST_SECTOR_IN_SEG	(XB_NUM_SECTORS_PER_SEG - 1)
/*
 * Slice for absolute disk transaction.
 *
 * Hack Alert.  XB_SLICE_NONE is a magic value that can be written into the
 * b_private field of buf structures passed to xdf_strategy().  When present
 * it indicates that the I/O is using an absolute offset (i.e., the I/O is
 * not bound to any one partition).  This magic value is currently used by
 * the pv_cmdk driver.  This hack is shamelessly stolen from the sun4v vdc
 * driver, another virtual disk device driver.  (Although in the case of
 * vdc the hack is less egregious since it is self-contained within the
 * vdc driver, whereas here it is used as an interface between the pv_cmdk
 * driver and the xdf driver.)
 */
#define	XB_SLICE_NONE	0xFF
/*
 * blkif status
 */
typedef enum xdf_state {
	/* Initial state. */
	XD_UNKNOWN = 0,

	/*
	 * The ring and evtchn have been allocated and the xenbus state has
	 * been changed to XenbusStateInitialised; we are waiting for the
	 * backend to connect.
	 */
	XD_INIT = 1,

	/*
	 * Both the backend and frontend xenbus states have changed to
	 * XenbusStateConnected.  IO is now allowed, but we are still not
	 * fully initialized.
	 */
	XD_CONNECTED = 2,

	/* We're fully initialized and allowing regular IO. */
	XD_READY = 3,

	/*
	 * A vbd interface close request was received from the backend.  No
	 * more I/O requests are allowed to be put into the ring buffer,
	 * while the interrupt handler is allowed to run to finish any
	 * outstanding I/O request.  The disconnect process is kicked off by
	 * changing the xenbus state to XenbusStateClosed.
	 */
	XD_CLOSING = 4,

	/*
	 * The disconnection process has finished: both the backend's and
	 * the frontend's xenbus state have been changed to
	 * XenbusStateClosed, and the device can be detached.
	 */
	XD_CLOSED = 5,

	/*
	 * We're either being suspended or resuming from a suspend.  If we're
	 * in the process of suspending, we block all new IO but allow
	 * existing IO to drain.
	 */
	XD_SUSPEND = 6
} xdf_state_t;
/*
 * Minor number layout: the low XDF_PSHIFT bits carry the partition number
 * and the bits above them carry the instance number.
 *
 * (Note carried over from the original comment: "16 partitions + fdisk".)
 */
#define	XDF_PSHIFT	6
#define	XDF_PMASK	((1 << XDF_PSHIFT) - 1)	/* partition bits */
#define	XDF_PEXT	(1 << XDF_PSHIFT)	/* partition numbers/instance */

/* Build a minor number from instance i and partition m. */
#define	XDF_MINOR(i, m)	(((i) << XDF_PSHIFT) | (m))
/* Extract the instance from minor number m. */
#define	XDF_INST(m)	((m) >> XDF_PSHIFT)
/* Extract the partition from minor number m. */
#define	XDF_PART(m)	((m) & XDF_PMASK)
/*
 * One blkif_request_t will have one corresponding ge_slot_t, where we save
 * the grant table refs used in that blkif_request_t.
 *
 * The id of this ge_slot_t will also be put into the 'id' field of each
 * blkif_request_t when it is sent out to the ring buffer.
 */
typedef struct ge_slot {
	/* NOTE(review): presumably links this slot on v_req.v_gs -- confirm */
	list_node_t	gs_vreq_link;
	struct v_req	*gs_vreq;	/* pointer to a vbd I/O request */
	/* NOTE(review): presumably the other end's domain id -- confirm */
	domid_t		gs_oeid;
	/* NOTE(review): presumably non-zero for a read -- confirm */
	int		gs_isread;
	grant_ref_t	gs_ghead;
	/* NOTE(review): presumably the number of gs_ge[] in use -- confirm */
	int		gs_ngrefs;
	/* grant table refs; room for one per segment of a request */
	grant_ref_t	gs_ge[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} ge_slot_t;
/*
 * vbd I/O request
 *
 * An instance of this structure is bound to each buf passed to the
 * driver's strategy routine by setting the pointer into bp->av_back.
 * The id of this vreq will also be put into the 'id' field of each
 * blkif_request_t when it is sent out to the ring buffer for one DMA
 * window of this buf.
 *
 * A vreq mainly contains DMA information for this buf.  In one vreq/buf
 * there could be more than one DMA window, each of which will be mapped
 * to one blkif_request_t/ge_slot_t.  A ge_slot_t contains all grant table
 * entry information for this buf.  The ge_slot_t for the current DMA
 * window is reached through v_gs in the vreq (note that v_gs is declared
 * as a list_t, i.e. a list head rather than a single pointer).
 *
 * So, grant table entries will only be alloc'ed when the DMA window is
 * about to be transferred via blkif_request_t to the ring buffer, and
 * they will be freed right after the blkif_response_t is seen.  By this
 * means, we can make use of grant table entries more efficiently.
 */
typedef struct v_req {
	list_node_t		v_link;		/* list linkage */
	list_t			v_gs;		/* ge_slot_t list, see above */
	int			v_status;	/* a VREQ_* status value */
	buf_t			*v_buf;		/* buf this vreq is bound to */
	/*
	 * NOTE(review): the next four look like DMA cookie, DMA window and
	 * slot counters/indices -- confirm against vreq_setup().
	 */
	uint_t			v_ndmacs;
	uint_t			v_dmaw;
	uint_t			v_ndmaws;
	uint_t			v_nslots;
	uint64_t		v_blkno;
	ddi_dma_handle_t	v_memdmahdl;
	ddi_acc_handle_t	v_align;
	ddi_dma_handle_t	v_dmahdl;
	ddi_dma_cookie_t	v_dmac;
	caddr_t			v_abuf;
	uint8_t			v_flush_diskcache;
	boolean_t		v_runq;
} v_req_t;
/*
 * Status set and checked in vreq->v_status by vreq_setup()
 *
 * These values help us continue the vreq setup work from the last failure
 * point, instead of starting from scratch after each failure.
 *
 * They are consecutive integers rather than independent bit flags (for
 * example VREQ_MEMDMAHDL_ALLOCED equals VREQ_INIT_DONE | VREQ_DMAHDL_ALLOCED),
 * so a bitwise test of v_status against one of them would be ambiguous.
 */
#define	VREQ_INIT		0x0
#define	VREQ_INIT_DONE		0x1
#define	VREQ_DMAHDL_ALLOCED	0x2
#define	VREQ_MEMDMAHDL_ALLOCED	0x3
#define	VREQ_DMAMEM_ALLOCED	0x4
#define	VREQ_DMABUF_BOUND	0x5
#define	VREQ_GS_ALLOCED		0x6
#define	VREQ_DMAWIN_DONE	0x7
/*
 * virtual block device per-instance softstate
 */
typedef struct xdf {
	dev_info_t	*xdf_dip;
	char		*xdf_addr;
	ddi_iblock_cookie_t xdf_ibc;	/* mutex iblock cookie */
	domid_t		xdf_peer;	/* otherend's dom ID */
	xendev_ring_t	*xdf_xb_ring;	/* I/O ring buffer */
	ddi_acc_handle_t xdf_xb_ring_hdl; /* access handler for ring buffer */
	list_t		xdf_vreq_act;	/* active vreq list */
	buf_t		*xdf_f_act;	/* active buf list head */
	buf_t		*xdf_l_act;	/* active buf list tail */
	buf_t		*xdf_i_act;	/* active buf list index */
	xdf_state_t	xdf_state;	/* status of this virtual disk */
	boolean_t	xdf_suspending;
	ulong_t		xdf_vd_open[OTYPCNT];
	ulong_t		xdf_vd_lyropen[XDF_PEXT]; /* XDF_PEXT entries */
	ulong_t		xdf_connect_req;
	kthread_t	*xdf_connect_thread;
	ulong_t		xdf_vd_exclopen;
	kmutex_t	xdf_iostat_lk;	/* mutex lock for the iostat ptr */
	kmutex_t	xdf_dev_lk;	/* mutex lock for I/O path */
	kmutex_t	xdf_cb_lk;	/* mutex lock for event handling path */
	kcondvar_t	xdf_dev_cv;	/* cv used in I/O path */
	/* disk info from backend xenstore; VDISK_* bits, see XD_IS_*() */
	uint_t		xdf_dinfo;
	diskaddr_t	xdf_xdev_nblocks; /* total size in blocks */
	/* disk blksize from backend; the multiplier used by XB_DTOB() */
	uint_t		xdf_xdev_secsize;
	cmlb_geom_t	xdf_pgeom;
	boolean_t	xdf_pgeom_set;
	boolean_t	xdf_pgeom_fixed;
	kstat_t		*xdf_xdev_iostat; /* see XDF_UPDATE_IO_STAT() */
	cmlb_handle_t	xdf_vd_lbl;
	ddi_softintr_t	xdf_softintr_id;
	timeout_id_t	xdf_timeout_id;
	struct gnttab_free_callback xdf_gnt_callback;
	boolean_t	xdf_feature_barrier;
	boolean_t	xdf_flush_supported;
	boolean_t	xdf_media_req_supported;
	boolean_t	xdf_wce;
	boolean_t	xdf_cmbl_reattach;
	char		*xdf_flush_mem;
	char		*xdf_cache_flush_block;
	int		xdf_evtchn;
	enum dkio_state	xdf_mstate;
	kcondvar_t	xdf_mstate_cv;
	kcondvar_t	xdf_hp_status_cv;
	struct buf	*xdf_ready_bp;
	ddi_taskq_t	*xdf_ready_tq;
	kthread_t	*xdf_ready_tq_thread;
	struct buf	*xdf_ready_tq_bp;
#ifdef DEBUG
	/* counter adjusted by SETDMACBON()/SETDMACBOFF(); DEBUG builds only */
	int		xdf_dmacallback_num;
	kthread_t	*xdf_oe_change_thread;
#endif
} xdf_t;
/*
 * VBD I/O requests must be aligned on a 512-byte boundary and specify
 * a transfer size which is a multiple of 512 bytes.
 *
 * ALIGNED_XFER(bp) evaluates to 1 when both the buffer address and the
 * byte count of bp have no bits set under XB_BMASK, and to 0 otherwise.
 */
#define	ALIGNED_XFER(bp)					\
	((((uintptr_t)((bp)->b_un.b_addr) & XB_BMASK) == 0) &&	\
	(((bp)->b_bcount & XB_BMASK) == 0))

/*
 * U_INVAL(u) evaluates to 1 when the uio is NOT acceptable, i.e. when its
 * offset or the length of the iovec that uio_iov points at has bits set
 * under XB_BMASK, and to 0 otherwise.
 */
#define	U_INVAL(u)						\
	(((u)->uio_loffset & (offset_t)(XB_BMASK)) ||		\
	((u)->uio_iov->iov_len & (offset_t)(XB_BMASK)))
/*
 * wrap pa_to_ma() for xdf to run in dom0
 *
 * When DOMAIN_IS_INITDOMAIN(xen_info) is true the address is used as is;
 * otherwise it is passed through pa_to_ma().  The parameter is
 * parenthesized in both arms so that any argument expression is safe.
 */
#define	PATOMA(addr)	\
	(DOMAIN_IS_INITDOMAIN(xen_info) ? (addr) : pa_to_ma(addr))

/* Tests of the VDISK_* bits in a vbd's xdf_dinfo. */
#define	XD_IS_RO(vbd)	VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_READONLY)
#define	XD_IS_CD(vbd)	VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_CDROM)
#define	XD_IS_RM(vbd)	VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_REMOVABLE)

/* Tests of the B_READ and B_ERROR bits in a buf's b_flags. */
#define	IS_READ(bp)	VOID2BOOLEAN((bp)->b_flags & B_READ)
#define	IS_ERROR(bp)	VOID2BOOLEAN((bp)->b_flags & B_ERROR)
/*
 * Account one buf in the instance's I/O kstat.
 *
 * vdp - pointer to the xdf_t; (vdp)->xdf_xdev_iostat is handed to
 *       KSTAT_IO_PTR() and the result is dereferenced without a NULL
 *       check.
 * bp  - pointer to the buf; the byte count added is b_bcount - b_resid,
 *       and B_READ in b_flags selects the read or the write counters.
 *
 * The body is wrapped in do { } while (0) so the macro acts as exactly one
 * statement (for example as the unbraced body of an if that has an else);
 * callers terminate it with a semicolon.  bp is evaluated more than once,
 * so do not pass an expression with side effects.
 */
#define	XDF_UPDATE_IO_STAT(vdp, bp)					\
	do {								\
		kstat_io_t *kip = KSTAT_IO_PTR((vdp)->xdf_xdev_iostat);	\
		size_t n_done = (bp)->b_bcount - (bp)->b_resid;		\
		if ((bp)->b_flags & B_READ) {				\
			kip->reads++;					\
			kip->nread += n_done;				\
		} else {						\
			kip->writes++;					\
			kip->nwritten += n_done;			\
		}							\
	} while (0)
/*
 * Debug helpers.
 *
 * With DEBUG defined:
 *   DPRINTF(flag, args)  calls prom_printf with the parenthesized argument
 *                        list args when (xdf_debug & flag) is non-zero.
 *   SETDMACBON(vbd)      increments (vbd)->xdf_dmacallback_num.
 *   SETDMACBOFF(vbd)     decrements (vbd)->xdf_dmacallback_num.
 *   ISDMACBON(vbd)       is true while that counter is greater than zero.
 * The statement macros are wrapped in do { } while (0) so each one acts as
 * exactly one statement; callers terminate them with a semicolon.
 *
 * Without DEBUG all four expand to nothing and their arguments are not
 * evaluated.  Since ISDMACBON() then yields no expression at all, it can
 * only appear inside constructs that also disappear in non-DEBUG builds.
 */
#ifdef DEBUG
#define	DPRINTF(flag, args)	\
	do { if (xdf_debug & (flag)) prom_printf args; } while (0)
#define	SETDMACBON(vbd)		do { (vbd)->xdf_dmacallback_num++; } while (0)
#define	SETDMACBOFF(vbd)	do { (vbd)->xdf_dmacallback_num--; } while (0)
#define	ISDMACBON(vbd)		((vbd)->xdf_dmacallback_num > 0)
#else
#define	DPRINTF(flag, args)
#define	SETDMACBON(vbd)
#define	SETDMACBOFF(vbd)
#define	ISDMACBON(vbd)
#endif /* DEBUG */
/*
 * Debug categories: bit values for the flag argument of DPRINTF(), which
 * tests them against xdf_debug.  (0x4 is not assigned in this header.)
 */
#define	DDI_DBG		0x1
#define	DMA_DBG		0x2
#define	INTR_DBG	0x8
#define	IO_DBG		0x10
#define	IOCTL_DBG	0x20
#define	SUSRES_DBG	0x40
#define	LBL_DBG		0x80
/*
 * Functions declared only when XPV_HVM_DRIVER is defined.
 *
 * NOTE(review): presumably these are the entry points that other HVM PV
 * disk drivers (such as pv_cmdk, named in the XB_SLICE_NONE comment) call
 * into xdf with -- confirm against the callers.
 *
 * Note that xdf_media_req_supported() shares its name with the
 * xdf_media_req_supported member of xdf_t.
 */
#if defined(XPV_HVM_DRIVER)
extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
void *);
extern void xdfmin(struct buf *bp);
extern dev_info_t *xdf_hvm_hold(const char *);
extern boolean_t xdf_hvm_connect(dev_info_t *);
extern int xdf_hvm_setpgeom(dev_info_t *, cmlb_geom_t *);
extern int xdf_kstat_create(dev_info_t *, char *, int);
extern void xdf_kstat_delete(dev_info_t *);
extern boolean_t xdf_is_cd(dev_info_t *);
extern boolean_t xdf_is_rm(dev_info_t *);
extern boolean_t xdf_media_req_supported(dev_info_t *);
#endif /* XPV_HVM_DRIVER */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_XDF_H */
|