/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

#ifndef _VIRTIO_H
#define	_VIRTIO_H

/*
 * VIRTIO FRAMEWORK
 *
 * This framework handles the initialisation and operation common to all Virtio
 * device types; e.g., Virtio Block (vioblk), Virtio Network (vioif), etc.  The
 * framework presently provides for what is now described as a "legacy" driver
 * in the current issue of the "Virtual I/O Device (VIRTIO) Version 1.1"
 * specification.  Though several new specifications have been released, legacy
 * devices are still the most widely available on current hypervisor platforms.
 * Legacy devices make use of the native byte order of the host system.
 *
 * FRAMEWORK INITIALISATION: STARTING
 *
 * Client drivers will, in their attach(9E) routine, make an early call to
 * virtio_init().  This causes the framework to allocate some base resources
 * and begin initialising the device.  This routine confirms that the device
 * will operate in the supported legacy mode as per the specification.  A
 * failure here means that we cannot presently support this device.
 *
 * Once virtio_init() returns, the initialisation phase has begun and the
 * driver can examine negotiated features and set up virtqueues.  The
 * initialisation phase ends when the driver calls either
 * virtio_init_complete() or virtio_fini().
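 *
 * As a brief illustrative sketch (not from any particular driver; the
 * feature mask "MYDEV_WANTED_FEATURES" is a hypothetical device-specific
 * value, and the final boolean_t argument is passed as B_TRUE here without
 * comment on its precise meaning), an attach(9E) routine might begin:
 *
 *	virtio_t *vio;
 *
 *	if ((vio = virtio_init(dip, MYDEV_WANTED_FEATURES, B_TRUE)) == NULL) {
 *		return (DDI_FAILURE);
 *	}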
 *
 * FRAMEWORK INITIALISATION: FEATURE NEGOTIATION
 *
 * The virtio_init() call accepts a bitmask of desired features that the driver
 * supports.  The framework will negotiate the common set of features supported
 * by both the driver and the device.  The presence of any individual feature
 * can be tested after the initialisation phase has begun using
 * virtio_feature_present().
 *
 * The framework will additionally negotiate some set of features that are not
 * specific to a device type on behalf of the client driver; e.g., support for
 * indirect descriptors.
 *
 * Some features allow the driver to read additional configuration values from
 * the device-specific regions of the device register space.  These can be
 * accessed via the virtio_dev_get*() and virtio_dev_put*() family of
 * functions.
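 *
 * A sketch of both facilities, using the hypothetical names "MYDEV_F_FANCY"
 * and "MYDEV_CONFIG_OFFSET" to stand in for values that would come from the
 * device-specific section of the specification:
 *
 *	if (virtio_feature_present(vio, MYDEV_F_FANCY)) {
 *		uint16_t val = virtio_dev_get16(vio, MYDEV_CONFIG_OFFSET);
 *
 *		... make use of the device-specific configuration value ...
 *	}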
 *
 * FRAMEWORK INITIALISATION: VIRTQUEUE CONFIGURATION
 *
 * During the initialisation phase, the client driver may configure some number
 * of virtqueues with virtio_queue_alloc().  Once initialisation has been
 * completed, no further queues can be configured without destroying the
 * framework object and beginning again from scratch.
 *
 * When configuring a queue, the driver must know the queue index number.  This
 * generally comes from the section of the specification describing the
 * specific device type; e.g., Virtio Network devices have a receive queue at
 * index 0, and a transmit queue at index 1.  The name given to the queue is
 * informational and has no impact on device operation.
 *
 * Most queues will require an interrupt handler function.  When a queue
 * notification interrupt is received, the provided handler will be called with
 * two arguments: first, the provided user data argument; and second, a pointer
 * to the "virtio_t" object for this instance.
 *
 * A maximum segment count must be selected for each queue.  This count is the
 * upper bound on the number of scatter-gather cookies that will be accepted,
 * and applies to both direct and indirect descriptor based queues.  This cap
 * is usually either negotiated with the device, or determined structurally
 * based on the shape of the buffers required for device operation.
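 *
 * A sketch of configuring a receive queue at index 0 for a hypothetical
 * device follows.  The soft state structure "mydev_t", the handler, and
 * "MYDEV_MAX_SEGS" are placeholders, and B_FALSE is passed for the
 * boolean_t argument without comment on its precise meaning:
 *
 *	static uint_t
 *	mydev_rx_handler(caddr_t arg0, caddr_t arg1)
 *	{
 *		mydev_t *sc = (mydev_t *)arg0;
 *
 *		... process completed chains; see VIRTQUEUE OPERATION ...
 *
 *		return (DDI_INTR_CLAIMED);
 *	}
 *
 *	...
 *
 *	sc->md_rxq = virtio_queue_alloc(vio, 0, "rx", mydev_rx_handler, sc,
 *	    B_FALSE, MYDEV_MAX_SEGS);
 *	if (sc->md_rxq == NULL) {
 *		virtio_fini(vio, B_TRUE);
 *		return (DDI_FAILURE);
 *	}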
 *
 * FRAMEWORK INITIALISATION: FINISHING
 *
 * Once queue configuration has been completed, the client driver calls
 * virtio_init_complete() to finalise resource allocation and set the device to
 * the running state (DRIVER_OK).  The framework will allocate any interrupts
 * needed for queue notifications at this time.
 *
 * If the client driver cannot complete initialisation, the instance may
 * instead be torn down with virtio_fini().  Signalling failure to this routine
 * will report failure to the device instead of resetting it, which may be
 * reported by the hypervisor as a fault.
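 *
 * A sketch of finishing initialisation; passing 0 as the second argument is
 * shown here as an assumed "no restriction on interrupt types" placeholder
 * and should be checked against the implementation:
 *
 *	if (virtio_init_complete(vio, 0) != DDI_SUCCESS) {
 *		virtio_fini(vio, B_TRUE);
 *		return (DDI_FAILURE);
 *	}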
 *
 * DESCRIPTOR CHAINS
 *
 * Most devices accept I/O requests from the driver through at least one
 * queue.  Some devices are operated by submission of synchronous requests.
 * The device is expected to process the request and return some kind of
 * status; e.g., a block device accepts write requests from the file system
 * and signals when they have completed or failed.
 *
 * Other devices operate by asynchronous delivery of I/O requests to the
 * driver; e.g., a network device may receive incoming frames at any time.
 * Inbound asynchronous delivery is usually achieved by populating a queue with
 * a series of memory buffers where the incoming data will be written by the
 * device at some later time.
 *
 * Whether for inbound or outbound transfers, buffers are inserted into the
 * ring through chains of one or more descriptors.  Each descriptor has a
 * transfer direction (to or from the device), and a physical address and
 * length (i.e., a DMA cookie).  The framework automatically manages the slight
 * differences in operation between direct and indirect descriptor usage on
 * behalf of the client driver.
 *
 * A chain of descriptors is allocated by calling virtio_chain_alloc() against
 * a particular queue.  This function accepts a kmem flag as per
 * kmem_alloc(9F).  A client driver specific void pointer may be attached to
 * the chain with virtio_chain_data_set() and read back later with
 * virtio_chain_data(); e.g., after it is returned by a call to
 * virtio_queue_poll().
 *
 * Cookies are added to a chain by calling virtio_chain_append() with the
 * appropriate physical address and transfer direction.  This function may fail
 * if the chain is already using the maximum number of cookies for this queue.
 * Client drivers are responsible for appropriate use of virtio_dma_sync()
 * or ddi_dma_sync(9F) on any memory appended to a descriptor chain prior to
 * chain submission.
 *
 * Once fully constructed and synced, a chain can be submitted to the device by
 * calling virtio_chain_submit().  The caller may choose to flush the queue
 * contents to the device on each submission, or to batch notifications until
 * later to amortise the notification cost over more requests.  If batching
 * notifications, outstanding submissions can be flushed with a call to
 * virtio_queue_flush().  Note that the framework will insert an appropriate
 * memory barrier to ensure writes by the driver complete before making the
 * submitted descriptor visible to the device.
 *
 * A chain may be reset for reuse with new cookies by calling
 * virtio_chain_clear().  The chain may be freed completely by calling
 * virtio_chain_free().
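 *
 * A sketch of building and submitting a single-cookie chain for an inbound
 * (device-write) buffer.  Here "viq" is the queue, "dma" is a previously
 * bound "virtio_dma_t", and "mb" is a hypothetical driver buffer object:
 *
 *	virtio_chain_t *vic;
 *
 *	if ((vic = virtio_chain_alloc(viq, KM_SLEEP)) == NULL) {
 *		... handle allocation failure ...
 *	}
 *	virtio_chain_data_set(vic, mb);
 *
 *	if (virtio_chain_append(vic, virtio_dma_cookie_pa(dma, 0),
 *	    virtio_dma_cookie_size(dma, 0),
 *	    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
 *		virtio_chain_free(vic);
 *		... handle failure ...
 *	}
 *
 *	virtio_dma_sync(dma, DDI_DMA_SYNC_FORDEV);
 *	virtio_chain_submit(vic, B_TRUE);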
 *
 * When a descriptor chain is returned to the driver by the device, it may
 * include a received data length value.  This value can be accessed via
 * virtio_chain_received_length().  There is some suggestion in more recent
 * Virtio specifications that, depending on the device type and the
 * hypervisor, this value may not always be accurate or useful.
 *
 * VIRTQUEUE OPERATION
 *
 * The queue size (i.e., the number of direct descriptor entries) can be
 * found with virtio_queue_size().  This value is static over the lifetime
 * of the queue.
 *
 * The number of descriptor chains presently submitted to the device and not
 * yet returned can be obtained via virtio_queue_nactive().
 *
 * Over time the device will return descriptor chains to the driver in response
 * to device activity.  Any newly returned chains may be retrieved by the
 * driver by calling virtio_queue_poll().  See the DESCRIPTOR CHAINS section
 * for more detail about managing descriptor chain objects.  Note that the
 * framework will insert an appropriate memory barrier to ensure that writes by
 * the host are complete before returning the chain to the client driver.
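 *
 * A sketch of a poll loop as it might appear in a queue interrupt handler,
 * where the attached data pointer is assumed to be a hypothetical driver
 * buffer object that tracks its own "virtio_dma_t":
 *
 *	virtio_chain_t *vic;
 *
 *	while ((vic = virtio_queue_poll(viq)) != NULL) {
 *		mydev_buf_t *mb = virtio_chain_data(vic);
 *		size_t len = virtio_chain_received_length(vic);
 *
 *		virtio_dma_sync(mb->mb_dma, DDI_DMA_SYNC_FORCPU);
 *
 *		... consume "len" bytes from the buffer ...
 *	}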
 *
 * The NO_INTERRUPT flag on a queue may be set or cleared with
 * virtio_queue_no_interrupt().  Note that this flag is purely advisory, and
 * may not actually stop interrupts from the device in a timely fashion.
 *
 * INTERRUPT MANAGEMENT
 *
 * A mutex used within an interrupt handler must be initialised with the
 * correct interrupt priority.  After the initialisation phase is complete, the
 * client should use virtio_intr_pri() to get a value suitable to pass to
 * mutex_init(9F).
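 *
 * For example:
 *
 *	mutex_init(&sc->md_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));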
 *
 * When the driver is ready to receive notifications from the device, the
 * virtio_interrupts_enable() routine may be called.  Interrupts may be
 * disabled again by calling virtio_interrupts_disable().  Interrupt resources
 * will be deallocated as part of a subsequent call to virtio_fini().
 *
 * DMA MEMORY MANAGEMENT: ALLOCATION AND FREE
 *
 * Client drivers may allocate memory suitable for communication with the
 * device by using virtio_dma_alloc().  This function accepts an allocation
 * size, a DMA attribute template, a set of DMA flags, and a kmem flag.
 * A "virtio_dma_t" object is returned to track and manage the allocation.
 *
 * The DMA flags value will be a combination of direction flags (e.g.,
 * DDI_DMA_READ or DDI_DMA_WRITE) and mapping flags (e.g., DDI_DMA_CONSISTENT
 * or DDI_DMA_STREAMING).  The kmem flag is either KM_SLEEP or KM_NOSLEEP,
 * as described in kmem_alloc(9F).
 *
 * Memory that is no longer required can be freed using virtio_dma_free().
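 *
 * For example, with a hypothetical attribute template "mydev_dma_attr":
 *
 *	virtio_dma_t *dma = virtio_dma_alloc(vio, len, &mydev_dma_attr,
 *	    DDI_DMA_READ | DDI_DMA_CONSISTENT, KM_SLEEP);
 *
 *	if (dma == NULL) {
 *		... handle allocation failure ...
 *	}
 *
 *	... use the memory; see below for addresses and synchronisation ...
 *
 *	virtio_dma_free(dma);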
 *
 * DMA MEMORY MANAGEMENT: BINDING WITHOUT ALLOCATION
 *
 * If another subsystem has loaned memory to the client driver, it may be
 * necessary to allocate and bind a handle without additional backing
 * memory.  The virtio_dma_alloc_nomem() function can be used for this
 * purpose, returning a "virtio_dma_t" object.
 *
 * Once allocated, an arbitrary kernel memory location can be bound for DMA
 * with virtio_dma_bind().  The binding can be subsequently undone with
 * virtio_dma_unbind(), allowing the "virtio_dma_t" object to be reused for
 * another binding.
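 *
 * A sketch, assuming the return value convention of virtio_dma_bind()
 * matches other DDI routines and that the final argument is a kmem flag as
 * for virtio_dma_alloc():
 *
 *	virtio_dma_t *dma = virtio_dma_alloc_nomem(vio, &mydev_dma_attr,
 *	    KM_SLEEP);
 *
 *	if (virtio_dma_bind(dma, va, len,
 *	    DDI_DMA_WRITE | DDI_DMA_STREAMING, KM_SLEEP) != DDI_SUCCESS) {
 *		... handle bind failure ...
 *	}
 *
 *	... use the binding ...
 *
 *	virtio_dma_unbind(dma);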
 *
 * DMA MEMORY MANAGEMENT: VIRTUAL AND PHYSICAL ADDRESSES
 *
 * The total size of a mapping (with or without its own backing memory) can
 * be found with virtio_dma_size().  A void pointer to a kernel virtual
 * address within the buffer can be obtained via virtio_dma_va(); this
 * function accepts a linear offset into the VA range and performs bounds
 * checking.
 *
 * The number of physical memory addresses (DMA cookies) can be found with
 * virtio_dma_ncookies().  The physical address and length of each cookie can
 * be found with virtio_dma_cookie_pa() and virtio_dma_cookie_size(); these
 * functions are keyed on the zero-indexed cookie number.
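 *
 * These routines combine naturally with chain construction; e.g., a sketch
 * of appending every cookie of a bound outbound (device-read) buffer to an
 * existing chain "vic":
 *
 *	uint_t n;
 *
 *	for (n = 0; n < virtio_dma_ncookies(dma); n++) {
 *		if (virtio_chain_append(vic, virtio_dma_cookie_pa(dma, n),
 *		    virtio_dma_cookie_size(dma, n),
 *		    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
 *			... unwind and handle failure ...
 *		}
 *	}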
 *
 * DMA MEMORY MANAGEMENT: SYNCHRONISATION
 *
 * When passing memory to the device, or reading memory returned from the
 * device, DMA synchronisation must be performed in case it is required by the
 * underlying platform.  A convenience wrapper exists: virtio_dma_sync().  This
 * routine synchronises the entire binding and accepts the same synchronisation
 * type values as ddi_dma_sync(9F).
 *
 * QUIESCE
 *
 * As quiesce(9E) merely requires that the device come to a complete stop, most
 * client drivers will be able to call virtio_quiesce() without additional
 * actions.  This will reset the device, immediately halting all queue
 * activity, and return a value suitable for returning from the client driver
 * quiesce(9E) entrypoint.  This routine must only be called from quiesce
 * context as it performs no synchronisation with other threads.
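 *
 * A quiesce(9E) entrypoint can then be little more than the following
 * sketch, with a hypothetical soft state lookup:
 *
 *	static int
 *	mydev_quiesce(dev_info_t *dip)
 *	{
 *		mydev_t *sc;
 *
 *		if ((sc = ddi_get_soft_state(mydev_state,
 *		    ddi_get_instance(dip))) == NULL) {
 *			return (DDI_FAILURE);
 *		}
 *
 *		return (virtio_quiesce(sc->md_virtio));
 *	}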
 *
 * DETACH
 *
 * Some devices are effectively long-polled; that is, they submit some number
 * of descriptor chains to the device that are not returned to the driver until
 * some asynchronous event occurs such as the receipt of an incoming packet or
 * a device hot plug event.  When detaching the device, the driver must
 * arrange the return of these outstanding buffers.  Some device types may
 * have task management commands that can force the orderly return of these
 * chains, but the only way to do so uniformly is to reset the device and
 * claw back the memory.
 *
 * If the client driver has outstanding descriptors and needs a hard stop on
 * device activity it can call virtio_shutdown().  This routine will bring
 * queue processing to an orderly stop and then reset the device, causing it to
 * cease use of any DMA resources.  Once this function returns, the driver may
 * call virtio_queue_evacuate() on each queue to retrieve any previously
 * submitted chains.
 *
 * To tear down resources (e.g., interrupts and allocated memory) the client
 * driver must finally call virtio_fini().  If virtio_shutdown() was not
 * needed, this routine will also reset the device.
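 *
 * A sketch of the long-polled detach case described above, where each
 * evacuated chain carries a hypothetical driver buffer object as its data
 * pointer and B_FALSE signals an orderly (non-failure) teardown:
 *
 *	virtio_chain_t *vic;
 *
 *	virtio_shutdown(vio);
 *
 *	while ((vic = virtio_queue_evacuate(sc->md_rxq)) != NULL) {
 *		mydev_buf_t *mb = virtio_chain_data(vic);
 *
 *		mydev_buf_free(sc, mb);
 *		virtio_chain_free(vic);
 *	}
 *
 *	virtio_fini(vio, B_FALSE);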
 */

#ifdef __cplusplus
extern "C" {
#endif

typedef struct virtio virtio_t;
typedef struct virtio_queue virtio_queue_t;
typedef struct virtio_chain virtio_chain_t;
typedef struct virtio_dma virtio_dma_t;

typedef enum virtio_direction {
	/*
	 * In the base specification, a descriptor is either set up to be
	 * written by the device or to be read by the device, but not both.
	 */
	VIRTIO_DIR_DEVICE_WRITES = 1,
	VIRTIO_DIR_DEVICE_READS
} virtio_direction_t;

void virtio_fini(virtio_t *, boolean_t);
virtio_t *virtio_init(dev_info_t *, uint64_t, boolean_t);
int virtio_init_complete(virtio_t *, int);
int virtio_quiesce(virtio_t *);
void virtio_shutdown(virtio_t *);

void *virtio_intr_pri(virtio_t *);

void virtio_device_reset(virtio_t *);

uint8_t virtio_dev_get8(virtio_t *, uintptr_t);
uint16_t virtio_dev_get16(virtio_t *, uintptr_t);
uint32_t virtio_dev_get32(virtio_t *, uintptr_t);
uint64_t virtio_dev_get64(virtio_t *, uintptr_t);

void virtio_dev_put8(virtio_t *, uintptr_t, uint8_t);
void virtio_dev_put16(virtio_t *, uintptr_t, uint16_t);
void virtio_dev_put32(virtio_t *, uintptr_t, uint32_t);

boolean_t virtio_feature_present(virtio_t *, uint64_t);

virtio_queue_t *virtio_queue_alloc(virtio_t *, uint16_t, const char *,
    ddi_intr_handler_t *, void *, boolean_t, uint_t);

virtio_chain_t *virtio_queue_poll(virtio_queue_t *);
virtio_chain_t *virtio_queue_evacuate(virtio_queue_t *);
void virtio_queue_flush(virtio_queue_t *);
void virtio_queue_no_interrupt(virtio_queue_t *, boolean_t);
uint_t virtio_queue_nactive(virtio_queue_t *);
uint_t virtio_queue_size(virtio_queue_t *);

virtio_chain_t *virtio_chain_alloc(virtio_queue_t *, int);
void virtio_chain_clear(virtio_chain_t *);
void virtio_chain_free(virtio_chain_t *);
int virtio_chain_append(virtio_chain_t *, uint64_t, size_t,
    virtio_direction_t);

void *virtio_chain_data(virtio_chain_t *);
void virtio_chain_data_set(virtio_chain_t *, void *);

void virtio_chain_submit(virtio_chain_t *, boolean_t);
size_t virtio_chain_received_length(virtio_chain_t *);

int virtio_interrupts_enable(virtio_t *);
void virtio_interrupts_disable(virtio_t *);

virtio_dma_t *virtio_dma_alloc(virtio_t *, size_t, const ddi_dma_attr_t *, int,
    int);
virtio_dma_t *virtio_dma_alloc_nomem(virtio_t *, const ddi_dma_attr_t *, int);
void virtio_dma_free(virtio_dma_t *);
int virtio_dma_bind(virtio_dma_t *, void *, size_t, int, int);
void virtio_dma_unbind(virtio_dma_t *);
void virtio_dma_sync(virtio_dma_t *, int);

void *virtio_dma_va(virtio_dma_t *, size_t);
size_t virtio_dma_size(virtio_dma_t *);
uint_t virtio_dma_ncookies(virtio_dma_t *);
uint64_t virtio_dma_cookie_pa(virtio_dma_t *, uint_t);
size_t virtio_dma_cookie_size(virtio_dma_t *, uint_t);

#ifdef __cplusplus
}
#endif

#endif /* _VIRTIO_H */