summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/io/vioif/vioif.h
blob: 8e7dae320c4465c38ad134bef24c91970615ad47 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2021 Joyent, Inc.
 */

/*
 * VIRTIO NETWORK DRIVER
 */

#ifndef _VIOIF_H
#define	_VIOIF_H

#include "virtio.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * VIRTIO NETWORK CONFIGURATION REGISTERS
 *
 * These are offsets into the device-specific configuration space available
 * through the virtio_dev_*() family of functions.
 *
 * The trailing annotation on each line gives the register width in bits
 * (consistent with the spacing between the offsets) and, presumably, the
 * access mode: "R" for read-only and "R/W" for read/write.
 *
 * Some registers are tied to feature bits defined later in this file:
 * VIRTIO_NET_CONFIG_STATUS is available when VIRTIO_NET_F_STATUS is
 * negotiated, and VIRTIO_NET_CONFIG_MTU holds the maximum MTU offered by the
 * device under VIRTIO_NET_F_MTU.
 */
#define	VIRTIO_NET_CONFIG_MAC		0x00	/* 48 R/W */
#define	VIRTIO_NET_CONFIG_STATUS	0x06	/* 16 R   */
#define	VIRTIO_NET_CONFIG_MAX_VQ_PAIRS	0x08	/* 16 R   */
#define	VIRTIO_NET_CONFIG_MTU		0x0A	/* 16 R   */

/*
 * VIRTIO NETWORK VIRTQUEUES
 *
 * Note that the control queue is only present if VIRTIO_NET_F_CTRL_VQ is
 * negotiated with the device.
 */
#define	VIRTIO_NET_VIRTQ_RX		0
#define	VIRTIO_NET_VIRTQ_TX		1
#define	VIRTIO_NET_VIRTQ_CONTROL	2

/*
 * VIRTIO NETWORK FEATURE BITS
 */

/*
 * CSUM, GUEST_CSUM:
 *	Partial checksum support.  These features signal that the device will
 *	accept packets with partial checksums (CSUM), and that the driver will
 *	accept packets with partial checksums (GUEST_CSUM).  These features
 *	combine the use of the VIRTIO_NET_HDR_F_NEEDS_CSUM flag, and the
 *	"csum_start" and "csum_offset" fields, in the virtio net header.
 */
#define	VIRTIO_NET_F_CSUM		(1ULL << 0)
#define	VIRTIO_NET_F_GUEST_CSUM		(1ULL << 1)

/*
 * MTU:
 *	The device offers a maximum MTU value at VIRTIO_NET_CONFIG_MTU.  If
 *	this is not negotiated, we allow the largest possible MTU that our
 *	buffer allocations support in case jumbo frames are tacitly supported
 *	by the device.  The default MTU is always 1500.
 */
#define	VIRTIO_NET_F_MTU		(1ULL << 3)

/*
 * MAC:
 *	The device has an assigned primary MAC address.  If this feature bit is
 *	not set, the driver must provide a locally assigned MAC address.  See
 *	IEEE 802, "48-bit universal LAN MAC addresses" for more details on
 *	assignment.
 */
#define	VIRTIO_NET_F_MAC		(1ULL << 5)

/*
 * GUEST_TSO4, GUEST_TSO6, GUEST_UFO:
 *	Inbound segmentation offload support.  These features depend on having
 *	VIRTIO_NET_F_GUEST_CSUM and signal that the driver can accept large
 *	combined TCP (v4 or v6) packets, or reassembled UDP fragments.
 */
#define	VIRTIO_NET_F_GUEST_TSO4		(1ULL << 7)
#define	VIRTIO_NET_F_GUEST_TSO6		(1ULL << 8)
#define	VIRTIO_NET_F_GUEST_UFO		(1ULL << 10)

/*
 * GUEST_ECN:
 *	Depends on either VIRTIO_NET_F_GUEST_TSO4 or VIRTIO_NET_F_GUEST_TSO6.
 *	This feature means the driver will look for the VIRTIO_NET_HDR_GSO_ECN
 *	bit in the "gso_type" of the virtio net header.  This bit tells the
 *	driver that the Explicit Congestion Notification (ECN) bit was set in
 *	the original TCP packets.
 */
#define	VIRTIO_NET_F_GUEST_ECN		(1ULL << 9)

/*
 * HOST_TSO4, HOST_TSO6, HOST_UFO:
 *	Outbound segmentation offload support.  These features depend on having
 *	VIRTIO_NET_F_CSUM and signal that the device will accept large combined
 *	TCP (v4 or v6) packets that require segmentation offload, or large
 *	combined UDP packets that require fragmentation offload.
 */
#define	VIRTIO_NET_F_HOST_TSO4		(1ULL << 11)
#define	VIRTIO_NET_F_HOST_TSO6		(1ULL << 12)
#define	VIRTIO_NET_F_HOST_UFO		(1ULL << 14)

/*
 * HOST_ECN:
 *	Depends on either VIRTIO_NET_F_HOST_TSO4 or VIRTIO_NET_F_HOST_TSO6.
 *	This feature means the device will accept packets that both require
 *	segmentation offload and have the Explicit Congestion Notification
 *	(ECN) bit set.  If this feature is not present, the device must not
 *	send large segments that require ECN to be set.
 */
#define	VIRTIO_NET_F_HOST_ECN		(1ULL << 13)

/*
 * GSO:
 *	The GSO feature is, in theory, the combination of HOST_TSO4, HOST_TSO6,
 *	and HOST_ECN.  This is only useful for legacy devices; newer devices
 *	should be using the more specific bits above.
 */
#define	VIRTIO_NET_F_GSO		(1ULL << 6)

/*
 * MRG_RXBUF:
 *	This feature allows the receipt of large packets without needing to
 *	allocate large buffers.  The "virtio_net_hdr" will include an extra
 *	value: the number of buffers to gang together.
 */
#define	VIRTIO_NET_F_MRG_RXBUF		(1ULL << 15)

/*
 * STATUS:
 *	The VIRTIO_NET_CONFIG_STATUS configuration register is available, which
 *	allows the driver to read the link state from the device.
 */
#define	VIRTIO_NET_F_STATUS		(1ULL << 16)

/*
 * CTRL_VQ, CTRL_RX, CTRL_VLAN, CTRL_RX_EXTRA:
 *	These features signal that the device exposes the control queue
 *	(VIRTIO_NET_VIRTQ_CONTROL), in the case of CTRL_VQ; and that the
 *	control queue supports extra commands (CTRL_RX, CTRL_VLAN).
 *
 *	CTRL_RX_EXTRA is defined alongside the others; judging by its name it
 *	presumably advertises a further set of CTRL_RX commands (confirm
 *	against the VIRTIO specification).  Of these four bits, only CTRL_VQ
 *	and CTRL_RX appear in VIRTIO_NET_WANTED_FEATURES.
 */
#define	VIRTIO_NET_F_CTRL_VQ		(1ULL << 17)
#define	VIRTIO_NET_F_CTRL_RX		(1ULL << 18)
#define	VIRTIO_NET_F_CTRL_VLAN		(1ULL << 19)
#define	VIRTIO_NET_F_CTRL_RX_EXTRA	(1ULL << 20)

/*
 * The set of feature bits that this driver implements and therefore requests
 * from the device, listed in ascending bit order.  GUEST_CSUM is deliberately
 * absent: the driver does not presently support receiving frames that carry
 * any offload features from the device.
 */
#define	VIRTIO_NET_WANTED_FEATURES	(VIRTIO_NET_F_CSUM |		\
					VIRTIO_NET_F_MTU |		\
					VIRTIO_NET_F_MAC |		\
					VIRTIO_NET_F_GSO |		\
					VIRTIO_NET_F_HOST_TSO4 |	\
					VIRTIO_NET_F_HOST_TSO6 |	\
					VIRTIO_NET_F_HOST_ECN |		\
					VIRTIO_NET_F_CTRL_VQ |		\
					VIRTIO_NET_F_CTRL_RX)

/*
 * VIRTIO NETWORK HEADER
 *
 * This structure appears at the start of each transmit or receive packet
 * buffer.
 */
struct virtio_net_hdr {
	/* Bitwise OR of VIRTIO_NET_HDR_F_* flags (e.g., NEEDS_CSUM). */
	uint8_t				vnh_flags;
	/*
	 * One of the VIRTIO_NET_HDR_GSO_* offload types, optionally OR-ed
	 * with VIRTIO_NET_HDR_GSO_ECN.
	 */
	uint8_t				vnh_gso_type;
	/* NOTE(review): presumably the length of the packet headers. */
	uint16_t			vnh_hdr_len;
	/* NOTE(review): presumably the segment size for offload; confirm. */
	uint16_t			vnh_gso_size;
	/*
	 * Partial checksum description; used in combination with the
	 * VIRTIO_NET_HDR_F_NEEDS_CSUM flag (see the CSUM and GUEST_CSUM
	 * feature bits).
	 */
	uint16_t			vnh_csum_start;
	uint16_t			vnh_csum_offset;
	/* Two 8-bit and four 16-bit members: 10 bytes in total. */
} __packed;

/*
 * VIRTIO NETWORK HEADER: FLAGS (vnh_flags)
 */
#define	VIRTIO_NET_HDR_F_NEEDS_CSUM	0x01

/*
 * VIRTIO NETWORK HEADER: OFFLOAD OPTIONS (vnh_gso_type)
 *
 * Each of these is an offload type, except for the ECN value which is
 * logically OR-ed with one of the other types.
 */
#define	VIRTIO_NET_HDR_GSO_NONE		0
#define	VIRTIO_NET_HDR_GSO_TCPV4	1
#define	VIRTIO_NET_HDR_GSO_UDP		3
#define	VIRTIO_NET_HDR_GSO_TCPV6	4
#define	VIRTIO_NET_HDR_GSO_ECN		0x80

/*
 * VIRTIO CONTROL VIRTQUEUE HEADER
 *
 * This structure appears at the start of each control virtqueue request.
 */
struct virtio_net_ctrlq_hdr {
	/* Control queue class of the request (e.g., VIRTIO_NET_CTRL_RX). */
	uint8_t		vnch_class;
	/* Command within that class (e.g., VIRTIO_NET_CTRL_RX_PROMISC). */
	uint8_t		vnch_command;
} __packed;

/*
 * Control Queue Classes
 */
#define	VIRTIO_NET_CTRL_RX		0

/*
 * CTRL_RX commands
 */
#define	VIRTIO_NET_CTRL_RX_PROMISC	0
#define	VIRTIO_NET_CTRL_RX_ALLMULTI	1
#define	VIRTIO_NET_CTRL_RX_ALLUNI	2
#define	VIRTIO_NET_CTRL_RX_NOMULTI	3
#define	VIRTIO_NET_CTRL_RX_NOUNI	4
#define	VIRTIO_NET_CTRL_RX_NOBCAST	5

/*
 * Control queue ack values
 */
#define	VIRTIO_NET_CQ_OK		0
#define	VIRTIO_NET_CQ_ERR		1


/*
 * DRIVER PARAMETERS
 */

/*
 * At attach, we allocate a fixed pool of buffers for receipt and transmission
 * of frames.  The maximum number of buffers of each type that we will allocate
 * is specified here.  If the ring size is smaller than this number, we will
 * use the ring size instead.
 */
#define	VIRTIO_NET_TX_BUFS		256
#define	VIRTIO_NET_RX_BUFS		256

/*
 * Initially, only use a single buf for control queue requests (when
 * present). If this becomes a bottleneck, we can simply increase this
 * value as necessary.
 */
#define	VIRTIO_NET_CTRL_BUFS		1

/*
 * The virtio net header and the first buffer segment share the same DMA
 * allocation.  We round up the virtio header size to a multiple of 4 and add 2
 * bytes so that the IP header, which starts immediately after the 14 or 18
 * byte Ethernet header, is then correctly aligned:
 *
 *   0                10      12   14                              28/32
 *   | virtio_net_hdr | %4==0 | +2 | Ethernet header (14/18 bytes) | IPv4 ...
 *
 * That is, the 10 byte virtio_net_hdr is padded to 12 bytes, 2 further bytes
 * are skipped, and the IP header then begins at offset 28 (14 byte Ethernet
 * header) or 32 (18 byte Ethernet header), both of which are multiples of 4.
 *
 * Note that for this to work correctly, the DMA allocation must also be 4 byte
 * aligned.
 */
#define	VIOIF_HEADER_ALIGN		4
#define	VIOIF_HEADER_SKIP		(P2ROUNDUP( \
					    sizeof (struct virtio_net_hdr), \
					    VIOIF_HEADER_ALIGN) + 2)

/*
 * Given we are not negotiating VIRTIO_NET_F_MRG_RXBUF, the specification says
 * we must be able to accept a 1514 byte packet, or if any segmentation offload
 * features have been negotiated a 65550 byte packet.  To keep things simple,
 * we'll assume segmentation offload is possible in most cases.  In addition to
 * the packet payload, we need to account for the Ethernet header and the
 * virtio_net_hdr.
 */
#define	VIOIF_RX_DATA_SIZE		65550
#define	VIOIF_RX_BUF_SIZE		(VIOIF_RX_DATA_SIZE + \
					    sizeof (struct ether_header) + \
					    VIOIF_HEADER_SKIP)

/*
 * If we assume that a large allocation will probably have mostly 4K page sized
 * cookies, 64 segments allows us 256KB for a single frame.  We're in control
 * of the allocation we use for receive buffers, so this value only has an
 * impact on the length of chain we're able to create for external transmit
 * buffer mappings.
 */
#define	VIOIF_MAX_SEGS			64

/*
 * We pre-allocate a reasonably large buffer to copy small packets
 * there. Bigger packets are mapped, packets with multiple
 * cookies are mapped as indirect buffers.
 */
#define	VIOIF_TX_INLINE_SIZE		(2 * 1024)

/*
 * Control queue messages are very small. This is a rather arbitrary small
 * buffer size that should be sufficiently large for any control queue
 * messages we will send.
 */
#define	VIOIF_CTRL_SIZE			256

/*
 * TYPE DEFINITIONS
 */

typedef struct vioif vioif_t;

/*
 * Receive buffers are allocated in advance as a combination of DMA memory and
 * a descriptor chain.  Receive buffers can be loaned to the networking stack
 * to avoid copying, and this object contains the free routine to pass to
 * desballoc().
 *
 * When receive buffers are not in use, they are linked into the per-instance
 * free list, "vif_rxbufs" via "rb_link".  Under normal conditions, we expect
 * the free list to be empty much of the time; most buffers will be in the ring
 * or on loan.
 */
typedef struct vioif_rxbuf {
	/* The per-instance driver object this buffer is associated with. */
	vioif_t				*rb_vioif;
	/* Free routine to pass to desballoc() when the buffer is loaned. */
	frtn_t				rb_frtn;

	/* The DMA memory and descriptor chain that make up this buffer. */
	virtio_dma_t			*rb_dma;
	virtio_chain_t			*rb_chain;

	/* Linkage for the per-instance free list, "vif_rxbufs". */
	list_node_t			rb_link;
} vioif_rxbuf_t;

/*
 * Control queue buffer.  The layout mirrors the receive buffer object without
 * the free routine: a pointer to the per-instance driver object, a DMA
 * allocation, a descriptor chain, and a list linkage.
 *
 * NOTE(review): presumably idle control buffers are linked into the
 * per-instance list "vif_ctrlbufs" via "cb_link", in the same way that receive
 * and transmit buffers use their free lists -- confirm against the driver
 * source.
 */
typedef struct vioif_ctrlbuf {
	vioif_t				*cb_vioif;

	virtio_dma_t			*cb_dma;
	virtio_chain_t			*cb_chain;

	list_node_t			cb_link;
} vioif_ctrlbuf_t;

/*
 * Transmit buffers are also allocated in advance.  DMA memory is allocated for
 * the virtio net header, and to hold small packets.  Larger packets are mapped
 * from storage loaned to the driver by the network stack.
 *
 * When transmit buffers are not in use, they are linked into the per-instance
 * free list, "vif_txbufs" via "tb_link".
 */
typedef struct vioif_txbuf {
	/*
	 * NOTE(review): presumably the message block whose storage is
	 * currently mapped for transmission (i.e., on loan from the network
	 * stack), so that it can be released later -- confirm in the driver.
	 */
	mblk_t				*tb_mp;

	/*
	 * Inline buffer space (VIOIF_TX_INLINE_SIZE) for storage of the virtio
	 * net header, and to hold copied (rather than mapped) packet data.
	 */
	virtio_dma_t			*tb_dma;
	virtio_chain_t			*tb_chain;

	/*
	 * External buffer mapping.  The capacity is fixed at allocation time,
	 * and "tb_ndmaext" tracks the current number of mappings.
	 */
	virtio_dma_t			**tb_dmaext;
	uint_t				tb_dmaext_capacity;
	uint_t				tb_ndmaext;

	/* Linkage for the per-instance free list, "vif_txbufs". */
	list_node_t			tb_link;
} vioif_txbuf_t;

/*
 * Run state of a driver instance.  The NIC is RUNNING between the
 * mc_start(9E) and mc_stop(9E) calls; otherwise it is STOPPING (while
 * draining resources) and then STOPPED.  No enumerator has the value 0.
 */
typedef enum vioif_runstate {
	VIOIF_RUNSTATE_STOPPED	= 1,
	VIOIF_RUNSTATE_STOPPING	= 2,
	VIOIF_RUNSTATE_RUNNING	= 3
} vioif_runstate_t;

/*
 * Per-instance driver object.
 */
struct vioif {
	/* Device node and virtio framework handle for this instance. */
	dev_info_t			*vif_dip;
	virtio_t			*vif_virtio;

	/*
	 * NOTE(review): presumably protects the mutable per-instance state
	 * below; the exact locking rules are not visible in this header.
	 */
	kmutex_t			vif_mutex;

	/*
	 * The NIC is considered RUNNING between the mc_start(9E) and
	 * mc_stop(9E) calls.  Otherwise it is STOPPING (while draining
	 * resources) then STOPPED.  When not RUNNING, we will drop incoming
	 * frames and refuse to insert more receive buffers into the receive
	 * queue.
	 */
	vioif_runstate_t		vif_runstate;

	/* Handle for this instance within the MAC framework. */
	mac_handle_t			vif_mac_handle;

	/*
	 * Receive, transmit, and control virtqueues (see VIRTIO_NET_VIRTQ_*).
	 * The control queue is only present if VIRTIO_NET_F_CTRL_VQ is
	 * negotiated with the device.
	 */
	virtio_queue_t			*vif_rx_vq;
	virtio_queue_t			*vif_tx_vq;
	virtio_queue_t			*vif_ctrl_vq;

	/*
	 * TX virtqueue management resources.
	 * NOTE(review): "vif_tx_reclaim_tid" is presumably the id of a pending
	 * timeout used to reclaim completed transmit buffers; the meaning of
	 * the "corked" and "drain" flags is defined by the driver source.
	 */
	boolean_t			vif_tx_corked;
	boolean_t			vif_tx_drain;
	timeout_id_t			vif_tx_reclaim_tid;

	/*
	 * Configured offload features:
	 */
	unsigned int			vif_tx_csum:1;
	unsigned int			vif_tx_tso4:1;
	unsigned int			vif_tx_tso6:1;

	/*
	 * For debugging, it is useful to know whether the MAC address we
	 * are using came from the host (via VIRTIO_NET_CONFIG_MAC) or
	 * was otherwise generated or set from within the guest.
	 */
	unsigned int			vif_mac_from_host:1;

	/*
	 * NOTE(review): presumably record whether VIRTIO_NET_F_CTRL_VQ and
	 * VIRTIO_NET_F_CTRL_RX, respectively, were negotiated -- confirm.
	 */
	unsigned int			vif_has_ctrlq:1;
	unsigned int			vif_has_ctrlq_rx:1;

	/*
	 * The MTU, the largest MTU we allow (see VIRTIO_NET_F_MTU), and the
	 * MAC address we are using.
	 */
	uint_t				vif_mtu;
	uint_t				vif_mtu_max;
	uint8_t				vif_mac[ETHERADDRL];

	/*
	 * Receive buffer free list and accounting:
	 * NOTE(review): "vif_rxbufs_mem" is presumably the backing array of
	 * "vif_rxbufs_capacity" receive buffer objects -- confirm.
	 */
	list_t				vif_rxbufs;
	uint_t				vif_nrxbufs_alloc;
	uint_t				vif_nrxbufs_onloan;
	uint_t				vif_nrxbufs_onloan_max;
	uint_t				vif_rxbufs_capacity;
	vioif_rxbuf_t			*vif_rxbufs_mem;

	/*
	 * Transmit buffer free list and accounting:
	 */
	list_t				vif_txbufs;
	uint_t				vif_ntxbufs_alloc;
	uint_t				vif_txbufs_capacity;
	vioif_txbuf_t			*vif_txbufs_mem;

	/*
	 * These copy size thresholds are exposed as private MAC properties so
	 * that they can be tuned without rebooting.
	 */
	uint_t				vif_rxcopy_thresh;
	uint_t				vif_txcopy_thresh;

	/*
	 * Control queue buffer list and accounting; these fields parallel the
	 * transmit buffer fields above.
	 */
	list_t				vif_ctrlbufs;
	uint_t				vif_nctrlbufs_alloc;
	uint_t				vif_ctrlbufs_capacity;
	vioif_ctrlbuf_t			*vif_ctrlbufs_mem;

	/*
	 * Statistics visible through mac:
	 */
	uint64_t			vif_ipackets;
	uint64_t			vif_opackets;
	uint64_t			vif_rbytes;
	uint64_t			vif_obytes;
	uint64_t			vif_brdcstxmt;
	uint64_t			vif_brdcstrcv;
	uint64_t			vif_multixmt;
	uint64_t			vif_multircv;
	uint64_t			vif_norecvbuf;
	uint64_t			vif_notxbuf;
	uint64_t			vif_ierrors;
	uint64_t			vif_oerrors;

	/*
	 * Internal debugging statistics:
	 */
	uint64_t			vif_rxfail_dma_handle;
	uint64_t			vif_rxfail_dma_buffer;
	uint64_t			vif_rxfail_dma_bind;
	uint64_t			vif_rxfail_chain_undersize;
	uint64_t			vif_rxfail_no_descriptors;
	uint64_t			vif_txfail_dma_handle;
	uint64_t			vif_txfail_dma_bind;
	uint64_t			vif_txfail_indirect_limit;

	uint64_t			vif_stat_tx_reclaim;

	/* Control queue counterparts of the debugging statistics above. */
	uint64_t			vif_noctrlbuf;
	uint64_t			vif_ctrlbuf_toosmall;
};

#ifdef __cplusplus
}
#endif

#endif /* _VIOIF_H */