summaryrefslogtreecommitdiff
path: root/usr/src/uts/sun4v/sys/ldc_impl.h
blob: be381ad018b2219b70789767c47bb168eb288a3e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#ifndef _LDC_IMPL_H
#define	_LDC_IMPL_H

#ifdef __cplusplus
extern "C" {
#endif

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ioctl.h>

/* Memory map table entries */
#define	LDC_MTBL_ENTRIES	8192	/* 8 K */

/*
 * Define LDC Queue info
 *
 * One queue entry is one packet of (1 << LDC_PACKET_SHIFT) = 64 bytes, so
 * with the values below LDC_QUEUE_SIZE evaluates to 512 * 64 = 32768 bytes
 * and LDC_DEFAULT_MTU to 32768 / 4 = 8192 bytes.
 */
#define	LDC_PACKET_SHIFT	6
#define	LDC_QUEUE_ENTRIES	512
#define	LDC_MTU_MSGS		4
#define	LDC_QUEUE_SIZE		(LDC_QUEUE_ENTRIES << LDC_PACKET_SHIFT)
#define	LDC_DEFAULT_MTU		(LDC_QUEUE_SIZE / LDC_MTU_MSGS)
/*
 * NOTE(review): presumably the factor by which the Rx data queue is sized
 * relative to the Rx queue -- confirm against the users of this constant.
 */
#define	LDC_RXDQ_MULTIPLIER	2

/*
 * LDC Reliable mode - initial packet seqid
 * - If peer initiated handshake, RDX should contain init_seqid + 1
 * - If this endpoint initiated handshake first data packet should
 *   contain the message init_seqid + 1
 */
#define	LDC_INIT_SEQID	0x0

/* LDC Message types (values for the 'type' field of ldc_msg_t) */
#define	LDC_CTRL	0x01	/* Control Pkt */
#define	LDC_DATA	0x02	/* Data Pkt */
#define	LDC_ERR		0x10	/* Error Pkt */

/*
 * LDC Message Subtypes (values for the 'stype' field of ldc_msg_t).
 * Each subtype occupies its own bit.
 */
#define	LDC_INFO	0x01	/* Control/Data/Error info pkt */
#define	LDC_ACK		0x02	/* Control/Data ACK */
#define	LDC_NACK	0x04	/* Control/Data NACK */

/*
 * LDC Control Messages (values for the 'ctrl' field of ldc_msg_t).
 * Unlike the subtypes these are consecutive values, not bit flags.
 */
#define	LDC_VER		0x01	/* Version message */
#define	LDC_RTS		0x02	/* Request to Send */
#define	LDC_RTR		0x03	/* Ready To Receive */
#define	LDC_RDX		0x04	/* Ready for data exchange */

#define	LDC_CTRL_MASK	0x0f	/* Mask to read control bits (low nibble) */

/*
 * LDC Channel Transport State (tstate)
 *
 * The single-bit values are accumulated as the channel is brought up; the
 * composite values name the sets of bits that make up each stage:
 *   TS_INIT = 0x03, TS_OPEN = 0x0f, TS_READY = 0x1f,
 *   TS_VREADY = 0x3f, TS_UP = 0x7f.
 */
#define	TS_TXQ_RDY	0x01	/* allocated TX queue */
#define	TS_RXQ_RDY	0x02	/* allocated RX queue */
#define	TS_INIT		(TS_TXQ_RDY | TS_RXQ_RDY)
#define	TS_QCONF_RDY	0x04	/* registered queues with HV */
#define	TS_CNEX_RDY	0x08	/* registered channel with cnex */
#define	TS_OPEN		(TS_INIT | TS_QCONF_RDY | TS_CNEX_RDY)
#define	TS_LINK_READY	0x10	/* both endpts registered Rx queues */
#define	TS_READY	(TS_OPEN | TS_LINK_READY)
#define	TS_VER_DONE	0x20	/* negotiated version */
#define	TS_VREADY	(TS_READY | TS_VER_DONE)
#define	TS_HSHAKE_DONE	0x40	/* completed handshake */
#define	TS_UP		(TS_READY | TS_VER_DONE | TS_HSHAKE_DONE)

/* Lies outside the TS_UP bits (0x7f), so it does not overlap any of them */
#define	TS_IN_RESET	0x100	/* channel is in reset state */

/*
 * LDC Channel Transport Handshake states
 *
 * These share the TS_ prefix and reuse the same numeric values as the
 * tstate bits above, but describe the handshake progress rather than the
 * transport state; the two sets must not be mixed in one variable.
 */
#define	TS_SENT_VER	0x01	/* Sent version */
#define	TS_SENT_RTS	0x02	/* Sent RTS */
#define	TS_RCVD_RTR	0x04	/* Received RTR */
#define	TS_SENT_RDX	0x08	/* Sent RDX */
#define	TS_RCVD_VER	0x10	/* Received version */
#define	TS_RCVD_RTS	0x20	/* Received RTS */
#define	TS_SENT_RTR	0x40	/* Sent RTR */
#define	TS_RCVD_RDX	0x80	/* Received RDX */

/* LDC Interrupt State */
#define	LDC_INTR_NONE	0x00	/* No interrupts */
#define	LDC_INTR_ACTIVE	0x01	/* Interrupt being processed */
#define	LDC_INTR_PEND	0x02	/* Interrupt pending */

/*
 * LDC MSG Envelope
 *
 * The envelope is a single byte: the low six bits (LDC_LEN_MASK) carry the
 * length and the top two bits (LDC_FRAG_MASK) carry the fragment info.
 */
#define	LDC_LEN_MASK	0x3F
#define	LDC_FRAG_MASK	0xC0

#define	LDC_FRAG_START	0x40	/* frag_info = 0x01 */
#define	LDC_FRAG_STOP	0x80	/* frag_info = 0x02 */
#define	LDC_FRAG_CONT	0x00	/* frag_info = 0x00 */

/*
 * LDC will retry LDC_MAX_RETRIES times when sending or
 * receiving data or if the HV returns back EWOULDBLOCK.
 * Between each retry it will wait LDC_DELAY usecs.
 */
#define	LDC_MAX_RETRIES	1000
#define	LDC_DELAY	1

/* delay(usec) between channel unregister retries in ldc_close() */
#define	LDC_CLOSE_DELAY	1

/*
 * LDC Version information
 */
/* NOTE(review): presumably a byte offset into the packet -- confirm */
#define	LDC_PAYLOAD_VER_OFF	8	/* offset of version in payload */

/* Major/minor version pair; the type of the 'version' member of ldc_chan */
typedef struct ldc_ver {
	uint16_t	major;		/* Major version number */
	uint16_t	minor;		/* Minor version number */
} ldc_ver_t;

/*
 * Each guest consists of one or more LDC endpoints represented by a ldc_chan
 * structure. Each ldc_chan structure points to a ldc_mtbl structure that
 * contains information about the map table associated with this LDC endpoint.
 * The map table contains the list of pages being shared by this guest over
 * this endpoint with the guest at the other end of this endpoint. Each LDC
 * endpoint also points to a list of memory handles used to bind and export
 * memory segments from this guest. If a memory segment is bound, it points to
 * a memory segment structure, which in turn consists of an array of ldc_page
 * structure for all the pages within that segment. Each ldc_page structure
 * contains information about the shared page and also points to the
 * corresponding entry in the map table.
 *
 * Each LDC endpoint also points to a list of ldc_dring structures that refer
 * to both imported and exported descriptor rings. If it is an exported
 * descriptor ring, it then points to memory handle/memseg corresponding to
 * the region of memory associated with the descriptor ring.
 *
 *     +----------+   +----------+   +----------+
 *     | ldc_chan |-->| ldc_chan |-->| ldc_chan |-->....
 *     +----------+   +----------+   +----------+
 *       |  |  |
 *       |  |  |
 *       |  |  |      +-----------+     +-----------+
 *       |  |  +----->| ldc_dring |---->| ldc_dring |---->......
 *       |  |         +-----------+     +-----------+
 *       |  |               |
 *       |  |               +----------------------------+
 *       |  |                                            |
 *       |  |                                            v
 *       |  |      +----------+     +----------+     +----------+
 *       |  +----->| ldc_mhdl |---->| ldc_mhdl |---->| ldc_mhdl |---> ....
 *       |         +----------+     +----------+     +----------+
 *       v                 |                             |
 *  +----------+           |    +------------+           |    +------------+
 *  | ldc_mtbl |--+        +--->| ldc_memseg |-----+     +--->| ldc_memseg |
 *  +----------+  |             +------------+     |          +------------+
 *                |                   |            |            |       |
 *                v                   v            v            |       v
 *     +--------------+         +----------+  +--------+        |   +--------+
 *     | ldc_mte_slot |<--------| ldc_page |  | cookie |        |   | cookie |
 *     +--------------+         +----------+  +--------+        |   +--------+
 *     | ldc_mte_slot |<--------| ldc_page |  | cookie |        v
 *     +--------------+         +----------+  +--------+   +----------+
 *     | ldc_mte_slot |<-----------------------------------| ldc_page |
 *     +--------------+                                    +----------+
 *     | ldc_mte_slot |
 *     +--------------+
 *     |    ......    |/ +------------+
 *     +--------------+  |   entry    |
 *     | ldc_mte_slot |  +------------+
 *     +--------------+  | inv_cookie |
 *                     \ +------------+
 *
 */

/*
 * Message format of each packet sent over the LDC channel.
 * Each packet is 64-bytes long.
 *
 * Each packet that is sent over LDC can contain either data or acks.
 * The type will reflect the contents. The len will contain in bytes
 * the amount of data being sent. In the case of ACKs, the seqid and
 * data fields will contain the SEQIDs of messages for which ACKs are
 * being sent.
 *
 * Raw pkt format:
 *
 *          +------------------------------------------------------+
 *  0 - 7   |                 data payload                         |
 *          +------------------------------------------------------+
 *
 * Unreliable pkt format:
 *
 *          +------------------------------------------------------+
 *      0   |          seqid          | env  | ctrl | stype | type |
 *          +------------------------------------------------------+
 *  1 - 7   |                 data payload                         |
 *          +------------------------------------------------------+
 *
 * Reliable pkt format:
 *
 *          +------------------------------------------------------+
 *      0   |            seqid        | env  | ctrl | stype | type |
 *          +------------------------------------------------------+
 *      1   |          ackid          |         unused             |
 *          +------------------------------------------------------+
 *  2 - 7   |                 data payload                         |
 *          +------------------------------------------------------+
 */

/*
 * In-memory layout of one LDC packet.
 *
 * The outer union overlays the raw-mode view (_raw, payload only) with the
 * typed view (_tpkt), which starts with the type/stype/ctrl/env bytes and
 * the 32-bit sequence id. Inside the typed view, the _data union overlays
 * the unreliable payload (_ud) with the reliable-mode layout (_rl): one
 * unused 32-bit word, the 32-bit ACK id, then the payload.
 *
 * The LDC_PAYLOAD_SIZE_* constants are not defined in this header and must
 * be visible before it is included.
 * NOTE(review): the format comment above states packets are 64 bytes long;
 * confirm the payload size constants keep sizeof (ldc_msg_t) at that value.
 *
 * Member order defines the packet format and must not be changed.
 */
typedef struct ldc_msg {
	union {
		struct {
			uint8_t		_type;	/* Message type */
			uint8_t		_stype;	/* Message subtype */
			uint8_t		_ctrl;	/* Control/Error Message */
			uint8_t		_env;	/* Message Envelope */
			uint32_t	_seqid;	/* Sequence ID */

			union {
				uint8_t	_ud[LDC_PAYLOAD_SIZE_UNRELIABLE];
						/* Unreliable data payload */
				struct {
					uint32_t _unused;	/* unused */
					uint32_t _ackid;	/* ACK ID */
					uint8_t	_rd[LDC_PAYLOAD_SIZE_RELIABLE];
						/* Reliable data payload */
				} _rl;
			} _data;
		} _tpkt;

		uint8_t		_raw[LDC_PAYLOAD_SIZE_RAW];
	} _pkt;

} ldc_msg_t;

/*
 * Shorthand for reaching the nested members of ldc_msg_t, e.g. msg->seqid
 * expands to msg->_pkt._tpkt._seqid.
 *
 * These are object-like macros with very common names: once this header
 * has been included, every later use of the identifiers raw, type, stype,
 * ctrl, env, seqid, udata, ackid and rdata is rewritten by the
 * preprocessor, including local variables and members of unrelated
 * structures. Avoid those names in code that includes this header.
 */
#define	raw		_pkt._raw
#define	type		_pkt._tpkt._type
#define	stype		_pkt._tpkt._stype
#define	ctrl		_pkt._tpkt._ctrl
#define	env		_pkt._tpkt._env
#define	seqid		_pkt._tpkt._seqid
#define	udata		_pkt._tpkt._data._ud
#define	ackid		_pkt._tpkt._data._rl._ackid
#define	rdata		_pkt._tpkt._data._rl._rd

/*
 * LDC Map Table Entry (MTE)
 *
 *   6    6                               1    1  1
 *  |3    0|                       psz|   3|   1| 0| 9| 8| 7|6|5|4|      0|
 *  +------+--------------------------+----+----+--+--+--+--+-+-+-+-------+
 *  | rsvd |           PFN            | 0  | 0  |CW|CR|IW|IR|X|W|R| pgszc |
 *  +------+--------------------------+----+----+--+--+--+--+-+-+-+-------+
 *  |                       hv invalidation cookie                        |
 *  +---------------------------------------------------------------------+
 *
 * The union gives two views of the first 64-bit word: the individual
 * fields (mte_bit) and the whole word (ll). The bitfield widths add up to
 * exactly 64 bits (8 + 43 + 2 + 7 * 1 + 4). The second word of the diagram
 * (the invalidation cookie) is not part of this union.
 *
 * NOTE(review): the diagram labels the top reserved field as bits <63:60>,
 * while the bitfield comments below reserve <63:56> and place the pfn at
 * <55:13>. The bit positions in the field comments also rely on the
 * compiler allocating bitfields from the most significant bit. Confirm
 * both against the hypervisor specification.
 */
typedef union {
	struct {
		uint64_t	_rsvd2:8,	/* <63:56> reserved */
				rpfn:43,	/* <55:13> real pfn */
				_rsvd1:2,	/* <12:11> reserved */
				cw:1,		/* <10> copy write access */
				cr:1,		/* <9> copy read perm */
				iw:1,		/* <8> iommu write perm */
				ir:1,		/* <7> iommu read perm */
				x:1,		/* <6> execute perm */
				w:1,		/* <5> write perm */
				r:1,		/* <4> read perm */
				pgszc:4;	/* <3:0> pgsz code */
	} mte_bit;

	uint64_t		ll;	/* Whole entry as one 64-bit word */

} ldc_mte_t;

/*
 * Shorthand for the ldc_mte_t bitfields, e.g. e.mte_rpfn expands to
 * e.mte_bit.rpfn. The two reserved fields have no accessor.
 */
#define	mte_rpfn	mte_bit.rpfn
#define	mte_cw		mte_bit.cw
#define	mte_cr		mte_bit.cr
#define	mte_iw		mte_bit.iw
#define	mte_ir		mte_bit.ir
#define	mte_x		mte_bit.x
#define	mte_w		mte_bit.w
#define	mte_r		mte_bit.r
#define	mte_pgszc	mte_bit.pgszc

/*
 * Page-size helpers for map table entries.
 *
 * 'sz' is a page size code (presumably the value held in mte_pgszc --
 * confirm against callers). Each increment of the code multiplies the page
 * size by 8, i.e. adds three bits: MTE_BSZS_SHIFT(sz) = 3 * sz.
 *
 *   MTEBYTES(sz)	  size in bytes of a page of code sz
 *   MTEPAGES(sz)	  number of base (MMU_PAGESIZE) pages in such a page
 *   MTE_PAGE_SHIFT(sz)   log2 of MTEBYTES(sz)
 *   MTE_PAGE_OFFSET(sz)  mask selecting the offset within such a page
 *   MTE_PAGEMASK(sz)	  mask selecting the page-aligned part of an address
 *   MTE_PFNMASK(sz)	  mask selecting the page-aligned part of a base pfn
 *
 * MTEBYTES and MTEPAGES are computed in 64 bits. If MMU_PAGESIZE is an
 * int-typed constant, shifting it directly overflows a signed int (which
 * is undefined behaviour) for the larger codes a 4-bit size code can hold;
 * e.g. an 8K base page already reaches 2^31 bytes at sz = 6. The offset
 * and mask macros derived from them therefore yield uint64_t values too.
 *
 * MMU_PAGESIZE and MMU_PAGESHIFT are not defined in this header and must be
 * provided by the including file.
 */
#define	MTE_BSZS_SHIFT(sz)	((sz) * 3)
#define	MTEBYTES(sz)		((uint64_t)MMU_PAGESIZE << MTE_BSZS_SHIFT(sz))
#define	MTEPAGES(sz)		((uint64_t)1 << MTE_BSZS_SHIFT(sz))
#define	MTE_PAGE_SHIFT(sz)	(MMU_PAGESHIFT + MTE_BSZS_SHIFT(sz))
#define	MTE_PAGE_OFFSET(sz)	(MTEBYTES(sz) - 1)
#define	MTE_PAGEMASK(sz)	(~MTE_PAGE_OFFSET(sz))
#define	MTE_PFNMASK(sz)		(~(MTE_PAGE_OFFSET(sz) >> MMU_PAGESHIFT))

/*
 * LDC Map Table Slot
 *
 * One slot of the map table: the map table entry itself followed by a
 * second 64-bit word, matching the two-word layout drawn in the MTE
 * diagram (entry, then hv invalidation cookie).
 */
typedef struct ldc_mte_slot {
	ldc_mte_t	entry;		/* Map table entry */
	uint64_t	cookie;		/* HV invalidation cookie */
} ldc_mte_slot_t;

/*
 * LDC Memory Map Table
 *
 * Each LDC has a memory map table it uses to list all the pages
 * it is exporting to its peer over the channel. This structure
 * contains information about the map table and is pointed to
 * by the ldc_chan structure (its 'mtbl' member).
 */
typedef struct ldc_mtbl {
	kmutex_t		lock;		/* Table lock */
	size_t			size;		/* Table size (in bytes) */
	uint64_t		next_entry;	/* Next entry to use */
	uint64_t		num_entries;	/* Num entries in table */
	uint64_t		num_avail;	/* Num of available entries */
	boolean_t		contigmem;	/* TRUE=Contig mem alloc'd */
	ldc_mte_slot_t		*table;		/* The table itself */
} ldc_mtbl_t;

/*
 * LDC page and memory segment information
 */

/*
 * One exported page. It records the page's real address together with
 * both the index of its map table slot and a direct pointer to that slot.
 */
typedef struct ldc_page {
	uintptr_t		raddr;		/* Exported page RA */
	uint64_t		index;		/* Index in map table */
	ldc_mte_slot_t		*mte;		/* Map table entry */
} ldc_page_t;

/*
 * One bound memory segment: its virtual and real addresses, its size, the
 * per-page information and the cookies describing it.
 */
typedef struct ldc_memseg {
	caddr_t			vaddr;		/* Exported segment VA */
	uintptr_t		raddr;		/* Exported segment RA */
	size_t			size;		/* Exported segment size */
	uint64_t		npages;		/* Number of pages */
	ldc_page_t		*pages;		/* Array of exported pages */
	uint32_t		ncookies;	/* Number of cookies */
	/* NOTE(review): presumably an array of 'ncookies' entries -- confirm */
	ldc_mem_cookie_t	*cookies;	/* Cookies for this segment */
	uint64_t		next_cookie;	/* Index to next cookie */
} ldc_memseg_t;

/*
 * LDC Cookie address format
 *
 *   6       6          m+n
 *  |3|      0|          |                  m|                  0|
 *  +-+-------+----------+-------------------+-------------------+
 *  |X| pgszc |   rsvd   |      table_idx    |     page_offset   |
 *  +-+-------+----------+-------------------+-------------------+
 *
 * The page size code occupies the three bits <62:60>; bit 63 (X) is not
 * part of it. LDC_COOKIE_PGSZC_MASK is the unshifted field mask, so it is
 * meant to be applied after shifting the cookie address right by
 * LDC_COOKIE_PGSZC_SHIFT (or shifted left by it before masking in place).
 */
#define	LDC_COOKIE_PGSZC_MASK	0x7
#define	LDC_COOKIE_PGSZC_SHIFT	60

/*
 * LDC Memory handle
 *
 * Memory handles are chained through 'next'; the head of the chain for a
 * channel is the 'mhdl_list' member of ldc_chan. ldc_chan_t is declared
 * here ahead of its definition because the handle points back to its
 * channel. ldc_mstatus_t is not defined in this header.
 */
typedef struct ldc_chan ldc_chan_t;

typedef struct ldc_mhdl {
	kmutex_t		lock;		/* Mutex for memory handle */
	ldc_mstatus_t		status;		/* Memory map status */

	uint8_t			mtype;		/* Type of sharing */
	uint8_t			perm;		/* Access permissions */
	boolean_t		myshadow;	/* TRUE=alloc'd shadow mem */

	ldc_chan_t		*ldcp;		/* Pointer to channel struct */
	ldc_memseg_t		*memseg;	/* Bound memory segment */
	struct ldc_mhdl		*next;		/* Next memory handle */
} ldc_mhdl_t;

/*
 * LDC Descriptor rings
 *
 * A descriptor ring can sit on two singly linked lists at once: the list
 * of rings belonging to one channel (linked through 'ch_next') and the
 * list of all rings (linked through 'next').
 */

typedef struct ldc_dring {
	kmutex_t		lock;		/* Desc ring lock */
	ldc_mstatus_t		status;		/* Desc ring status */

	uint32_t		dsize;		/* Descriptor size */
	uint32_t		length;		/* Descriptor ring length */
	uint64_t		size;		/* Desc ring size (in bytes) */
	caddr_t			base;		/* Descriptor ring base addr */

	ldc_chan_t		*ldcp;		/* Pointer to bound channel */
	ldc_mem_handle_t	mhdl;		/* Mem handle to desc ring */

	struct ldc_dring	*ch_next;	/* Next dring in channel */
	struct ldc_dring	*next;		/* Next dring overall */

} ldc_dring_t;


/*
 * Channel specific information is kept in a separate
 * structure. These are then stored in an array indexed
 * by the channel number.
 *
 * NOTE(review): the structure below is chained through 'next', and the
 * soft state keeps a 'chan_list' pointer described as a list of endpoints,
 * so the "array" wording above may be out of date -- confirm.
 *
 * Several member types (ldc_status_t, ldc_dev_t, ldc_mode_t) are not
 * defined in this header.
 */
struct ldc_chan {
	ldc_chan_t	*next;		/* Next channel */

	kmutex_t	lock;		/* Channel lock */
	uint64_t	id;		/* Channel ID */
	ldc_status_t	status;		/* Channel status */
	uint32_t	tstate;		/* Channel transport state */
	uint32_t	hstate;		/* Channel transport handshake state */

	ldc_dev_t	devclass;	/* Associated device class */
	uint64_t	devinst;	/* Associated device instance */
	ldc_mode_t	mode;		/* Channel mode */

	uint64_t	mtu;		/* Max TU size */

	ldc_ver_t	version;	/* Channel version */
	uint32_t	next_vidx;	/* Next version to match */

	uint_t		(*cb)(uint64_t event, caddr_t arg);
	caddr_t		cb_arg;		/* Channel callback and arg */
	boolean_t	cb_inprogress;	/* Channel callback in progress */
	boolean_t	cb_enabled;	/* Channel callbacks are enabled */

	uint8_t		tx_intr_state;	/* Tx interrupt state */
	uint8_t		rx_intr_state;	/* Rx interrupt state */

	kmutex_t	tx_lock;	/* Transmit lock */
	uint64_t	tx_q_entries;	/* Num entries in transmit queue */
	uint64_t	tx_q_va;	/* Virtual addr of transmit queue */
	uint64_t	tx_q_ra;	/* Real addr of transmit queue */
	uint64_t	tx_head;	/* Tx queue head */
	uint64_t	tx_ackd_head;	/* Tx queue ACKd head (Reliable) */
	uint64_t	tx_tail;	/* Tx queue tail */

	uint64_t	rx_q_entries;	/* Num entries in receive queue */
	uint64_t	rx_q_va;	/* Virtual addr of receive queue */
	uint64_t	rx_q_ra;	/* Real addr of receive queue */

	uint64_t	rx_dq_entries;	/* Num entries in the data queue */
	uint64_t	rx_dq_va;	/* Virtual addr of the data queue */
	uint64_t	rx_dq_head;	/* Receive data queue head */
	uint64_t	rx_dq_tail;	/* Receive data queue tail */
	uint64_t	rx_ack_head;	/* Receive data ACK peek head ptr */

	uint64_t	link_state;	/* Underlying HV channel state */

	ldc_mtbl_t	*mtbl;		/* Memory table used by channel */
	ldc_mhdl_t	*mhdl_list;	/* List of memory handles */
	kmutex_t	mlist_lock;	/* Mem handle list lock */

	ldc_dring_t	*exp_dring_list; /* Exported desc ring list */
	kmutex_t	exp_dlist_lock;	/* Lock for exported desc ring list */
	ldc_dring_t	*imp_dring_list; /* Imported desc ring list */
	kmutex_t	imp_dlist_lock;	/* Lock for imported desc ring list */

	uint8_t		pkt_payload;	/* Size of packet payload */

	uint32_t	last_msg_snt;	/* Seqid of last packet sent */
	uint32_t	last_ack_rcd;	/* Seqid of last ACK recd */
	uint32_t	last_msg_rcd;	/* Seqid of last packet received */

	uint32_t	stream_remains;	/* Number of bytes in stream */
					/* packet buffer */
	uint32_t	stream_offset;	/* Offset into packet buffer for */
					/* next read */
	uint8_t		*stream_bufferp; /* Stream packet buffer */

	/*
	 * Per-channel read and write routines, called through these
	 * pointers. NOTE(review): presumably chosen according to 'mode'
	 * when the channel is set up -- confirm.
	 */
	int		(*read_p)(ldc_chan_t *ldcp, caddr_t bufferp,
				size_t *sizep);
	int		(*write_p)(ldc_chan_t *ldcp, caddr_t bufferp,
				size_t *sizep);

	/* Per-channel routine reporting the read queue head/tail/link state */
	uint64_t	(*readq_get_state)(ldc_chan_t *ldcp, uint64_t *head,
				uint64_t *tail, uint64_t *link_state);

	/* Per-channel routine that moves the read queue head to 'head' */
	int		(*readq_set_head)(ldc_chan_t *ldcp, uint64_t head);
};


/*
 * LDC module soft state structure
 *
 * Module-wide bookkeeping: the channel nexus information, the lists of
 * channels and descriptor rings, the channel counters and the kmem caches
 * used for memory handles and memory segments. ldc_cnex_t is not defined
 * in this header.
 */
typedef struct ldc_soft_state {
	kmutex_t	lock;		/* Protects ldc_soft_state_t  */
	ldc_cnex_t	cinfo;		/* channel nexus info */
	uint64_t	channel_count;	/* Number of channels */
	uint64_t	channels_open;	/* Number of open channels */
	ldc_chan_t	*chan_list;	/* List of LDC endpoints */
	ldc_dring_t	*dring_list;	/* Descriptor rings (for export) */

	kmem_cache_t	*memhdl_cache;	/* Memory handle cache */
	kmem_cache_t	*memseg_cache;	/* Memory segment cache */

	uint64_t	mapin_size;		/* Total mapin sz per guest  */
} ldc_soft_state_t;


/*
 * Debugging Utilities
 *
 * D1, D2 and DWARN take the same arguments as ldcdebug() and forward them
 * only when the matching bit is set in ldcdbg (0x01, 0x02 and 0x04
 * respectively). Neither ldcdbg nor ldcdebug() is declared in this header;
 * both must be visible wherever the macros are used in a DEBUG build.
 *
 * In non-DEBUG builds the macros expand to nothing, so their arguments are
 * never evaluated and must not carry side effects the code depends on.
 *
 * The DEBUG variants are wrapped in do { } while (0) so that each use is
 * exactly one statement. A bare "if (...) ldcdebug" expansion would
 * silently capture the else branch of an enclosing unbraced if/else.
 *
 * DBG_ALL_LDCS is parenthesized so the minus sign cannot combine with
 * neighbouring tokens at the point of use.
 */
#define	DBG_ALL_LDCS	(-1)
#ifdef	DEBUG
#define	D1(...)					\
	do {					\
		if (ldcdbg & 0x01)		\
			ldcdebug(__VA_ARGS__);	\
	} while (0)
#define	D2(...)					\
	do {					\
		if (ldcdbg & 0x02)		\
			ldcdebug(__VA_ARGS__);	\
	} while (0)
#define	DWARN(...)				\
	do {					\
		if (ldcdbg & 0x04)		\
			ldcdebug(__VA_ARGS__);	\
	} while (0)
#else
#define	D1(...)
#define	D2(...)
#define	DWARN(...)
#endif

#ifdef __cplusplus
}
#endif

#endif /* _LDC_IMPL_H */