1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
|
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* This header file contains the basic data structures which the
* virtual switch (vsw) uses to communicate with vnet clients.
*
* The virtual switch reads the machine description (MD) to
* determine how many port_t structures to create (each port_t
* can support communications to a single network device). The
* port_t's are maintained in a linked list.
*
* Each port in turn contains a number of logical domain channels
* (ldc's) which are inter domain communications channels which
* are used for passing small messages between the domains. There
* may be any number of channels associated with each port, though
* currently most devices only have a single channel. The current
* implementation provides support for only one channel per port.
*
* The ldc is a bi-directional channel, which is divided up into
* two directional 'lanes', one outbound from the switch to the
* virtual network device, the other inbound to the switch.
* Depending on the type of device each lane may have separate
* communication parameters (such as mtu etc).
*
* For those network clients which use descriptor rings the
* rings are associated with the appropriate lane. I.e. rings
* which the switch exports are associated with the outbound lanes
* while those which the network clients are exporting to the switch
* are associated with the inbound lane.
*
* In diagram form the data structures look as follows:
*
* vsw instance
*     |
*     +----->port_t----->port_t----->port_t----->
*             |
*             +--->ldc_t
*                    |
*                    +--->lane_t (inbound)
*                    |       |
*                    |       +--->dring
*                    |
*                    +--->lane_t (outbound)
*                            |
*                            +--->dring
*
*/
#ifndef _VSW_LDC_H
#define _VSW_LDC_H
#ifdef __cplusplus
extern "C" {
#endif
/*
* LDC pkt transfer MTU - largest msg size used
*/
#define VSW_LDC_MTU 64
/*
* Number of uint64_t words in the default message buffer (def_msg_t):
* VNET_DRING_REG_EXT_MSG_SIZE_MAX divided by the size of one word.
* The division truncates, so the buffer covers the full maximum only
* when that maximum is a multiple of sizeof (uint64_t).
* VNET_DRING_REG_EXT_MSG_SIZE_MAX is not defined in this header.
*/
#define VSW_DEF_MSG_WORDS \
(VNET_DRING_REG_EXT_MSG_SIZE_MAX / sizeof (uint64_t))
/*
* Default message type.
*
* An untyped buffer of VSW_DEF_MSG_WORDS 64-bit words.
* vsw_ctrl_task_t embeds one of these by value as its pktp member.
*/
typedef struct def_msg {
uint64_t data[VSW_DEF_MSG_WORDS];
} def_msg_t;
/*
* Currently only support one major/minor pair.
*/
#define VSW_NUM_VER 1
/*
* One protocol version, expressed as a major/minor pair.
*/
typedef struct ver_sup {
uint16_t ver_major; /* major version number */
uint16_t ver_minor; /* minor version number */
} ver_sup_t;
/*
* Lane states.
*
* Every non-zero value below is a distinct single bit, so several
* states can be OR-ed together into one word.
* NOTE(review): presumably accumulated in lane_t's lstate member -
* confirm against the handshake code.
*
* Apart from the two LANE_* values, the names follow the pattern
* VSW_<phase>_<INFO|ACK|NACK>_<SENT|RECV>, where the phase is one of
* VER, ATTR, DRING, RDX or MCST.
*/
#define VSW_LANE_INACTIV 0x0 /* No params set for lane */
#define VSW_VER_INFO_SENT 0x1 /* Version # sent to peer */
#define VSW_VER_INFO_RECV 0x2 /* Version # recv from peer */
#define VSW_VER_ACK_RECV 0x4
#define VSW_VER_ACK_SENT 0x8
#define VSW_VER_NACK_RECV 0x10
#define VSW_VER_NACK_SENT 0x20
#define VSW_ATTR_INFO_SENT 0x40 /* Attributes sent to peer */
#define VSW_ATTR_INFO_RECV 0x80 /* Peer attributes received */
#define VSW_ATTR_ACK_SENT 0x100
#define VSW_ATTR_ACK_RECV 0x200
#define VSW_ATTR_NACK_SENT 0x400
#define VSW_ATTR_NACK_RECV 0x800
#define VSW_DRING_INFO_SENT 0x1000 /* Dring info sent to peer */
#define VSW_DRING_INFO_RECV 0x2000 /* Dring info received */
#define VSW_DRING_ACK_SENT 0x4000
#define VSW_DRING_ACK_RECV 0x8000
#define VSW_DRING_NACK_SENT 0x10000
#define VSW_DRING_NACK_RECV 0x20000
#define VSW_RDX_INFO_SENT 0x40000 /* RDX sent to peer */
#define VSW_RDX_INFO_RECV 0x80000 /* RDX received from peer */
#define VSW_RDX_ACK_SENT 0x100000
#define VSW_RDX_ACK_RECV 0x200000
#define VSW_RDX_NACK_SENT 0x400000
#define VSW_RDX_NACK_RECV 0x800000
#define VSW_MCST_INFO_SENT 0x1000000
#define VSW_MCST_INFO_RECV 0x2000000
#define VSW_MCST_ACK_SENT 0x4000000
#define VSW_MCST_ACK_RECV 0x8000000
#define VSW_MCST_NACK_SENT 0x10000000
#define VSW_MCST_NACK_RECV 0x20000000
#define VSW_LANE_ACTIVE 0x40000000 /* Lane open to xmit data */
/*
* Handshake milestones.
*
* One single-bit value per stage of the handshake, as described by the
* comment on each line.
* NOTE(review): presumably recorded in vsw_ldc_t's hphase member -
* confirm against the handshake code.
*/
#define VSW_MILESTONE0 0x1 /* ver info exchanged */
#define VSW_MILESTONE1 0x2 /* attribute exchanged */
#define VSW_MILESTONE2 0x4 /* dring info exchanged */
#define VSW_MILESTONE3 0x8 /* rdx exchanged */
#define VSW_MILESTONE4 0x10 /* handshake complete */
/*
* Lane direction (relative to ourselves).
*
* Selects one of the two lanes of a channel: the lane inbound to the
* switch or the lane outbound from the switch to the peer.
*/
#define INBOUND 0x1
#define OUTBOUND 0x2
/*
* Peer session id received.
* NOTE(review): presumably a flag for vsw_ldc_t's session_status
* member - confirm against the code that sets it.
*/
#define VSW_PEER_SESSION 0x1
/*
* Maximum number of consecutive reads of data from channel.
* How the limit is enforced is not visible in this header.
*/
#define VSW_MAX_CHAN_READ 50
/*
* Currently only support one ldc per port.
* vsw_port_t accordingly holds a single vsw_ldc_t pointer (ldcp).
*/
#define VSW_PORT_MAX_LDCS 1 /* max # of ldcs per port */
/*
* Used for port add/deletion.
*/
#define VSW_PORT_UPDATED 0x1
/*
* Result codes for an ldc transmit attempt. These are plain
* enumerated values (0, 1, 2), not bit flags.
*/
#define LDC_TX_SUCCESS 0 /* ldc transmit success */
#define LDC_TX_FAILURE 1 /* ldc transmit failure */
#define LDC_TX_NORESOURCES 2 /* out of descriptors */
/*
* Descriptor ring info
*
* Each descriptor element has a pre-allocated data buffer
* associated with it, into which data being transmitted is
* copied. By pre-allocating we speed up the copying process.
* The buffer is re-used once the peer has indicated that it is
* finished with the descriptor.
*/
#define VSW_RING_EL_DATA_SZ 2048 /* Size of data section (bytes) */
/*
* Size of the type named vnet_private_desc_t.
* NOTE(review): that type is not declared in this header; the private
* descriptor declared here is vsw_private_desc_t. Confirm which type
* this size is meant to measure.
*/
#define VSW_PRIV_SIZE sizeof (vnet_private_desc_t)
/*
* Dimension of the memcookie[] array in vsw_private_desc_t: the number
* of whole MMU pages in ETHERMTU, plus two.
* NOTE(review): the extra two presumably allow for a partial page and
* for a buffer that does not start on a page boundary - confirm.
*/
#define VSW_MAX_COOKIES ((ETHERMTU >> MMU_PAGESHIFT) + 2)
/*
* Size of the mblk in each mblk pool.
* One constant per pool; each name carries its own value.
* NOTE(review): units are presumably bytes - confirm.
*/
#define VSW_MBLK_SZ_128 128
#define VSW_MBLK_SZ_256 256
#define VSW_MBLK_SZ_2048 2048
/*
* Number of mblks in each mblk pool.
*/
#define VSW_NUM_MBLKS 1024
/*
* Number of rcv buffers in RxDringData mode.
*
* This is not a constant: it expands to the product of
* vsw_num_descriptors and vsw_nrbufs_factor, neither of which is
* declared in this header. Both must be visible wherever the macro
* is used, and the product is evaluated at each use.
*/
#define VSW_RXDRING_NRBUFS (vsw_num_descriptors * vsw_nrbufs_factor)
/*
* Descriptor ring index helpers.
*
* dp points at a structure with a num_descriptors member (dring_info_t
* in this header) and i holds a ring index; for the INCR/DECR forms i
* must be a modifiable lvalue. Wrapping is done by masking with
* (num_descriptors - 1), which gives modulo behaviour only when
* num_descriptors is a power of two. Every macro evaluates i more than
* once, so i must not have side effects.
*/

/* advance i by one slot, wrapping at the end of the ring */
#define INCR_DESC_INDEX(dp, i) \
((i) = (((dp)->num_descriptors - 1) & ((i) + 1)))
/* step i back by one slot, wrapping at the start of the ring */
#define DECR_DESC_INDEX(dp, i) \
((i) = (((dp)->num_descriptors - 1) & ((i) - 1)))
/* the rx and tx spellings are aliases of the generic helpers */
#define INCR_RXI INCR_DESC_INDEX
#define DECR_RXI DECR_DESC_INDEX
#define INCR_TXI INCR_DESC_INDEX
#define DECR_TXI DECR_DESC_INDEX
/* non-zero when i lies in the range [0, num_descriptors) */
#define CHECK_DESC_INDEX(dp, i) \
(!((i) < 0) && ((dp)->num_descriptors > (i)))
#define CHECK_TXI CHECK_DESC_INDEX
#define CHECK_RXI CHECK_DESC_INDEX
/*
* Private descriptor
*
* Switch-side bookkeeping for one descriptor of a ring. It refers to a
* public descriptor through descp and carries the LDC memory handle
* and cookies for the descriptor's data buffer.
*/
typedef struct vsw_private_desc {
/*
* Below lock must be held when accessing the state of
* a descriptor on either the private or public sections
* of the ring.
*/
kmutex_t dstate_lock;
uint64_t dstate; /* descriptor state, see dstate_lock */
vnet_public_desc_t *descp; /* a public descriptor (presumably the paired one) */
ldc_mem_handle_t memhandle; /* LDC memory handle */
void *datap; /* data buffer (presumably the pre-allocated one) */
uint64_t datalen; /* length that goes with datap (units not shown here) */
uint64_t ncookies; /* presumably # of memcookie[] entries in use */
ldc_mem_cookie_t memcookie[VSW_MAX_COOKIES]; /* LDC memory cookies */
int bound; /* NOTE(review): presumably set once memhandle is bound */
} vsw_private_desc_t;
/*
* Descriptor ring structure
*
* Describes one descriptor ring: its geometry, its LDC handles and
* cookies, the tx/rx cursors and the base addresses of the public,
* private and data sections. The INCR/DECR/CHECK index macros in this
* header operate on num_descriptors. Which members are used in which
* dring mode is not visible from this header.
*/
typedef struct dring_info {
kmutex_t dlock; /* sync access */
uint32_t num_descriptors; /* # of descriptors */
uint32_t descriptor_size; /* size of descriptor */
uint32_t options; /* dring options (mode) */
ldc_dring_handle_t dring_handle; /* dring LDC handle */
uint32_t dring_ncookies; /* # of dring cookies */
/* the array below has room for exactly one cookie */
ldc_mem_cookie_t dring_cookie[1]; /* LDC cookie of dring */
ldc_mem_handle_t data_handle; /* data area LDC handle */
uint32_t data_ncookies; /* # of data area cookies */
ldc_mem_cookie_t *data_cookie; /* data area LDC cookies */
uint64_t ident; /* identifier sent to peer */
uint64_t end_idx; /* last idx processed */
/* signed, unlike the other index members */
int64_t last_ack_recv; /* last ack received */
kmutex_t txlock; /* protect tx desc alloc */
uint32_t next_txi; /* next tx descriptor index */
uint32_t next_rxi; /* next expected recv index */
kmutex_t restart_lock; /* protect restart_reqd */
boolean_t restart_reqd; /* send restart msg */
uint32_t restart_peer_txi; /* index to restart peer */
void *pub_addr; /* base of public section */
void *priv_addr; /* base of private section */
void *data_addr; /* base of data section */
size_t data_sz; /* size of data section */
size_t desc_data_sz; /* size of descr data blk */
uint8_t dring_mtype; /* dring mem map type */
uint32_t num_bufs; /* # of buffers */
vio_mblk_pool_t *rx_vmp; /* rx mblk pool */
vio_mblk_t **rxdp_to_vmp; /* descr to buf map tbl */
} dring_info_t;
/*
* Each ldc connection is comprised of two lanes, incoming
* from a peer, and outgoing to that peer. Each lane shares
* common ldc parameters and also has private lane-specific
* parameters.
*
* vsw_ldc_t embeds one lane_t per direction (lane_in and lane_out).
*/
typedef struct lane {
uint64_t lstate; /* Lane state */
uint16_t ver_major; /* Version major number */
uint16_t ver_minor; /* Version minor number */
uint64_t seq_num; /* Sequence number */
uint64_t mtu; /* ETHERMTU */
uint64_t addr; /* Unique physical address */
uint8_t addr_type; /* Only MAC address at moment */
uint8_t xfer_mode; /* Dring or Pkt based */
uint8_t ack_freq; /* Only non zero for Pkt based xfer */
uint32_t physlink_update; /* physlink updates */
uint8_t dring_mode; /* Descriptor ring mode */
/*
* NOTE(review): dring_info_t as declared in this header has no link
* member, so despite the word "List" this pointer can reference only
* one ring (or an array of them) - confirm against the dring code.
*/
dring_info_t *dringp; /* List of drings for this lane */
} lane_t;
/*
* channel drain states
* NOTE(review): presumably the values of vsw_ldc_t's drain_state
* member - confirm against the code that sets it.
*/
#define VSW_LDC_INIT 0x1 /* Initial non-drain state */
#define VSW_LDC_DRAINING 0x2 /* Channel draining */
/*
* vnet-protocol-version dependent function prototypes.
*
* vsw_ldc_t holds one pointer of each type (its tx, rx_pktdata and
* rx_dringdata members). The meaning of the individual arguments is
* not visible from this header.
*/
typedef int (*vsw_ldctx_t) (void *, mblk_t *, mblk_t *, uint32_t);
typedef void (*vsw_ldcrx_pktdata_t) (void *, void *, uint32_t);
typedef void (*vsw_ldcrx_dringdata_t) (void *, void *);
/*
* ldc information associated with a vsw-port
*
* Per-channel state: the channel id, handle and status, handshake and
* session bookkeeping, the inbound and outbound lanes, the control
* fields of the tx, message and receive threads, the
* protocol-version dependent tx/rx entry points and the channel
* statistics.
*/
typedef struct vsw_ldc {
struct vsw_ldc *ldc_next; /* next ldc in the list */
struct vsw_port *ldc_port; /* associated port */
struct vsw *ldc_vswp; /* associated vsw */
kmutex_t ldc_cblock; /* sync callback processing */
kmutex_t ldc_txlock; /* sync transmits */
kmutex_t ldc_rxlock; /* sync rx */
uint64_t ldc_id; /* channel number */
ldc_handle_t ldc_handle; /* channel handle */
kmutex_t drain_cv_lock; /* presumably pairs with drain_cv */
kcondvar_t drain_cv; /* channel draining */
int drain_state; /* presumably VSW_LDC_INIT or VSW_LDC_DRAINING */
uint32_t hphase; /* handshake phase */
int hcnt; /* # handshake attempts */
kmutex_t status_lock; /* presumably protects ldc_status */
ldc_status_t ldc_status; /* channel status */
uint8_t reset_active; /* reset flag */
uint64_t local_session; /* Our session id */
uint64_t peer_session; /* Our peers session id */
uint8_t session_status; /* Session recv'd, sent */
uint32_t hss_id; /* Handshake session id */
uint64_t next_ident; /* Next dring ident # to use */
lane_t lane_in; /* Inbound lane */
lane_t lane_out; /* Outbound lane */
uint8_t dev_class; /* Peer device class */
boolean_t pls_negotiated; /* phys link state update ? */
vio_multi_pool_t vmp; /* Receive mblk pools */
uint32_t max_rxpool_size; /* max size of rxpool in use */
uint64_t *ldcmsg; /* msg buffer for ldc_read() */
uint64_t msglen; /* size of ldcmsg */
uint32_t dringdata_msgid; /* msgid in RxDringData mode */
/* tx thread fields */
kthread_t *tx_thread; /* tx thread */
uint32_t tx_thr_flags; /* tx thread flags */
kmutex_t tx_thr_lock; /* lock for tx thread */
kcondvar_t tx_thr_cv; /* cond.var for tx thread */
mblk_t *tx_mhead; /* tx mblks head */
mblk_t *tx_mtail; /* tx mblks tail */
uint32_t tx_cnt; /* # of pkts queued for tx */
/* message thread fields */
kthread_t *msg_thread; /* message thread */
uint32_t msg_thr_flags; /* message thread flags */
kmutex_t msg_thr_lock; /* lock for message thread */
kcondvar_t msg_thr_cv; /* cond.var for msg thread */
/* receive thread fields */
kthread_t *rcv_thread; /* receive thread */
uint32_t rcv_thr_flags; /* receive thread flags */
kmutex_t rcv_thr_lock; /* lock for receive thread */
kcondvar_t rcv_thr_cv; /* cond.var for recv thread */
/* protocol-version dependent entry points */
vsw_ldctx_t tx; /* transmit function */
vsw_ldcrx_pktdata_t rx_pktdata; /* process raw data msg */
vsw_ldcrx_dringdata_t rx_dringdata; /* process dring data msg */
/* channel statistics */
vgen_stats_t ldc_stats; /* channel statistics */
kstat_t *ksp; /* channel kstats */
} vsw_ldc_t;
/*
* worker thread flags
* NOTE(review): presumably used in the tx_thr_flags, msg_thr_flags and
* rcv_thr_flags members of vsw_ldc_t - confirm against the thread code.
*/
#define VSW_WTHR_DATARCVD 0x01 /* data received */
#define VSW_WTHR_STOP 0x02 /* stop worker thread request */
/*
* multicast addresses port is interested in
*
* Entries chain through nextp; vsw_port_t's mcap member points at
* such a list.
*/
typedef struct mcst_addr {
struct mcst_addr *nextp; /* next entry in the list */
struct ether_addr mca; /* multicast address */
uint64_t addr; /* mcast addr converted to hash key */
boolean_t mac_added; /* added into physical device */
} mcst_addr_t;
/*
* Port detach states
* NOTE(review): presumably the values of vsw_port_t's state member -
* confirm against the port detach code.
*/
#define VSW_PORT_INIT 0x1 /* Initial non-detach state */
#define VSW_PORT_DETACHING 0x2 /* In process of being detached */
#define VSW_PORT_DETACHABLE 0x4 /* Safe to detach */
/*
* port information associated with a vsw
*
* One entry per port of the switch. Ports chain through p_next and
* each refers to its channel through ldcp. The structure also holds
* the port's mac client handles, multicast address list, VLAN
* configuration, HybridIO flags and bandwidth limit.
*/
typedef struct vsw_port {
int p_instance; /* port instance */
struct vsw_port *p_next; /* next port in the list */
struct vsw *p_vswp; /* associated vsw */
int num_ldcs; /* # of ldcs in the port */
uint64_t *ldc_ids; /* ldc ids */
vsw_ldc_t *ldcp; /* ldc for this port */
kmutex_t tx_lock; /* transmit lock */
/* transmit routine: takes a channel and an mblk, returns int */
int (*transmit)(vsw_ldc_t *, mblk_t *);
int state; /* port state */
kmutex_t state_lock; /* presumably protects state */
kcondvar_t state_cv; /* presumably signalled on state changes */
krwlock_t maccl_rwlock; /* protect fields below */
mac_client_handle_t p_mch; /* mac client handle */
mac_unicast_handle_t p_muh; /* mac unicast handle */
kmutex_t mca_lock; /* multicast lock */
mcst_addr_t *mcap; /* list of multicast addrs */
boolean_t addr_set; /* Addr set where */
/*
* mac address of the port & connected device
*/
struct ether_addr p_macaddr;
uint16_t pvid; /* port vlan id (untagged) */
struct vsw_vlanid *vids; /* vlan ids (tagged) */
uint16_t nvids; /* # of vids */
mod_hash_t *vlan_hashp; /* vlan hash table */
uint32_t vlan_nchains; /* # of vlan hash chains */
/* HybridIO related info */
uint32_t p_hio_enabled; /* Hybrid mode enabled? */
uint32_t p_hio_capable; /* Port capable of HIO */
/* bandwidth limit */
uint64_t p_bandwidth; /* bandwidth limit */
} vsw_port_t;
/*
* list of ports per vsw
*
* head points at the first vsw_port_t; the ports chain through their
* p_next members.
*/
typedef struct vsw_port_list {
vsw_port_t *head; /* head of the list */
krwlock_t lockrw; /* sync access(rw) to the list */
int num_ports; /* number of ports in the list */
} vsw_port_list_t;
/*
* Taskq control message
*
* Bundles a channel pointer, a message buffer and a session id into
* one object.
*/
typedef struct vsw_ctrl_task {
vsw_ldc_t *ldcp; /* channel (presumably the one the message belongs to) */
def_msg_t pktp; /* message buffer, embedded by value (not a pointer) */
uint32_t hss_id; /* presumably a handshake session id, as in vsw_ldc_t */
} vsw_ctrl_task_t;
/*
* State of connection to peer. Some of these states
* can be mapped to LDC events as follows:
*
* VSW_CONN_RESET -> LDC_RESET_EVT
* VSW_CONN_UP -> LDC_UP_EVT
*
* VSW_CONN_RESTART has no LDC event listed here.
*/
#define VSW_CONN_UP 0x1 /* Connection has come up */
#define VSW_CONN_RESET 0x2 /* Connection reset */
#define VSW_CONN_RESTART 0x4 /* Restarting handshake on connection */
/*
* A connection event paired with a channel pointer.
*/
typedef struct vsw_conn_evt {
uint16_t evt; /* Connection event */
vsw_ldc_t *ldcp; /* channel (presumably the one the event is for) */
} vsw_conn_evt_t;
/*
* Ethernet broadcast address definition.
*
* All six initializer values are 0xff. Because this is a static
* definition in a header, every source file that includes the header
* gets its own private copy of the object.
*/
static struct ether_addr etherbroadcastaddr = {
0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
/*
* Non-zero when the destination address of the Ethernet header pointed
* to by ehp equals etherbroadcastaddr, compared over ETHERADDRL bytes.
*
* ehp: pointer to a header with an ether_dhost member. It is evaluated
* once and is parenthesised in the expansion, so expressions such as
* &hdr or p + 1 may be passed.
*/
#define IS_BROADCAST(ehp) \
(bcmp(&(ehp)->ether_dhost, &etherbroadcastaddr, ETHERADDRL) == 0)
/*
* Non-zero when the least significant bit of the first octet of the
* destination address is set. The broadcast address (all octets 0xff)
* satisfies this test as well.
*
* ehp: pointer to a header whose ether_dhost member has an
* ether_addr_octet array. It is evaluated once and is parenthesised
* in the expansion, so expressions such as &hdr or p + 1 may be passed.
*/
#define IS_MULTICAST(ehp) \
(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
/*
* Shorthand for the reader/writer lock calls. x is handed to
* rw_enter() or rw_exit() as the lock argument; READ_ENTER asks for
* RW_READER and WRITE_ENTER for RW_WRITER.
*/
#define READ_ENTER(x) rw_enter((x), RW_READER)
#define WRITE_ENTER(x) rw_enter((x), RW_WRITER)
#define RW_EXIT(x) rw_exit((x))
/*
* Atomically increment / decrement the 32-bit ref_cnt member of the
* object that portp points to.
*
* NOTE(review): vsw_port_t as declared in this header has no ref_cnt
* member, so expanding either macro with a vsw_port_t pointer would
* not compile. Confirm whether these macros are still used, and
* either restore the member or retire the macros.
*/
#define VSW_PORT_REFHOLD(portp) atomic_inc_32(&((portp)->ref_cnt))
#define VSW_PORT_REFRELE(portp) atomic_dec_32(&((portp)->ref_cnt))
#ifdef __cplusplus
}
#endif
#endif /* _VSW_LDC_H */
|