summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/rpc/rpc_rdma.h
blob: 875d3cacb200a2e2d63a42646b86759d5ddf1776 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_RPC_RPC_RDMA_H
#define	_RPC_RPC_RDMA_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <rpc/rpc.h>
#include <rpc/rpc_sztypes.h>
#include <sys/sunddi.h>
#include <sys/sunldi.h>

#ifdef __cplusplus
extern "C" {
#endif

#define	RPCRDMA_VERS	0	/* Version of the RPC over RDMA protocol */
#define	RDMATF_VERS	1	/* Version of the API used by RPC for RDMA */
#define	RDMATF_VERS_1	1	/* Current version of RDMATF */

/*
 * The size of an RPC call or reply message
 */
#define	RPC_MSG_SZ  1024

/*
 * Storage for a chunk list
 */
#define	RPC_CL_SZ  1024

/*
 * Chunk size
 */
#define	MINCHUNK  1024

/*
 * Size of receive buffer
 */
#define	RPC_BUF_SIZE	2048

#define	NOWAIT	0	/* don't wait for operation of complete */
#define	WAIT	1	/* wait and ensure that operation is complete */

/*
 * RDMA xdr buffer control and other control flags. Add new flags here,
 * set them in private structure for xdr over RDMA in xdr_rdma.c
 */
#define	RDMA_NOCHUNK		0x1

/*
 * Return codes from RDMA operations
 */
typedef enum {

	RDMA_SUCCESS = 0,	/* successful operation */

	RDMA_INVAL = 1,		/* invalid parameter */
	RDMA_TIMEDOUT = 2,	/* operation timed out */
	RDMA_INTR = 3,		/* operation interrupted */
	RDMA_NORESOURCE = 4,	/* insufficient resource */
	/*
	 * connection errors
	 */
	RDMA_REJECT = 5,	/* connection req rejected */
	RDMA_NOLISTENER = 6,	/* no listener on server */
	RDMA_UNREACHABLE = 7,	/* host unreachable */
	RDMA_CONNLOST = 8,	/* connection lost */

	RDMA_XPRTFAILED = 9,	/* RDMA transport failed */
	RDMA_PROTECTERR = 10,	/* memory protection error */
	RDMA_OVERRUN = 11,	/* transport overrun */
	RDMA_RECVQEMPTY = 12,	/* incoming pkt dropped, recv q empty */
	RDMA_PROTFAILED = 13,	/* RDMA protocol failed */
	RDMA_NOTSUPP = 14,	/* requested feature not supported */
	RDMA_REMOTERR = 15,	/* error at remote end */
	/*
	 * RDMATF errors
	 */
	RDMA_BADVERS = 16,	/* mismatch RDMATF versions */
	RDMA_REG_EXIST = 17,	/* RDMATF registration already exists */

	/*
	 * fallback error
	 */
	RDMA_FAILED = 18	/* generic error */
} rdma_stat;

/*
 * Memory region context. This is an RDMA provider generated
 * handle for a registered arbitrary size contiguous virtual
 * memory. The RDMA Interface Adapter needs this for local or
 * remote memory access.
 *
 * The mrc_rmr field holds the remote memory region context
 * which is sent over-the-wire to provide the remote host
 * with RDMA access to the memory region.
 */
struct mrc {
	uint32_t	mrc_rmr;	/* Remote MR context, sent OTW */
	union {
		struct mr {
			uint32_t	lmr; 	/* Local MR context */
			uint64_t	linfo;	/* Local memory info */
		} mr;
	} lhdl;
};

#define	mrc_lmr		lhdl.mr.lmr
#define	mrc_linfo	lhdl.mr.linfo

/*
 * The XDR offset value is used by the XDR
 * routine to identify the position in the
 * RPC message where the opaque object would
 * normally occur. Neither the data content
 * of the chunk, nor its size field are included
 * in the RPC message.  The XDR offset is calculated
 * as if the chunks were present.
 *
 * The remaining fields identify the chunk of data
 * on the sender.  The c_memhandle identifies a
 * registered RDMA memory region and the c_addr
 * and c_len fields identify the chunk within it.
 */
struct clist {
	uint32		c_xdroff;	/* XDR offset */
	uint32		c_len;		/* Length */
	struct mrc	c_smemhandle;	/* src memory handle */
	uint64 		c_ssynchandle;	/* src sync handle */
	uint64		c_saddr;	/* src address */
	struct mrc	c_dmemhandle;	/* dst memory handle */
	uint64		c_dsynchandle;	/* dst sync handle */
	uint64		c_daddr;	/* dst address */
	struct clist	*c_next;	/* Next chunk */
};

typedef struct clist clist;

enum rdma_proc {
	RDMA_MSG	= 0,	/* chunk list and RPC msg follow */
	RDMA_NOMSG	= 1,	/* only chunk list follows */
	RDMA_MSGP	= 2,	/* chunk list and RPC msg with padding follow */
	RDMA_DONE	= 3	/* signal completion of chunk transfer */
};

/*
 * Listener information for a service
 */
struct rdma_svc_data {
	queue_t		q;	/* queue_t to place incoming pkts */
	int		active;	/* If active, after registeration startup */
	rdma_stat	err_code;	/* Error code from plugin layer */
	int32_t		svcid;		/* RDMA based service identifier */
};

/*
 * Per RDMA plugin module information.
 * Will be populated by each plugin
 * module during its initialization.
 */
typedef struct rdma_mod {
	char 		*rdma_api;		/* "kvipl", "ibtf", etc */
	uint_t 		rdma_version;		/* RDMATF API version */
	int		rdma_count;		/* # of devices */
	struct rdmaops 	*rdma_ops;		/* rdma op vector for api */
} rdma_mod_t;

/*
 * Registry of RDMA plugins
 */
typedef struct rdma_registry {
	rdma_mod_t	*r_mod;		/* plugin mod info */
	struct rdma_registry *r_next;	/* next registered RDMA plugin */
} rdma_registry_t;

/*
 * RDMA transport information
 */
typedef struct rdma_info {
	uint_t	addrlen;	/* address length */
	uint_t  mts;		/* max transfer size */
	uint_t  mtu;		/* native mtu size of unlerlying network */
} rdma_info_t;

/*
 * RDMA Connection information
 */
typedef struct conn {
	rdma_mod_t	*c_rdmamod;	/* RDMA transport info for conn */
	struct netbuf	c_raddr;	/* remote address */
	struct netbuf	c_laddr;	/* local address */
	int		c_ref;		/* no. of clients of connection */
	struct conn	*c_next;	/* next in list of connections */
	struct conn	*c_prev;	/* prev in list of connections */
	caddr_t		c_private;	/* transport specific stuff */

#define	C_IDLE		0x80000000
#define	C_CONN_PEND	0x40000000
#define	C_CONNECTED	0x20000000
#define	C_ERROR		0x10000000
#define	C_DISCONN_PEND	0x08000000
#define	C_REMOTE_DOWN	0x04000000

	uint_t		c_state;	/* state of connection */
	kmutex_t	c_lock;		/* protect c_state and c_ref fields */
	kcondvar_t	c_cv;		/* to signal when pending is done */
} CONN;


/*
 * Memory management for the RDMA buffers
 */
/*
 * RDMA buffer types
 */
typedef enum {
	SEND_BUFFER,	/* buf for send msg */
	SEND_DESCRIPTOR, /* buf used for send msg descriptor in plugins only */
	RECV_BUFFER,	/* buf for recv msg */
	RECV_DESCRIPTOR, /* buf used for recv msg descriptor in plugins only */
	CHUNK_BUFFER	/* chunk buf used in RDMATF only and not in plugins */
} rdma_btype;

/*
 * RDMA buffer information
 */
typedef struct rdma_buf {
	rdma_btype	type;	/* buffer type */
	int		len;	/* length of buffer */
	caddr_t		addr;	/* buffer address */
	struct mrc	handle;	/* buffer registration handle */
} rdma_buf_t;

/*
 * Data transferred from plugin interrupt to svc_queuereq()
 */
struct recv_data {
	CONN		*conn;
	int		status;
	rdma_buf_t	rpcmsg;
};

/*
 * Operations vector for RDMA transports.
 */
typedef struct rdmaops {
	/* Network */
	rdma_stat	(*rdma_reachable)(int addr_type, struct netbuf *,
						void **handle);
	/* Connection */
	rdma_stat	(*rdma_get_conn)(struct netbuf *, int addr_type,
						void *, CONN **);
	rdma_stat	(*rdma_rel_conn)(CONN *);
	/* Server side listner start and stop routines */
	void		(*rdma_svc_listen)(struct rdma_svc_data *);
	void		(*rdma_svc_stop)(struct rdma_svc_data *);
	/* Memory */
	rdma_stat	(*rdma_regmem)(CONN *, caddr_t, uint_t, struct mrc *);
	rdma_stat	(*rdma_deregmem)(CONN *, caddr_t, struct mrc);
	rdma_stat	(*rdma_regmemsync)(CONN *, caddr_t, uint_t,
				struct mrc *, void **);
	rdma_stat	(*rdma_deregmemsync)(CONN *, caddr_t, struct mrc,
				void *);
	rdma_stat	(*rdma_syncmem)(CONN *, void *, caddr_t, int, int);
	/* Buffer */
	rdma_stat	(*rdma_buf_alloc)(CONN *, rdma_buf_t *);
	void		(*rdma_buf_free)(CONN *, rdma_buf_t *);
	/* Transfer */
	rdma_stat	(*rdma_send)(CONN *, clist *, uint32_t);
	rdma_stat	(*rdma_send_resp)(CONN *, clist *, uint32_t);
	rdma_stat	(*rdma_clnt_recvbuf)(CONN *, clist *, uint32_t);
	rdma_stat	(*rdma_svc_recvbuf)(CONN *, clist *);
	rdma_stat	(*rdma_recv)(CONN *, clist **, uint32_t);
	/* RDMA */
	rdma_stat	(*rdma_read)(CONN *, clist *, int);
	rdma_stat	(*rdma_write)(CONN *, clist *, int);
	/* INFO */
	rdma_stat	(*rdma_getinfo)(rdma_info_t *info);

} rdmaops_t;

/*
 * RDMA operations.
 */
#define	RDMA_REACHABLE(rdma_ops, addr_type, addr, handle)	\
	(*(rdma_ops)->rdma_reachable)(addr_type, addr, handle)

#define	RDMA_GET_CONN(rdma_ops, addr, addr_type, handle, conn)	\
	(*(rdma_ops)->rdma_get_conn)(addr, addr_type, handle, conn)

#define	RDMA_REL_CONN(conn)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_rel_conn)(conn)

#define	RDMA_REGMEM(conn, buff, len, handle)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_regmem)(conn, buff, len, handle)

#define	RDMA_DEREGMEM(conn, buff, handle)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_deregmem)(conn, buff, handle)

#define	RDMA_REGMEMSYNC(conn, buff, len, handle, synchandle)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_regmemsync)(conn, buff, \
	    len, handle, synchandle)

#define	RDMA_DEREGMEMSYNC(conn, buff, handle, synchandle)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_deregmemsync)(conn, buff, \
	    handle, synchandle)

#define	RDMA_SYNCMEM(conn, handle, buff, len, direction)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_syncmem)(conn, handle, \
	    buff, len, direction)

#define	RDMA_BUF_ALLOC(conn, rbuf)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_buf_alloc)(conn, rbuf)

#define	RDMA_BUF_FREE(conn, rbuf)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_buf_free)(conn, rbuf)

#define	RDMA_SEND(conn, sendlist, xid)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_send)(conn, sendlist, xid)

#define	RDMA_SEND_RESP(conn, sendlist, xid)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_send_resp)(conn, sendlist, xid)

#define	RDMA_CLNT_RECVBUF(conn, cl, xid)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_clnt_recvbuf)(conn, cl, xid)

#define	RDMA_SVC_RECVBUF(conn, cl)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_svc_recvbuf)(conn, cl)

#define	RDMA_RECV(conn, recvlist, xid)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_recv)(conn, recvlist, xid)

#define	RDMA_READ(conn, cl, wait)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_read)(conn, cl, wait)

#define	RDMA_WRITE(conn, cl, wait)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_write)(conn, cl, wait)

#define	RDMA_GETINFO(rdma_mod, info)	\
	(*(rdma_mod)->rdma_ops->rdma_getinfo)(info)

#ifdef _KERNEL
extern rdma_registry_t	*rdma_mod_head;
extern krwlock_t rdma_lock;		/* protects rdma_mod_head list */
extern int rdma_modloaded;		/* flag for loading RDMA plugins */
extern int rdma_dev_available;		/* rdma device is loaded or not */
extern kmutex_t rdma_modload_lock;	/* protects rdma_modloaded flag */
extern uint_t rdma_minchunk;
extern ldi_ident_t rpcmod_li; 		/* needed by layed driver framework */

/*
 * General RDMA routines
 */
extern void clist_add(struct clist **clp, uint32_t xdroff, int len,
	struct mrc *shandle, caddr_t saddr,
	struct mrc *dhandle, caddr_t daddr);
extern void clist_free(struct clist *cl);
extern int clist_register(CONN *conn, struct clist *cl, bool_t src);
extern int clist_deregister(CONN *conn, struct clist *cl, bool_t src);
rdma_stat rdma_clnt_postrecv(CONN *conn, uint32_t xid);
rdma_stat rdma_svc_postrecv(CONN *conn);
extern rdma_stat clist_syncmem(CONN *conn, struct clist *cl, bool_t src);
extern rdma_stat rdma_register_mod(rdma_mod_t *mod);
extern rdma_stat rdma_unregister_mod(rdma_mod_t *mod);
extern void rdma_buf_free(CONN *conn, rdma_buf_t *rbuf);
extern int rdma_modload();

/*
 * RDMA XDR
 */
extern void xdrrdma_create(XDR *, caddr_t, uint_t, int, struct clist *,
	enum xdr_op, CONN *);
extern void xdrrdma_destroy(XDR *);
extern struct clist *xdrrdma_clist(XDR *);
extern uint_t xdrrdma_getpos(XDR *);
extern bool_t xdrrdma_setpos(XDR *, uint_t);
extern bool_t xdr_clist(XDR *, clist *);
extern bool_t xdr_do_clist(XDR *, clist **);
extern uint_t xdr_getbufsize(XDR *);
unsigned int xdrrdma_sizeof(xdrproc_t func, void *data, int min_chunk);
unsigned int xdrrdma_authsize(AUTH *auth, struct cred *cred, int min_chunk);
#endif /* _KERNEL */

#ifdef __cplusplus
}
#endif

#endif	/* _RPC_RPC_RDMA_H */