1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
|
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ASYNCIO_H
#define _ASYNCIO_H
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <thread.h>
#include <pthread.h>
#include <setjmp.h>
#include <signal.h>
#include <siginfo.h>
#include <aio.h>
#include <limits.h>
#include <ucontext.h>
#include <sys/asynch.h>
#include <sys/mman.h>
/*
 * AIOSTKSIZE is 128K when _LP64 is defined and 64K otherwise.
 */
#ifdef _LP64
#define	AIOSTKSIZE	(128 * 1024)
#else
#define	AIOSTKSIZE	(64 * 1024)
#endif

/* signal set aside for aio cancelation */
#define	SIGAIOCANCEL	SIGLWP

/* upper bound on the number of iocbs per system call */
#define	AIO_WAITN_MAXIOCBS	32768
/*
 * Forward-declare the structure types used in this header so that they can
 * refer to one another by pointer before their bodies appear (for example,
 * struct aio_req holds an aio_worker_t pointer and struct aio_worker holds
 * aio_req_t pointers). The structures themselves are defined below.
 */
typedef struct aio_args aio_args_t;
typedef struct aio_lio aio_lio_t;
typedef struct notif_param notif_param_t;
typedef struct aio_req aio_req_t;
typedef struct aio_worker aio_worker_t;
typedef struct aio_hash aio_hash_t;
/*
 * Argument list of a request; embedded in struct aio_req as req_args.
 * NOTE(review): the field names suggest a file descriptor, a data buffer,
 * the buffer size and a file offset -- confirm against the code that fills
 * this structure in.
 */
struct aio_args {
int fd;
caddr_t buf;
size_t bufsz;
offset_t offset;
};
/*
 * List head for UFS list I/O.
 * A request refers to its list head through the req_head field of
 * struct aio_req. The head carries its own mutex and condition variable,
 * the entry and outstanding-I/O counters, and the notification settings
 * for the list as a whole.
 */
struct aio_lio {
mutex_t lio_mutex; /* list mutex */
cond_t lio_cond_cv; /* list notification for I/O done */
aio_lio_t *lio_next; /* pointer to next on freelist */
char lio_mode; /* LIO_WAIT/LIO_NOWAIT */
char lio_canned; /* lio was canceled */
char lio_largefile; /* largefile operation */
char lio_waiting; /* waiting in __lio_listio() */
int lio_nent; /* Number of list I/O's */
int lio_refcnt; /* outstanding I/O's */
int lio_event; /* Event number for notification */
int lio_port; /* Port number for notification */
int lio_signo; /* Signal number for notification */
union sigval lio_sigval; /* Signal parameter */
uintptr_t lio_object; /* for SIGEV_THREAD or SIGEV_PORT */
struct sigevent *lio_sigevent; /* Notification function and attr. */
};
/*
 * Notification parameters; embedded in struct aio_req as req_notify.
 * The structure holds two parallel sets of fields: the np_* set and the
 * np_lio_* set, the latter being the one marked "listio" below.
 * NOTE(review): the un-commented fields (user, event, object) mirror the
 * lio_event/lio_object fields of struct aio_lio by name -- confirm their
 * exact use with the notification code.
 */
struct notif_param {
int np_signo; /* SIGEV_SIGNAL */
int np_port; /* SIGEV_THREAD or SIGEV_PORT */
void *np_user;
int np_event;
uintptr_t np_object;
int np_lio_signo; /* listio: SIGEV_SIGNAL */
int np_lio_port; /* listio: SIGEV_THREAD or SIGEV_PORT */
void *np_lio_user;
int np_lio_event;
uintptr_t np_lio_object;
};
/*
 * A single asynchronous I/O request. The fields are grouped by the lock
 * that protects them.
 * NOTE(review): the comments below name the lock "_aio_mutex", while the
 * global aio lock declared in this header is spelled "__aio_mutex" --
 * presumably the same lock; confirm and align the spelling.
 */
struct aio_req {
/*
 * fields protected by _aio_mutex lock.
 */
aio_req_t *req_link; /* hash/freelist chain link */
/*
 * when req is on the doneq, then req_next is protected by
 * the _aio_mutex lock. when the req is on a work q, then
 * req_next is protected by a worker's work_qlock1 lock.
 */
aio_req_t *req_next; /* request/done queue link */
aio_req_t *req_prev; /* double linked list */
/*
 * fields protected by a worker's work_qlock1 lock.
 */
char req_state; /* AIO_REQ_QUEUED, ... */
/*
 * fields require no locking.
 */
char req_type; /* AIO_POSIX_REQ or not */
char req_largefile; /* largefile operation */
char req_op; /* AIOREAD, etc. */
aio_worker_t *req_worker; /* associate request with worker */
aio_result_t *req_resultp; /* address of result buffer */
aio_args_t req_args; /* arglist */
aio_lio_t *req_head; /* list head for LIO */
struct sigevent req_sigevent;
void *req_aiocbp; /* ptr to aiocb or aiocb64 */
notif_param_t req_notify; /* notification parameters */
};
/*
 * special lio type that destroys itself when lio refcnt becomes zero
 *
 * Both values are derived from LIO_WAIT, which is not defined in this
 * header. The replacement lists are fully parenthesized so that the
 * constants keep their value inside larger expressions (for example
 * "LIO_FSYNC * 2" or "x - LIO_DESTROY").
 */
#define	LIO_FSYNC	(LIO_WAIT + 1)
#define	LIO_DESTROY	(LIO_FSYNC + 1)

/* lio flags */
#define	LIO_FSYNC_CANCELED	0x1
/* values for req_state in struct aio_req */
#define AIO_REQ_QUEUED 1
#define AIO_REQ_INPROGRESS 2
#define AIO_REQ_CANCELED 3
#define AIO_REQ_DONE 4
#define AIO_REQ_FREE 5
#define AIO_REQ_DONEQ 6
/*
 * use KAIO in _aio_rw()
 * NOTE(review): presumably these are passed as a flag argument of
 * _aio_rw(); confirm which parameter takes them.
 */
#define AIO_NO_KAIO 0x0
#define AIO_KAIO 0x1
#define AIO_NO_DUPS 0x2
/* value for req_type in struct aio_req; tested by POSIX_AIO() */
#define AIO_POSIX_REQ 0x1
/*
 * NOTE(review): nothing in this header shows which field or variable
 * takes the following values -- document the consumer once confirmed.
 */
#define CHECK 1
#define NOCHECK 2
#define CHECKED 3
#define USERAIO 4
#define USERAIO_DONE 5
/*
 * values for _aio_flags
 * Each value occupies a distinct bit.
 */
/* if set, _aiodone() notifies aio_waitn about done requests */
#define AIO_WAIT_INPROGRESS 0x1
/* if set, _aiodone() wakes up functions waiting for completed I/Os */
#define AIO_IO_WAITING 0x2
#define AIO_LIB_WAITN 0x4 /* aio_waitn in progress */
#define AIO_LIB_WAITN_PENDING 0x8 /* aio_waitn requests pending */
/*
 * Before a kaio() system call, the fd is checked to see whether kernel
 * async. I/O is known to be unsupported for this file.
 * The only way to find out is if a kaio() call returns ENOTSUP,
 * so the default is always to try the kaio() call. Only in
 * the specific instance of a kaio() call returning ENOTSUP
 * do we stop submitting kaio() calls for that fd.
 * If the fd is outside the array bounds, the kaio() call is allowed.
 *
 * The only way that an fd entry can go from ENOTSUP to supported
 * is if that fd is freed up by a close(), and close will clear
 * the entry for that fd.
 *
 * Each fd gets one bit in the array _kaio_supported[]; a set bit means
 * "kaio is NOT supported for this fd".
 *
 *	uint32_t _kaio_supported[MAX_KAIO_FDARRAY_SIZE];
 *
 * (This header declares _kaio_supported as a uint32_t pointer.)
 *
 * The array has MAX_KAIO_FDARRAY_SIZE 32-bit elements, i.e. 8KB.
 * If more than (MAX_KAIO_FDARRAY_SIZE * KAIO_FDARRAY_ELEM_SIZE)
 * files are open, this can be expanded.
 */
#define	MAX_KAIO_FDARRAY_SIZE	2048
#define	KAIO_FDARRAY_ELEM_SIZE	WORD_BIT	/* uint32_t */
#define	MAX_KAIO_FDS	(MAX_KAIO_FDARRAY_SIZE * KAIO_FDARRAY_ELEM_SIZE)

/* true when fdes has a bit in _kaio_supported[] */
#define	VALID_FD(fdes)	((fdes) >= 0 && (fdes) < MAX_KAIO_FDS)

/*
 * True when kaio() may be tried on fdes: either fdes is outside the
 * bitmap, or its "not supported" bit is clear.
 * The mask is built from an unsigned 1 so that selecting the top bit of
 * an element does not shift a signed int into its sign bit.
 * Note: like the two macros below, this evaluates fdes more than once.
 */
#define	KAIO_SUPPORTED(fdes) \
	(!VALID_FD(fdes) || \
	((_kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] & \
	((uint32_t)1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE))) == 0))

/*
 * Set the "not supported" bit of fdes; a no-op for an fdes outside the
 * bitmap. Wrapped in do { } while (0) so that the macro behaves as one
 * statement and cannot capture an "else" written by the caller.
 */
#define	SET_KAIO_NOT_SUPPORTED(fdes) \
	do { \
		if (VALID_FD(fdes)) \
			_kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] |= \
			    ((uint32_t)1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE)); \
	} while (0)

/*
 * Clear the "not supported" bit of fdes, so that kaio() is tried again;
 * a no-op for an fdes outside the bitmap.
 */
#define	CLEAR_KAIO_SUPPORTED(fdes) \
	do { \
		if (VALID_FD(fdes)) \
			_kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] &= \
			    ~((uint32_t)1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE)); \
	} while (0)
/*
 * State of one worker: its links in the list of workers, work request
 * queue 1 together with the lock that guards it, the condition variable
 * it sleeps on when idle, and its bookkeeping counters.
 * According to the comments in struct aio_req, work_qlock1 also protects
 * a request's req_state, and its req_next while the request is on a
 * work queue.
 */
struct aio_worker {
aio_worker_t *work_forw; /* forward link in list of workers */
aio_worker_t *work_backw; /* backwards link in list of workers */
mutex_t work_qlock1; /* lock for work queue 1 */
cond_t work_idle_cv; /* place to sleep when idle */
aio_req_t *work_head1; /* head of work request queue 1 */
aio_req_t *work_tail1; /* tail of work request queue 1 */
aio_req_t *work_next1; /* work queue one's next pointer */
aio_req_t *work_prev1; /* last request done from queue 1 */
aio_req_t *work_req; /* active work request */
thread_t work_tid; /* worker's thread-id */
int work_count1; /* length of work queue one */
int work_done1; /* number of requests done */
int work_minload1; /* min length of queue */
int work_idleflg; /* when set, worker is idle */
sigjmp_buf work_jmp_buf; /* cancellation point */
};
/*
 * One entry of the resultp hash table: a lock plus a pointer to a request.
 * NOTE(review): hash_ptr is presumably the head of a chain linked through
 * req_link ("hash/freelist chain link" in struct aio_req) -- confirm.
 */
struct aio_hash { /* resultp hash table */
mutex_t hash_lock;
aio_req_t *hash_ptr;
#if !defined(_LP64)
void *hash_pad; /* ensure sizeof (aio_hash_t) == 32 */
#endif
};
/*
 * The hash table itself; only declared here.
 * NOTE(review): presumably HASHSZ entries indexed by AIOHASH() -- confirm
 * against the allocation site.
 */
extern aio_hash_t *_aio_hash;
/*
 * AIOHASH() masks with (HASHSZ - 1), so HASHSZ has to remain a power of 2.
 */
#define	HASHSZ	2048

/*
 * Hash a result pointer into the range [0, HASHSZ): the address shifted
 * right by 2 is XORed with the address shifted right by 17, and the low
 * bits of the outcome are kept. The argument is evaluated twice.
 */
#define	AIOHASH(resultp) \
	((HASHSZ - 1) & \
	(((uintptr_t)(resultp) >> 2) ^ ((uintptr_t)(resultp) >> 17)))

/* non-zero when the req_type of request x is AIO_POSIX_REQ */
#define	POSIX_AIO(x)	(AIO_POSIX_REQ == (x)->req_type)
/*
 * Prototypes of functions that are declared in this header and defined
 * elsewhere. The *64 variants, which take an aiocb64_t, are declared only
 * when _LP64 is not defined.
 */
extern int __uaio_init(void);
extern void _kaio_init(void);
extern intptr_t _kaio(int, ...);
extern int _aiorw(int, caddr_t, int, offset_t, int, aio_result_t *, int);
extern int _aio_rw(aiocb_t *, aio_lio_t *, aio_worker_t **, int, int);
#if !defined(_LP64)
extern int _aio_rw64(aiocb64_t *, aio_lio_t *, aio_worker_t **, int, int);
#endif
extern int _aio_create_worker(aio_req_t *, int);
extern int _aio_cancel_req(aio_worker_t *, aio_req_t *, int *, int *);
extern int aiocancel_all(int);
extern void aio_panic(const char *);
extern aio_req_t *_aio_hash_find(aio_result_t *);
extern aio_req_t *_aio_hash_del(aio_result_t *);
extern void _aio_req_mark_done(aio_req_t *);
extern void _aio_waitn_wakeup(void);
extern aio_worker_t *_aio_worker_alloc(void);
extern void _aio_worker_free(void *);
extern aio_req_t *_aio_req_alloc(void);
extern void _aio_req_free(aio_req_t *);
extern aio_lio_t *_aio_lio_alloc(void);
extern void _aio_lio_free(aio_lio_t *);
extern int _aio_idle(aio_worker_t *);
extern void *_aio_do_request(void *);
extern void *_aio_do_notify(void *);
extern void _lio_remove(aio_req_t *);
extern aio_req_t *_aio_req_remove(aio_req_t *);
extern int _aio_get_timedelta(timespec_t *, timespec_t *);
extern aio_result_t *_aio_req_done(void);
extern void _aio_set_result(aio_req_t *, ssize_t, int);
extern int _aio_sigev_thread_init(struct sigevent *);
extern int _aio_sigev_thread(aiocb_t *);
#if !defined(_LP64)
extern int _aio_sigev_thread64(aiocb64_t *);
#endif
/*
 * Global variables shared through this header. Each one is only declared
 * here (extern); the definitions live elsewhere.
 * NOTE(review): the comments in struct aio_req refer to an "_aio_mutex"
 * lock; the global aio lock declared below is spelled "__aio_mutex" --
 * presumably the same lock; confirm.
 */
extern aio_worker_t *_kaiowp; /* points to kaio cleanup thread */
extern aio_worker_t *__workers_rw; /* list of all rw workers */
extern aio_worker_t *__nextworker_rw; /* worker chosen for next rw request */
extern int __rw_workerscnt; /* number of rw workers */
extern aio_worker_t *__workers_no; /* list of all notification workers */
extern aio_worker_t *__nextworker_no; /* worker chosen, next notification */
extern int __no_workerscnt; /* number of notification workers */
extern mutex_t __aio_initlock; /* makes aio initialization atomic */
extern cond_t __aio_initcv;
extern int __aio_initbusy;
extern mutex_t __aio_mutex; /* global aio lock */
extern cond_t _aio_iowait_cv; /* wait for userland I/Os */
extern cond_t _aio_waitn_cv; /* wait for end of aio_waitn */
extern int _max_workers; /* max number of workers permitted */
extern int _min_workers; /* min number of workers */
extern sigset_t _worker_set; /* worker's signal mask */
extern int _aio_worker_cnt; /* number of AIO workers */
extern int _sigio_enabled; /* when set, send SIGIO signal */
extern pid_t __pid; /* process's PID */
extern int __uaio_ok; /* indicates if aio is initialized */
extern int _kaio_ok; /* indicates if kaio is initialized */
extern pthread_key_t _aio_key; /* for thread-specific data */
extern aio_req_t *_aio_done_tail; /* list of done requests */
extern aio_req_t *_aio_done_head;
extern aio_req_t *_aio_doneq;
extern int _aio_freelist_cnt;
extern int _aio_allocated_cnt;
extern int _aio_donecnt;
extern int _aio_doneq_cnt;
extern int _aio_waitncnt; /* # of requests for aio_waitn */
extern int _aio_outstand_cnt; /* # of outstanding requests */
extern int _kaio_outstand_cnt; /* # of outstanding kaio requests */
extern int _aio_req_done_cnt; /* req. done but not in "done queue" */
extern int _aio_kernel_suspend; /* active kernel kaio calls */
extern int _aio_suscv_cnt; /* aio_suspend calls waiting on cv's */
extern int _aiowait_flag; /* when set, aiowait() is inprogress */
extern int _aio_flags; /* see defines, above */
/* per-fd bitmap used by KAIO_SUPPORTED() and its companion macros */
extern uint32_t *_kaio_supported;
extern const sigset_t maskset; /* all maskable signals */
#ifdef __cplusplus
}
#endif
#endif /* _ASYNCIO_H */
|