diff options
Diffstat (limited to 'usr/src/lib/libaio/common')
-rw-r--r-- | usr/src/lib/libaio/common/Makefile | 49 | ||||
-rw-r--r-- | usr/src/lib/libaio/common/aio.c | 2089 | ||||
-rw-r--r-- | usr/src/lib/libaio/common/libaio.h | 396 | ||||
-rw-r--r-- | usr/src/lib/libaio/common/llib-laio | 43 | ||||
-rw-r--r-- | usr/src/lib/libaio/common/ma.c | 449 | ||||
-rw-r--r-- | usr/src/lib/libaio/common/posix_aio.c | 1717 | ||||
-rw-r--r-- | usr/src/lib/libaio/common/scalls.c | 59 | ||||
-rw-r--r-- | usr/src/lib/libaio/common/sig.c | 296 | ||||
-rw-r--r-- | usr/src/lib/libaio/common/subr.c | 58 |
9 files changed, 6 insertions, 5150 deletions
diff --git a/usr/src/lib/libaio/common/Makefile b/usr/src/lib/libaio/common/Makefile deleted file mode 100644 index 5a58f96bc0..0000000000 --- a/usr/src/lib/libaio/common/Makefile +++ /dev/null @@ -1,49 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# -# lib/libaio/common/Makefile - -LINTSRC32= lintsrc32 -LINTOUT32= lint32.out -LINTLIB32= $(LIBNAME)32 -$(LINTSRC32):= LINTFLAGS += - -LINTSRC64= lintsrc64 -LINTOUT64= lint64.out -LINTLIB64= $(LIBNAME)64 -$(LINTSRC64):= LINTFLAGS64 += -fd -Xtransition=yes - -lints : $(LINTSRC32) $(LINTSRC64) - -$(LINTSRC32): $$(SRCS) - $(LINT.c) -o $(LINTLIB32) $(SRCS) > $(LINTOUT32) 2>&1 - -$(LINTSRC64): $$(SRCS) - $(LINT64.c) -o $(LINTLIB64) $(SRCS) > $(LINTOUT64) 2>&1 - -include ../Makefile.com - diff --git a/usr/src/lib/libaio/common/aio.c b/usr/src/lib/libaio/common/aio.c deleted file mode 100644 index 6108245fa4..0000000000 --- a/usr/src/lib/libaio/common/aio.c +++ /dev/null @@ -1,2089 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "libaio.h" -#include <atomic.h> -#include <sys/param.h> -#include <sys/file.h> -#include <sys/port.h> - -static int _aio_hash_insert(aio_result_t *, aio_req_t *); -static aio_req_t *_aio_req_get(aio_worker_t *); -static void _aio_req_add(aio_req_t *, aio_worker_t **, int); -static void _aio_req_del(aio_worker_t *, aio_req_t *, int); -static void _aio_work_done(aio_worker_t *); -aio_req_t *_aio_req_remove(aio_req_t *); -static void _aio_enq_doneq(aio_req_t *); - -extern void _aio_lio_free(aio_lio_t *); - -extern int __fdsync(int, int); -extern int _port_dispatch(int, int, int, int, uintptr_t, void *); - -static int _aio_fsync_del(aio_worker_t *, aio_req_t *); -static void _aiodone(aio_req_t *, ssize_t, int); -static void _aio_cancel_work(aio_worker_t *, int, int *, int *); -static void _aio_finish_request(aio_worker_t *, ssize_t, int); - -/* - * switch for kernel async I/O - */ -int _kaio_ok = 0; /* 0 = disabled, 1 = on, -1 = error */ - -/* - * Key for thread-specific data - */ -pthread_key_t _aio_key; - -/* - * Array for determining whether or not a file supports kaio. - * Initialized in _kaio_init(). - */ -uint32_t *_kaio_supported = NULL; - -/* - * workers for read/write requests - * (__aio_mutex lock protects circular linked list of workers) - */ -aio_worker_t *__workers_rw; /* circular list of AIO workers */ -aio_worker_t *__nextworker_rw; /* next worker in list of workers */ -int __rw_workerscnt; /* number of read/write workers */ - -/* - * worker for notification requests. - */ -aio_worker_t *__workers_no; /* circular list of AIO workers */ -aio_worker_t *__nextworker_no; /* next worker in list of workers */ -int __no_workerscnt; /* number of write workers */ - -aio_req_t *_aio_done_tail; /* list of done requests */ -aio_req_t *_aio_done_head; - -mutex_t __aio_initlock = DEFAULTMUTEX; /* makes aio initialization atomic */ -mutex_t __aio_mutex = DEFAULTMUTEX; /* protects counts, and linked lists */ -cond_t _aio_iowait_cv = DEFAULTCV; /* wait for userland I/Os */ - -pid_t __pid = (pid_t)-1; /* initialize as invalid pid */ -int _sigio_enabled = 0; /* when set, send SIGIO signal */ - -aio_hash_t *_aio_hash; - -aio_req_t *_aio_doneq; /* double linked done queue list */ - -int _aio_donecnt = 0; -int _aio_waitncnt = 0; /* # of requests for aio_waitn */ -int _aio_doneq_cnt = 0; -int _aio_outstand_cnt = 0; /* # of outstanding requests */ -int _kaio_outstand_cnt = 0; /* # of outstanding kaio requests */ -int _aio_req_done_cnt = 0; /* req. done but not in "done queue" */ -int _aio_kernel_suspend = 0; /* active kernel kaio calls */ -int _aio_suscv_cnt = 0; /* aio_suspend calls waiting on cv's */ - -int _max_workers = 256; /* max number of workers permitted */ -int _min_workers = 8; /* min number of workers */ -int _minworkload = 2; /* min number of request in q */ -int _aio_worker_cnt = 0; /* number of workers to do requests */ -int __uaio_ok = 0; /* AIO has been enabled */ -sigset_t _worker_set; /* worker's signal mask */ -sigset_t _full_set; /* all signals (sigfillset()) */ - -int _aiowait_flag = 0; /* when set, aiowait() is inprogress */ -int _aio_flags = 0; /* see libaio.h defines for */ - -aio_worker_t *_kaiowp; /* points to kaio cleanup thread */ - -int hz; /* clock ticks per second */ - -static int -_kaio_supported_init(void) -{ - void *ptr; - size_t size; - - if (_kaio_supported != NULL) /* already initialized */ - return (0); - - size = MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t); - ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, -1, (off_t)0); - if (ptr == MAP_FAILED) - return (-1); - _kaio_supported = ptr; - return (0); -} - -/* - * libaio is initialized when an AIO request is made. Important - * constants are initialized like the max number of workers that - * libaio can create, and the minimum number of workers permitted before - * imposing some restrictions. Also, some workers are created. - */ -int -__uaio_init(void) -{ - int i; - int ret; - - sig_mutex_lock(&__aio_initlock); - if (__uaio_ok) { /* already initialized */ - sig_mutex_unlock(&__aio_initlock); - return (0); - } - - ret = -1; - - hz = (int)sysconf(_SC_CLK_TCK); - __pid = getpid(); - - init_signals(); - - if (_kaio_supported_init() != 0) - goto out; - - /* - * Allocate and initialize the hash table. - */ - /* LINTED pointer cast */ - _aio_hash = (aio_hash_t *)mmap(NULL, - HASHSZ * sizeof (aio_hash_t), PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, -1, (off_t)0); - if ((void *)_aio_hash == MAP_FAILED) { - _aio_hash = NULL; - goto out; - } - for (i = 0; i < HASHSZ; i++) - (void) mutex_init(&_aio_hash[i].hash_lock, USYNC_THREAD, NULL); - - /* - * Initialize worker's signal mask to only catch SIGAIOCANCEL. - */ - (void) sigfillset(&_full_set); - (void) sigfillset(&_worker_set); - (void) sigdelset(&_worker_set, SIGAIOCANCEL); - - /* - * Create the minimum number of workers. - */ - for (i = 0; i < _min_workers; i++) - (void) _aio_create_worker(NULL, AIOREAD); - - /* - * Create one worker to send asynchronous notifications. - */ - (void) _aio_create_worker(NULL, AIONOTIFY); - - __uaio_ok = 1; - ret = 0; - -out: - sig_mutex_unlock(&__aio_initlock); - return (ret); -} - -/* - * special kaio cleanup thread sits in a loop in the - * kernel waiting for pending kaio requests to complete. - */ -void * -_kaio_cleanup_thread(void *arg) -{ - if (pthread_setspecific(_aio_key, arg) != 0) - _aiopanic("_kaio_cleanup_thread, pthread_setspecific()"); - (void) _kaio(AIOSTART); - return (arg); -} - -/* - * initialize kaio. - */ -void -_kaio_init() -{ - int error; - sigset_t set; - sigset_t oset; - - sig_mutex_lock(&__aio_initlock); - if (_kaio_supported_init() != 0) - _kaio_ok = -1; - if (_kaio_ok == 0) { - if ((_kaiowp = _aio_worker_alloc()) == NULL) { - error = ENOMEM; - } else { - if ((error = (int)_kaio(AIOINIT)) == 0) { - (void) sigfillset(&set); - (void) pthread_sigmask(SIG_SETMASK, - &set, &oset); - error = thr_create(NULL, AIOSTKSIZE, - _kaio_cleanup_thread, _kaiowp, - THR_DAEMON, &_kaiowp->work_tid); - (void) pthread_sigmask(SIG_SETMASK, - &oset, NULL); - } - if (error) { - _aio_worker_free(_kaiowp); - _kaiowp = NULL; - } - } - if (error) - _kaio_ok = -1; - else - _kaio_ok = 1; - } - sig_mutex_unlock(&__aio_initlock); -} - -int -aioread(int fd, caddr_t buf, int bufsz, off_t offset, int whence, - aio_result_t *resultp) -{ - return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOREAD)); -} - -int -aiowrite(int fd, caddr_t buf, int bufsz, off_t offset, int whence, - aio_result_t *resultp) -{ - return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOWRITE)); -} - -#if !defined(_LP64) -int -aioread64(int fd, caddr_t buf, int bufsz, off64_t offset, int whence, - aio_result_t *resultp) -{ - return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOAREAD64)); -} - -int -aiowrite64(int fd, caddr_t buf, int bufsz, off64_t offset, int whence, - aio_result_t *resultp) -{ - return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOAWRITE64)); -} -#endif /* !defined(_LP64) */ - -int -_aiorw(int fd, caddr_t buf, int bufsz, offset_t offset, int whence, - aio_result_t *resultp, int mode) -{ - aio_req_t *reqp; - aio_args_t *ap; - offset_t loffset; - struct stat stat; - int error = 0; - int kerr; - int umode; - - switch (whence) { - - case SEEK_SET: - loffset = offset; - break; - case SEEK_CUR: - if ((loffset = llseek(fd, 0, SEEK_CUR)) == -1) - error = -1; - else - loffset += offset; - break; - case SEEK_END: - if (fstat(fd, &stat) == -1) - error = -1; - else - loffset = offset + stat.st_size; - break; - default: - errno = EINVAL; - error = -1; - } - - if (error) - return (error); - - /* initialize kaio */ - if (!_kaio_ok) - _kaio_init(); - - /* - * _aio_do_request() needs the original request code (mode) to be able - * to choose the appropiate 32/64 bit function. All other functions - * only require the difference between READ and WRITE (umode). - */ - if (mode == AIOAREAD64 || mode == AIOAWRITE64) - umode = mode - AIOAREAD64; - else - umode = mode; - - /* - * Try kernel aio first. - * If errno is ENOTSUP/EBADFD, fall back to the thread implementation. - */ - if (_kaio_ok > 0 && KAIO_SUPPORTED(fd)) { - resultp->aio_errno = 0; - sig_mutex_lock(&__aio_mutex); - _kaio_outstand_cnt++; - kerr = (int)_kaio(((resultp->aio_return == AIO_INPROGRESS) ? - (umode | AIO_POLL_BIT) : umode), - fd, buf, bufsz, loffset, resultp); - if (kerr == 0) { - sig_mutex_unlock(&__aio_mutex); - return (0); - } - _kaio_outstand_cnt--; - sig_mutex_unlock(&__aio_mutex); - if (errno != ENOTSUP && errno != EBADFD) - return (-1); - if (errno == EBADFD) - SET_KAIO_NOT_SUPPORTED(fd); - } - - if (!__uaio_ok && __uaio_init() == -1) - return (-1); - - if ((reqp = _aio_req_alloc()) == NULL) { - errno = EAGAIN; - return (-1); - } - - /* - * _aio_do_request() checks reqp->req_op to differentiate - * between 32 and 64 bit access. - */ - reqp->req_op = mode; - reqp->req_resultp = resultp; - ap = &reqp->req_args; - ap->fd = fd; - ap->buf = buf; - ap->bufsz = bufsz; - ap->offset = loffset; - - if (_aio_hash_insert(resultp, reqp) != 0) { - _aio_req_free(reqp); - errno = EINVAL; - return (-1); - } - /* - * _aio_req_add() only needs the difference between READ and - * WRITE to choose the right worker queue. - */ - _aio_req_add(reqp, &__nextworker_rw, umode); - return (0); -} - -int -aiocancel(aio_result_t *resultp) -{ - aio_req_t *reqp; - aio_worker_t *aiowp; - int ret; - int done = 0; - int canceled = 0; - - if (!__uaio_ok) { - errno = EINVAL; - return (-1); - } - - sig_mutex_lock(&__aio_mutex); - reqp = _aio_hash_find(resultp); - if (reqp == NULL) { - if (_aio_outstand_cnt == _aio_req_done_cnt) - errno = EINVAL; - else - errno = EACCES; - ret = -1; - } else { - aiowp = reqp->req_worker; - sig_mutex_lock(&aiowp->work_qlock1); - (void) _aio_cancel_req(aiowp, reqp, &canceled, &done); - sig_mutex_unlock(&aiowp->work_qlock1); - - if (canceled) { - ret = 0; - } else { - if (_aio_outstand_cnt == 0 || - _aio_outstand_cnt == _aio_req_done_cnt) - errno = EINVAL; - else - errno = EACCES; - ret = -1; - } - } - sig_mutex_unlock(&__aio_mutex); - return (ret); -} - -/* - * This must be asynch safe - */ -aio_result_t * -aiowait(struct timeval *uwait) -{ - aio_result_t *uresultp; - aio_result_t *kresultp; - aio_result_t *resultp; - int dontblock; - int timedwait = 0; - int kaio_errno = 0; - struct timeval twait; - struct timeval *wait = NULL; - hrtime_t hrtend; - hrtime_t hres; - - if (uwait) { - /* - * Check for a valid specified wait time. - * If it is invalid, fail the call right away. - */ - if (uwait->tv_sec < 0 || uwait->tv_usec < 0 || - uwait->tv_usec >= MICROSEC) { - errno = EINVAL; - return ((aio_result_t *)-1); - } - - if (uwait->tv_sec > 0 || uwait->tv_usec > 0) { - hrtend = gethrtime() + - (hrtime_t)uwait->tv_sec * NANOSEC + - (hrtime_t)uwait->tv_usec * (NANOSEC / MICROSEC); - twait = *uwait; - wait = &twait; - timedwait++; - } else { - /* polling */ - sig_mutex_lock(&__aio_mutex); - if (_kaio_outstand_cnt == 0) { - kresultp = (aio_result_t *)-1; - } else { - kresultp = (aio_result_t *)_kaio(AIOWAIT, - (struct timeval *)-1, 1); - if (kresultp != (aio_result_t *)-1 && - kresultp != NULL && - kresultp != (aio_result_t *)1) { - _kaio_outstand_cnt--; - sig_mutex_unlock(&__aio_mutex); - return (kresultp); - } - } - uresultp = _aio_req_done(); - sig_mutex_unlock(&__aio_mutex); - if (uresultp != NULL && - uresultp != (aio_result_t *)-1) { - return (uresultp); - } - if (uresultp == (aio_result_t *)-1 && - kresultp == (aio_result_t *)-1) { - errno = EINVAL; - return ((aio_result_t *)-1); - } else { - return (NULL); - } - } - } - - for (;;) { - sig_mutex_lock(&__aio_mutex); - uresultp = _aio_req_done(); - if (uresultp != NULL && uresultp != (aio_result_t *)-1) { - sig_mutex_unlock(&__aio_mutex); - resultp = uresultp; - break; - } - _aiowait_flag++; - dontblock = (uresultp == (aio_result_t *)-1); - if (dontblock && _kaio_outstand_cnt == 0) { - kresultp = (aio_result_t *)-1; - kaio_errno = EINVAL; - } else { - sig_mutex_unlock(&__aio_mutex); - kresultp = (aio_result_t *)_kaio(AIOWAIT, - wait, dontblock); - sig_mutex_lock(&__aio_mutex); - kaio_errno = errno; - } - _aiowait_flag--; - sig_mutex_unlock(&__aio_mutex); - if (kresultp == (aio_result_t *)1) { - /* aiowait() awakened by an aionotify() */ - continue; - } else if (kresultp != NULL && - kresultp != (aio_result_t *)-1) { - resultp = kresultp; - sig_mutex_lock(&__aio_mutex); - _kaio_outstand_cnt--; - sig_mutex_unlock(&__aio_mutex); - break; - } else if (kresultp == (aio_result_t *)-1 && - kaio_errno == EINVAL && - uresultp == (aio_result_t *)-1) { - errno = kaio_errno; - resultp = (aio_result_t *)-1; - break; - } else if (kresultp == (aio_result_t *)-1 && - kaio_errno == EINTR) { - errno = kaio_errno; - resultp = (aio_result_t *)-1; - break; - } else if (timedwait) { - hres = hrtend - gethrtime(); - if (hres <= 0) { - /* time is up; return */ - resultp = NULL; - break; - } else { - /* - * Some time left. Round up the remaining time - * in nanoseconds to microsec. Retry the call. - */ - hres += (NANOSEC / MICROSEC) - 1; - wait->tv_sec = hres / NANOSEC; - wait->tv_usec = - (hres % NANOSEC) / (NANOSEC / MICROSEC); - } - } else { - ASSERT(kresultp == NULL && uresultp == NULL); - resultp = NULL; - continue; - } - } - return (resultp); -} - -/* - * _aio_get_timedelta calculates the remaining time and stores the result - * into timespec_t *wait. - */ - -int -_aio_get_timedelta(timespec_t *end, timespec_t *wait) -{ - int ret = 0; - struct timeval cur; - timespec_t curtime; - - (void) gettimeofday(&cur, NULL); - curtime.tv_sec = cur.tv_sec; - curtime.tv_nsec = cur.tv_usec * 1000; /* convert us to ns */ - - if (end->tv_sec >= curtime.tv_sec) { - wait->tv_sec = end->tv_sec - curtime.tv_sec; - if (end->tv_nsec >= curtime.tv_nsec) { - wait->tv_nsec = end->tv_nsec - curtime.tv_nsec; - if (wait->tv_sec == 0 && wait->tv_nsec == 0) - ret = -1; /* timer expired */ - } else { - if (end->tv_sec > curtime.tv_sec) { - wait->tv_sec -= 1; - wait->tv_nsec = NANOSEC - - (curtime.tv_nsec - end->tv_nsec); - } else { - ret = -1; /* timer expired */ - } - } - } else { - ret = -1; - } - return (ret); -} - -/* - * If closing by file descriptor: we will simply cancel all the outstanding - * aio`s and return. Those aio's in question will have either noticed the - * cancellation notice before, during, or after initiating io. - */ -int -aiocancel_all(int fd) -{ - aio_req_t *reqp; - aio_req_t **reqpp; - aio_worker_t *first; - aio_worker_t *next; - int canceled = 0; - int done = 0; - int cancelall = 0; - - sig_mutex_lock(&__aio_mutex); - - if (_aio_outstand_cnt == 0) { - sig_mutex_unlock(&__aio_mutex); - return (AIO_ALLDONE); - } - - /* - * Cancel requests from the read/write workers' queues. - */ - first = __nextworker_rw; - next = first; - do { - _aio_cancel_work(next, fd, &canceled, &done); - } while ((next = next->work_forw) != first); - - /* - * finally, check if there are requests on the done queue that - * should be canceled. - */ - if (fd < 0) - cancelall = 1; - reqpp = &_aio_done_tail; - while ((reqp = *reqpp) != NULL) { - if (cancelall || reqp->req_args.fd == fd) { - *reqpp = reqp->req_next; - _aio_donecnt--; - (void) _aio_hash_del(reqp->req_resultp); - _aio_req_free(reqp); - } else - reqpp = &reqp->req_next; - } - if (cancelall) { - ASSERT(_aio_donecnt == 0); - _aio_done_head = NULL; - } - sig_mutex_unlock(&__aio_mutex); - - if (canceled && done == 0) - return (AIO_CANCELED); - else if (done && canceled == 0) - return (AIO_ALLDONE); - else if ((canceled + done == 0) && KAIO_SUPPORTED(fd)) - return ((int)_kaio(AIOCANCEL, fd, NULL)); - return (AIO_NOTCANCELED); -} - -/* - * Cancel requests from a given work queue. If the file descriptor - * parameter, fd, is non-negative, then only cancel those requests - * in this queue that are to this file descriptor. If the fd - * parameter is -1, then cancel all requests. - */ -static void -_aio_cancel_work(aio_worker_t *aiowp, int fd, int *canceled, int *done) -{ - aio_req_t *reqp; - - sig_mutex_lock(&aiowp->work_qlock1); - /* - * cancel queued requests first. - */ - reqp = aiowp->work_tail1; - while (reqp != NULL) { - if (fd < 0 || reqp->req_args.fd == fd) { - if (_aio_cancel_req(aiowp, reqp, canceled, done)) { - /* - * Callers locks were dropped. - * reqp is invalid; start traversing - * the list from the beginning again. - */ - reqp = aiowp->work_tail1; - continue; - } - } - reqp = reqp->req_next; - } - /* - * Since the queued requests have been canceled, there can - * only be one inprogress request that should be canceled. - */ - if ((reqp = aiowp->work_req) != NULL && - (fd < 0 || reqp->req_args.fd == fd)) - (void) _aio_cancel_req(aiowp, reqp, canceled, done); - sig_mutex_unlock(&aiowp->work_qlock1); -} - -/* - * Cancel a request. Return 1 if the callers locks were temporarily - * dropped, otherwise return 0. - */ -int -_aio_cancel_req(aio_worker_t *aiowp, aio_req_t *reqp, int *canceled, int *done) -{ - int ostate = reqp->req_state; - - ASSERT(MUTEX_HELD(&__aio_mutex)); - ASSERT(MUTEX_HELD(&aiowp->work_qlock1)); - if (ostate == AIO_REQ_CANCELED) - return (0); - if (ostate == AIO_REQ_DONE || ostate == AIO_REQ_DONEQ) { - (*done)++; - return (0); - } - if (reqp->req_op == AIOFSYNC && reqp != aiowp->work_req) { - ASSERT(POSIX_AIO(reqp)); - /* Cancel the queued aio_fsync() request */ - if (!reqp->req_head->lio_canned) { - reqp->req_head->lio_canned = 1; - _aio_outstand_cnt--; - (*canceled)++; - } - return (0); - } - reqp->req_state = AIO_REQ_CANCELED; - _aio_req_del(aiowp, reqp, ostate); - (void) _aio_hash_del(reqp->req_resultp); - (*canceled)++; - if (reqp == aiowp->work_req) { - ASSERT(ostate == AIO_REQ_INPROGRESS); - /* - * Set the result values now, before _aiodone() is called. - * We do this because the application can expect aio_return - * and aio_errno to be set to -1 and ECANCELED, respectively, - * immediately after a successful return from aiocancel() - * or aio_cancel(). - */ - _aio_set_result(reqp, -1, ECANCELED); - (void) thr_kill(aiowp->work_tid, SIGAIOCANCEL); - return (0); - } - if (!POSIX_AIO(reqp)) { - _aio_outstand_cnt--; - _aio_set_result(reqp, -1, ECANCELED); - return (0); - } - sig_mutex_unlock(&aiowp->work_qlock1); - sig_mutex_unlock(&__aio_mutex); - _aiodone(reqp, -1, ECANCELED); - sig_mutex_lock(&__aio_mutex); - sig_mutex_lock(&aiowp->work_qlock1); - return (1); -} - -/* - * This is the worker's main routine. - * The task of this function is to execute all queued requests; - * once the last pending request is executed this function will block - * in _aio_idle(). A new incoming request must wakeup this thread to - * restart the work. - * Every worker has an own work queue. The queue lock is required - * to synchronize the addition of new requests for this worker or - * cancellation of pending/running requests. - * - * Cancellation scenarios: - * The cancellation of a request is being done asynchronously using - * _aio_cancel_req() from another thread context. - * A queued request can be cancelled in different manners : - * a) request is queued but not "in progress" or "done" (AIO_REQ_QUEUED): - * - lock the queue -> remove the request -> unlock the queue - * - this function/thread does not detect this cancellation process - * b) request is in progress (AIO_REQ_INPROGRESS) : - * - this function first allow the cancellation of the running - * request with the flag "work_cancel_flg=1" - * see _aio_req_get() -> _aio_cancel_on() - * During this phase, it is allowed to interrupt the worker - * thread running the request (this thread) using the SIGAIOCANCEL - * signal. - * Once this thread returns from the kernel (because the request - * is just done), then it must disable a possible cancellation - * and proceed to finish the request. To disable the cancellation - * this thread must use _aio_cancel_off() to set "work_cancel_flg=0". - * c) request is already done (AIO_REQ_DONE || AIO_REQ_DONEQ): - * same procedure as in a) - * - * To b) - * This thread uses sigsetjmp() to define the position in the code, where - * it wish to continue working in the case that a SIGAIOCANCEL signal - * is detected. - * Normally this thread should get the cancellation signal during the - * kernel phase (reading or writing). In that case the signal handler - * aiosigcancelhndlr() is activated using the worker thread context, - * which again will use the siglongjmp() function to break the standard - * code flow and jump to the "sigsetjmp" position, provided that - * "work_cancel_flg" is set to "1". - * Because the "work_cancel_flg" is only manipulated by this worker - * thread and it can only run on one CPU at a given time, it is not - * necessary to protect that flag with the queue lock. - * Returning from the kernel (read or write system call) we must - * first disable the use of the SIGAIOCANCEL signal and accordingly - * the use of the siglongjmp() function to prevent a possible deadlock: - * - It can happens that this worker thread returns from the kernel and - * blocks in "work_qlock1", - * - then a second thread cancels the apparently "in progress" request - * and sends the SIGAIOCANCEL signal to the worker thread, - * - the worker thread gets assigned the "work_qlock1" and will returns - * from the kernel, - * - the kernel detects the pending signal and activates the signal - * handler instead, - * - if the "work_cancel_flg" is still set then the signal handler - * should use siglongjmp() to cancel the "in progress" request and - * it would try to acquire the same work_qlock1 in _aio_req_get() - * for a second time => deadlock. - * To avoid that situation we disable the cancellation of the request - * in progress BEFORE we try to acquire the work_qlock1. - * In that case the signal handler will not call siglongjmp() and the - * worker thread will continue running the standard code flow. - * Then this thread must check the AIO_REQ_CANCELED flag to emulate - * an eventually required siglongjmp() freeing the work_qlock1 and - * avoiding a deadlock. - */ -void * -_aio_do_request(void *arglist) -{ - aio_worker_t *aiowp = (aio_worker_t *)arglist; - struct aio_args *arg; - aio_req_t *reqp; /* current AIO request */ - ssize_t retval; - int error; - - if (pthread_setspecific(_aio_key, aiowp) != 0) - _aiopanic("_aio_do_request, pthread_setspecific()"); - (void) pthread_sigmask(SIG_SETMASK, &_worker_set, NULL); - ASSERT(aiowp->work_req == NULL); - - /* - * We resume here when an operation is cancelled. - * On first entry, aiowp->work_req == NULL, so all - * we do is block SIGAIOCANCEL. - */ - (void) sigsetjmp(aiowp->work_jmp_buf, 0); - - _sigoff(); /* block SIGAIOCANCEL */ - if (aiowp->work_req != NULL) - _aio_finish_request(aiowp, -1, ECANCELED); - - for (;;) { - /* - * Put completed requests on aio_done_list. This has - * to be done as part of the main loop to ensure that - * we don't artificially starve any aiowait'ers. - */ - if (aiowp->work_done1) - _aio_work_done(aiowp); - -top: - /* consume any deferred SIGAIOCANCEL signal here */ - _sigon(); - _sigoff(); - - while ((reqp = _aio_req_get(aiowp)) == NULL) - _aio_idle(aiowp); - arg = &reqp->req_args; - ASSERT(reqp->req_state == AIO_REQ_INPROGRESS || - reqp->req_state == AIO_REQ_CANCELED); - error = 0; - - switch (reqp->req_op) { - case AIOREAD: - case AIOAREAD: - _sigon(); /* unblock SIGAIOCANCEL */ - retval = pread(arg->fd, arg->buf, - arg->bufsz, arg->offset); - if (retval == -1) { - if (errno == ESPIPE) { - retval = read(arg->fd, - arg->buf, arg->bufsz); - if (retval == -1) - error = errno; - } else { - error = errno; - } - } - _sigoff(); /* block SIGAIOCANCEL */ - break; - case AIOWRITE: - case AIOAWRITE: - _sigon(); /* unblock SIGAIOCANCEL */ - retval = pwrite(arg->fd, arg->buf, - arg->bufsz, arg->offset); - if (retval == -1) { - if (errno == ESPIPE) { - retval = write(arg->fd, - arg->buf, arg->bufsz); - if (retval == -1) - error = errno; - } else { - error = errno; - } - } - _sigoff(); /* block SIGAIOCANCEL */ - break; -#if !defined(_LP64) - case AIOAREAD64: - _sigon(); /* unblock SIGAIOCANCEL */ - retval = pread64(arg->fd, arg->buf, - arg->bufsz, arg->offset); - if (retval == -1) { - if (errno == ESPIPE) { - retval = read(arg->fd, - arg->buf, arg->bufsz); - if (retval == -1) - error = errno; - } else { - error = errno; - } - } - _sigoff(); /* block SIGAIOCANCEL */ - break; - case AIOAWRITE64: - _sigon(); /* unblock SIGAIOCANCEL */ - retval = pwrite64(arg->fd, arg->buf, - arg->bufsz, arg->offset); - if (retval == -1) { - if (errno == ESPIPE) { - retval = write(arg->fd, - arg->buf, arg->bufsz); - if (retval == -1) - error = errno; - } else { - error = errno; - } - } - _sigoff(); /* block SIGAIOCANCEL */ - break; -#endif /* !defined(_LP64) */ - case AIOFSYNC: - if (_aio_fsync_del(aiowp, reqp)) - goto top; - ASSERT(reqp->req_head == NULL); - /* - * All writes for this fsync request are now - * acknowledged. Now make these writes visible - * and put the final request into the hash table. - */ - if (reqp->req_state == AIO_REQ_CANCELED) { - /* EMPTY */; - } else if (arg->offset == O_SYNC) { - if ((retval = __fdsync(arg->fd, FSYNC)) == -1) - error = errno; - } else { - if ((retval = __fdsync(arg->fd, FDSYNC)) == -1) - error = errno; - } - if (_aio_hash_insert(reqp->req_resultp, reqp) != 0) - _aiopanic("_aio_do_request(): AIOFSYNC: " - "request already in hash table"); - break; - default: - _aiopanic("_aio_do_request, bad op"); - } - - _aio_finish_request(aiowp, retval, error); - } - /* NOTREACHED */ - return (NULL); -} - -/* - * Perform the tail processing for _aio_do_request(). - * The in-progress request may or may not have been cancelled. - */ -static void -_aio_finish_request(aio_worker_t *aiowp, ssize_t retval, int error) -{ - aio_req_t *reqp; - - sig_mutex_lock(&aiowp->work_qlock1); - if ((reqp = aiowp->work_req) == NULL) - sig_mutex_unlock(&aiowp->work_qlock1); - else { - aiowp->work_req = NULL; - if (reqp->req_state == AIO_REQ_CANCELED) { - retval = -1; - error = ECANCELED; - } - if (!POSIX_AIO(reqp)) { - sig_mutex_unlock(&aiowp->work_qlock1); - sig_mutex_lock(&__aio_mutex); - if (reqp->req_state == AIO_REQ_INPROGRESS) - reqp->req_state = AIO_REQ_DONE; - _aio_req_done_cnt++; - _aio_set_result(reqp, retval, error); - if (error == ECANCELED) - _aio_outstand_cnt--; - sig_mutex_unlock(&__aio_mutex); - } else { - if (reqp->req_state == AIO_REQ_INPROGRESS) - reqp->req_state = AIO_REQ_DONE; - sig_mutex_unlock(&aiowp->work_qlock1); - _aiodone(reqp, retval, error); - } - } -} - -void -_aio_req_mark_done(aio_req_t *reqp) -{ -#if !defined(_LP64) - if (reqp->req_largefile) - ((aiocb64_t *)reqp->req_aiocbp)->aio_state = USERAIO_DONE; - else -#endif - ((aiocb_t *)reqp->req_aiocbp)->aio_state = USERAIO_DONE; -} - -/* - * Sleep for 'ticks' clock ticks to give somebody else a chance to run, - * hopefully to consume one of our queued signals. - */ -static void -_aio_delay(int ticks) -{ - (void) usleep(ticks * (MICROSEC / hz)); -} - -/* - * Actually send the notifications. - * We could block indefinitely here if the application - * is not listening for the signal or port notifications. - */ -static void -send_notification(notif_param_t *npp) -{ - int backoff; - - if (npp->np_signo) { - backoff = 0; - while (__sigqueue(__pid, npp->np_signo, npp->np_user, - SI_ASYNCIO) == -1) { - ASSERT(errno == EAGAIN); - if (++backoff > 10) - backoff = 10; - _aio_delay(backoff); - } - } else if (npp->np_port >= 0) { - (void) _port_dispatch(npp->np_port, 0, PORT_SOURCE_AIO, - npp->np_event, npp->np_object, npp->np_user); - } - if (npp->np_lio_signo) { - backoff = 0; - while (__sigqueue(__pid, npp->np_lio_signo, npp->np_lio_user, - SI_ASYNCIO) == -1) { - ASSERT(errno == EAGAIN); - if (++backoff > 10) - backoff = 10; - _aio_delay(backoff); - } - } else if (npp->np_lio_port >= 0) { - (void) _port_dispatch(npp->np_lio_port, 0, PORT_SOURCE_AIO, - npp->np_lio_event, npp->np_lio_object, npp->np_lio_user); - } -} - -/* - * Asynchronous notification worker. - */ -void * -_aio_do_notify(void *arg) -{ - aio_worker_t *aiowp = (aio_worker_t *)arg; - aio_req_t *reqp; - - /* - * This isn't really necessary. All signals are blocked. - */ - if (pthread_setspecific(_aio_key, aiowp) != 0) - _aiopanic("_aio_do_notify, pthread_setspecific()"); - - /* - * Notifications are never cancelled. - * All signals remain blocked, forever. - */ - - for (;;) { - while ((reqp = _aio_req_get(aiowp)) == NULL) - _aio_idle(aiowp); - send_notification(&reqp->req_notify); - _aio_req_free(reqp); - } - - /* NOTREACHED */ - return (NULL); -} - -/* - * Do the completion semantics for a request that was either canceled - * by _aio_cancel_req() or was completed by _aio_do_request(). - */ -static void -_aiodone(aio_req_t *reqp, ssize_t retval, int error) -{ - aio_result_t *resultp = reqp->req_resultp; - int notify = 0; - aio_lio_t *head; - int sigev_none; - int sigev_signal; - int sigev_thread; - int sigev_port; - notif_param_t np; - - /* - * We call _aiodone() only for Posix I/O. - */ - ASSERT(POSIX_AIO(reqp)); - - sigev_none = 0; - sigev_signal = 0; - sigev_thread = 0; - sigev_port = 0; - np.np_signo = 0; - np.np_port = -1; - np.np_lio_signo = 0; - np.np_lio_port = -1; - - switch (reqp->req_sigevent.sigev_notify) { - case SIGEV_NONE: - sigev_none = 1; - break; - case SIGEV_SIGNAL: - sigev_signal = 1; - break; - case SIGEV_THREAD: - sigev_thread = 1; - break; - case SIGEV_PORT: - sigev_port = 1; - break; - default: - _aiopanic("_aiodone: improper sigev_notify"); - break; - } - - /* - * Figure out the notification parameters while holding __aio_mutex. - * Actually perform the notifications after dropping __aio_mutex. - * This allows us to sleep for a long time (if the notifications - * incur delays) without impeding other async I/O operations. - */ - - sig_mutex_lock(&__aio_mutex); - - if (sigev_signal) { - if ((np.np_signo = reqp->req_sigevent.sigev_signo) != 0) - notify = 1; - np.np_user = reqp->req_sigevent.sigev_value.sival_ptr; - } else if (sigev_thread | sigev_port) { - if ((np.np_port = reqp->req_sigevent.sigev_signo) >= 0) - notify = 1; - np.np_event = reqp->req_op; - if (np.np_event == AIOFSYNC && reqp->req_largefile) - np.np_event = AIOFSYNC64; - np.np_object = (uintptr_t)reqp->req_aiocbp; - np.np_user = reqp->req_sigevent.sigev_value.sival_ptr; - } - - if (resultp->aio_errno == EINPROGRESS) - _aio_set_result(reqp, retval, error); - - _aio_outstand_cnt--; - - head = reqp->req_head; - reqp->req_head = NULL; - - if (sigev_none) { - _aio_enq_doneq(reqp); - reqp = NULL; - } else { - (void) _aio_hash_del(resultp); - _aio_req_mark_done(reqp); - } - - _aio_waitn_wakeup(); - - /* - * __aio_waitn() sets AIO_WAIT_INPROGRESS and - * __aio_suspend() increments "_aio_kernel_suspend" - * when they are waiting in the kernel for completed I/Os. - * - * _kaio(AIONOTIFY) awakes the corresponding function - * in the kernel; then the corresponding __aio_waitn() or - * __aio_suspend() function could reap the recently - * completed I/Os (_aiodone()). - */ - if ((_aio_flags & AIO_WAIT_INPROGRESS) || _aio_kernel_suspend > 0) - (void) _kaio(AIONOTIFY); - - sig_mutex_unlock(&__aio_mutex); - - if (head != NULL) { - /* - * If all the lio requests have completed, - * prepare to notify the waiting thread. - */ - sig_mutex_lock(&head->lio_mutex); - ASSERT(head->lio_refcnt == head->lio_nent); - if (head->lio_refcnt == 1) { - int waiting = 0; - if (head->lio_mode == LIO_WAIT) { - if ((waiting = head->lio_waiting) != 0) - (void) cond_signal(&head->lio_cond_cv); - } else if (head->lio_port < 0) { /* none or signal */ - if ((np.np_lio_signo = head->lio_signo) != 0) - notify = 1; - np.np_lio_user = head->lio_sigval.sival_ptr; - } else { /* thread or port */ - notify = 1; - np.np_lio_port = head->lio_port; - np.np_lio_event = head->lio_event; - np.np_lio_object = - (uintptr_t)head->lio_sigevent; - np.np_lio_user = head->lio_sigval.sival_ptr; - } - head->lio_nent = head->lio_refcnt = 0; - sig_mutex_unlock(&head->lio_mutex); - if (waiting == 0) - _aio_lio_free(head); - } else { - head->lio_nent--; - head->lio_refcnt--; - sig_mutex_unlock(&head->lio_mutex); - } - } - - /* - * The request is completed; now perform the notifications. - */ - if (notify) { - if (reqp != NULL) { - /* - * We usually put the request on the notification - * queue because we don't want to block and delay - * other operations behind us in the work queue. - * Also we must never block on a cancel notification - * because we are being called from an application - * thread in this case and that could lead to deadlock - * if no other thread is receiving notificatins. - */ - reqp->req_notify = np; - reqp->req_op = AIONOTIFY; - _aio_req_add(reqp, &__workers_no, AIONOTIFY); - reqp = NULL; - } else { - /* - * We already put the request on the done queue, - * so we can't queue it to the notification queue. - * Just do the notification directly. - */ - send_notification(&np); - } - } - - if (reqp != NULL) - _aio_req_free(reqp); -} - -/* - * Delete fsync requests from list head until there is - * only one left. Return 0 when there is only one, - * otherwise return a non-zero value. - */ -static int -_aio_fsync_del(aio_worker_t *aiowp, aio_req_t *reqp) -{ - aio_lio_t *head = reqp->req_head; - int rval = 0; - - ASSERT(reqp == aiowp->work_req); - sig_mutex_lock(&aiowp->work_qlock1); - sig_mutex_lock(&head->lio_mutex); - if (head->lio_refcnt > 1) { - head->lio_refcnt--; - head->lio_nent--; - aiowp->work_req = NULL; - sig_mutex_unlock(&head->lio_mutex); - sig_mutex_unlock(&aiowp->work_qlock1); - sig_mutex_lock(&__aio_mutex); - _aio_outstand_cnt--; - _aio_waitn_wakeup(); - sig_mutex_unlock(&__aio_mutex); - _aio_req_free(reqp); - return (1); - } - ASSERT(head->lio_nent == 1 && head->lio_refcnt == 1); - reqp->req_head = NULL; - if (head->lio_canned) - reqp->req_state = AIO_REQ_CANCELED; - if (head->lio_mode == LIO_DESTROY) { - aiowp->work_req = NULL; - rval = 1; - } - sig_mutex_unlock(&head->lio_mutex); - sig_mutex_unlock(&aiowp->work_qlock1); - head->lio_refcnt--; - head->lio_nent--; - _aio_lio_free(head); - if (rval != 0) - _aio_req_free(reqp); - return (rval); -} - -/* - * worker is set idle when its work queue is empty. - * The worker checks again that it has no more work and then - * goes to sleep waiting for more work. - */ -void -_aio_idle(aio_worker_t *aiowp) -{ - int error = 0; - - sig_mutex_lock(&aiowp->work_qlock1); - if (aiowp->work_count1 == 0) { - ASSERT(aiowp->work_minload1 == 0); - aiowp->work_idleflg = 1; - /* - * A cancellation handler is not needed here. - * aio worker threads are never cancelled via pthread_cancel(). - */ - error = sig_cond_wait(&aiowp->work_idle_cv, - &aiowp->work_qlock1); - /* - * The idle flag is normally cleared before worker is awakened - * by aio_req_add(). On error (EINTR), we clear it ourself. - */ - if (error) - aiowp->work_idleflg = 0; - } - sig_mutex_unlock(&aiowp->work_qlock1); -} - -/* - * A worker's completed AIO requests are placed onto a global - * done queue. The application is only sent a SIGIO signal if - * the process has a handler enabled and it is not waiting via - * aiowait(). - */ -static void -_aio_work_done(aio_worker_t *aiowp) -{ - aio_req_t *reqp; - - sig_mutex_lock(&aiowp->work_qlock1); - reqp = aiowp->work_prev1; - reqp->req_next = NULL; - aiowp->work_done1 = 0; - aiowp->work_tail1 = aiowp->work_next1; - if (aiowp->work_tail1 == NULL) - aiowp->work_head1 = NULL; - aiowp->work_prev1 = NULL; - sig_mutex_unlock(&aiowp->work_qlock1); - sig_mutex_lock(&__aio_mutex); - _aio_donecnt++; - _aio_outstand_cnt--; - _aio_req_done_cnt--; - ASSERT(_aio_donecnt > 0 && - _aio_outstand_cnt >= 0 && - _aio_req_done_cnt >= 0); - ASSERT(reqp != NULL); - - if (_aio_done_tail == NULL) { - _aio_done_head = _aio_done_tail = reqp; - } else { - _aio_done_head->req_next = reqp; - _aio_done_head = reqp; - } - - if (_aiowait_flag) { - sig_mutex_unlock(&__aio_mutex); - (void) _kaio(AIONOTIFY); - } else { - sig_mutex_unlock(&__aio_mutex); - if (_sigio_enabled) - (void) kill(__pid, SIGIO); - } -} - -/* - * The done queue consists of AIO requests that are in either the - * AIO_REQ_DONE or AIO_REQ_CANCELED state. Requests that were cancelled - * are discarded. If the done queue is empty then NULL is returned. - * Otherwise the address of a done aio_result_t is returned. - */ -aio_result_t * -_aio_req_done(void) -{ - aio_req_t *reqp; - aio_result_t *resultp; - - ASSERT(MUTEX_HELD(&__aio_mutex)); - - if ((reqp = _aio_done_tail) != NULL) { - if ((_aio_done_tail = reqp->req_next) == NULL) - _aio_done_head = NULL; - ASSERT(_aio_donecnt > 0); - _aio_donecnt--; - (void) _aio_hash_del(reqp->req_resultp); - resultp = reqp->req_resultp; - ASSERT(reqp->req_state == AIO_REQ_DONE); - _aio_req_free(reqp); - return (resultp); - } - /* is queue empty? */ - if (reqp == NULL && _aio_outstand_cnt == 0) { - return ((aio_result_t *)-1); - } - return (NULL); -} - -/* - * Set the return and errno values for the application's use. - * - * For the Posix interfaces, we must set the return value first followed - * by the errno value because the Posix interfaces allow for a change - * in the errno value from EINPROGRESS to something else to signal - * the completion of the asynchronous request. - * - * The opposite is true for the Solaris interfaces. These allow for - * a change in the return value from AIO_INPROGRESS to something else - * to signal the completion of the asynchronous request. - */ -void -_aio_set_result(aio_req_t *reqp, ssize_t retval, int error) -{ - aio_result_t *resultp = reqp->req_resultp; - - if (POSIX_AIO(reqp)) { - resultp->aio_return = retval; - membar_producer(); - resultp->aio_errno = error; - } else { - resultp->aio_errno = error; - membar_producer(); - resultp->aio_return = retval; - } -} - -/* - * Add an AIO request onto the next work queue. - * A circular list of workers is used to choose the next worker. - */ -void -_aio_req_add(aio_req_t *reqp, aio_worker_t **nextworker, int mode) -{ - aio_worker_t *aiowp; - aio_worker_t *first; - int load_bal_flg = 1; - int found; - - ASSERT(reqp->req_state != AIO_REQ_DONEQ); - reqp->req_next = NULL; - /* - * Try to acquire the next worker's work queue. If it is locked, - * then search the list of workers until a queue is found unlocked, - * or until the list is completely traversed at which point another - * worker will be created. - */ - _sigoff(); /* defer SIGIO */ - sig_mutex_lock(&__aio_mutex); - first = aiowp = *nextworker; - if (mode != AIONOTIFY) - _aio_outstand_cnt++; - sig_mutex_unlock(&__aio_mutex); - - switch (mode) { - case AIOREAD: - case AIOWRITE: - case AIOAREAD: - case AIOAWRITE: -#if !defined(_LP64) - case AIOAREAD64: - case AIOAWRITE64: -#endif - /* try to find an idle worker */ - found = 0; - do { - if (sig_mutex_trylock(&aiowp->work_qlock1) == 0) { - if (aiowp->work_idleflg) { - found = 1; - break; - } - sig_mutex_unlock(&aiowp->work_qlock1); - } - } while ((aiowp = aiowp->work_forw) != first); - - if (found) { - aiowp->work_minload1++; - break; - } - - /* try to acquire some worker's queue lock */ - do { - if (sig_mutex_trylock(&aiowp->work_qlock1) == 0) { - found = 1; - break; - } - } while ((aiowp = aiowp->work_forw) != first); - - /* - * Create more workers when the workers appear overloaded. - * Either all the workers are busy draining their queues - * or no worker's queue lock could be acquired. - */ - if (!found) { - if (_aio_worker_cnt < _max_workers) { - if (_aio_create_worker(reqp, mode)) - _aiopanic("_aio_req_add: add worker"); - _sigon(); /* reenable SIGIO */ - return; - } - - /* - * No worker available and we have created - * _max_workers, keep going through the - * list slowly until we get a lock - */ - while (sig_mutex_trylock(&aiowp->work_qlock1) != 0) { - /* - * give someone else a chance - */ - _aio_delay(1); - aiowp = aiowp->work_forw; - } - } - - ASSERT(MUTEX_HELD(&aiowp->work_qlock1)); - if (_aio_worker_cnt < _max_workers && - aiowp->work_minload1 >= _minworkload) { - sig_mutex_unlock(&aiowp->work_qlock1); - sig_mutex_lock(&__aio_mutex); - *nextworker = aiowp->work_forw; - sig_mutex_unlock(&__aio_mutex); - if (_aio_create_worker(reqp, mode)) - _aiopanic("aio_req_add: add worker"); - _sigon(); /* reenable SIGIO */ - return; - } - aiowp->work_minload1++; - break; - case AIOFSYNC: - case AIONOTIFY: - load_bal_flg = 0; - sig_mutex_lock(&aiowp->work_qlock1); - break; - default: - _aiopanic("_aio_req_add: invalid mode"); - break; - } - /* - * Put request onto worker's work queue. - */ - if (aiowp->work_tail1 == NULL) { - ASSERT(aiowp->work_count1 == 0); - aiowp->work_tail1 = reqp; - aiowp->work_next1 = reqp; - } else { - aiowp->work_head1->req_next = reqp; - if (aiowp->work_next1 == NULL) - aiowp->work_next1 = reqp; - } - reqp->req_state = AIO_REQ_QUEUED; - reqp->req_worker = aiowp; - aiowp->work_head1 = reqp; - /* - * Awaken worker if it is not currently active. - */ - if (aiowp->work_count1++ == 0 && aiowp->work_idleflg) { - aiowp->work_idleflg = 0; - (void) cond_signal(&aiowp->work_idle_cv); - } - sig_mutex_unlock(&aiowp->work_qlock1); - - if (load_bal_flg) { - sig_mutex_lock(&__aio_mutex); - *nextworker = aiowp->work_forw; - sig_mutex_unlock(&__aio_mutex); - } - _sigon(); /* reenable SIGIO */ -} - -/* - * Get an AIO request for a specified worker. - * If the work queue is empty, return NULL. - */ -aio_req_t * -_aio_req_get(aio_worker_t *aiowp) -{ - aio_req_t *reqp; - - sig_mutex_lock(&aiowp->work_qlock1); - if ((reqp = aiowp->work_next1) != NULL) { - /* - * Remove a POSIX request from the queue; the - * request queue is a singularly linked list - * with a previous pointer. The request is - * removed by updating the previous pointer. - * - * Non-posix requests are left on the queue - * to eventually be placed on the done queue. - */ - - if (POSIX_AIO(reqp)) { - if (aiowp->work_prev1 == NULL) { - aiowp->work_tail1 = reqp->req_next; - if (aiowp->work_tail1 == NULL) - aiowp->work_head1 = NULL; - } else { - aiowp->work_prev1->req_next = reqp->req_next; - if (aiowp->work_head1 == reqp) - aiowp->work_head1 = reqp->req_next; - } - - } else { - aiowp->work_prev1 = reqp; - ASSERT(aiowp->work_done1 >= 0); - aiowp->work_done1++; - } - ASSERT(reqp != reqp->req_next); - aiowp->work_next1 = reqp->req_next; - ASSERT(aiowp->work_count1 >= 1); - aiowp->work_count1--; - switch (reqp->req_op) { - case AIOREAD: - case AIOWRITE: - case AIOAREAD: - case AIOAWRITE: -#if !defined(_LP64) - case AIOAREAD64: - case AIOAWRITE64: -#endif - ASSERT(aiowp->work_minload1 > 0); - aiowp->work_minload1--; - break; - } - reqp->req_state = AIO_REQ_INPROGRESS; - } - aiowp->work_req = reqp; - ASSERT(reqp != NULL || aiowp->work_count1 == 0); - sig_mutex_unlock(&aiowp->work_qlock1); - return (reqp); -} - -static void -_aio_req_del(aio_worker_t *aiowp, aio_req_t *reqp, int ostate) -{ - aio_req_t **last; - aio_req_t *lastrp; - aio_req_t *next; - - ASSERT(aiowp != NULL); - ASSERT(MUTEX_HELD(&aiowp->work_qlock1)); - if (POSIX_AIO(reqp)) { - if (ostate != AIO_REQ_QUEUED) - return; - } - last = &aiowp->work_tail1; - lastrp = aiowp->work_tail1; - ASSERT(ostate == AIO_REQ_QUEUED || ostate == AIO_REQ_INPROGRESS); - while ((next = *last) != NULL) { - if (next == reqp) { - *last = next->req_next; - if (aiowp->work_next1 == next) - aiowp->work_next1 = next->req_next; - - if ((next->req_next != NULL) || - (aiowp->work_done1 == 0)) { - if (aiowp->work_head1 == next) - aiowp->work_head1 = next->req_next; - if (aiowp->work_prev1 == next) - aiowp->work_prev1 = next->req_next; - } else { - if (aiowp->work_head1 == next) - aiowp->work_head1 = lastrp; - if (aiowp->work_prev1 == next) - aiowp->work_prev1 = lastrp; - } - - if (ostate == AIO_REQ_QUEUED) { - ASSERT(aiowp->work_count1 >= 1); - aiowp->work_count1--; - ASSERT(aiowp->work_minload1 >= 1); - aiowp->work_minload1--; - } else { - ASSERT(ostate == AIO_REQ_INPROGRESS && - !POSIX_AIO(reqp)); - aiowp->work_done1--; - } - return; - } - last = &next->req_next; - lastrp = next; - } - /* NOTREACHED */ -} - -static void -_aio_enq_doneq(aio_req_t *reqp) -{ - if (_aio_doneq == NULL) { - _aio_doneq = reqp; - reqp->req_next = reqp->req_prev = reqp; - } else { - reqp->req_next = _aio_doneq; - reqp->req_prev = _aio_doneq->req_prev; - _aio_doneq->req_prev->req_next = reqp; - _aio_doneq->req_prev = reqp; - } - reqp->req_state = AIO_REQ_DONEQ; - _aio_doneq_cnt++; -} - -/* - * caller owns the _aio_mutex - */ -aio_req_t * -_aio_req_remove(aio_req_t *reqp) -{ - if (reqp && reqp->req_state != AIO_REQ_DONEQ) - return (NULL); - - if (reqp) { - /* request in done queue */ - if (_aio_doneq == reqp) - _aio_doneq = reqp->req_next; - if (_aio_doneq == reqp) { - /* only one request on queue */ - _aio_doneq = NULL; - } else { - aio_req_t *tmp = reqp->req_next; - reqp->req_prev->req_next = tmp; - tmp->req_prev = reqp->req_prev; - } - } else if ((reqp = _aio_doneq) != NULL) { - if (reqp == reqp->req_next) { - /* only one request on queue */ - _aio_doneq = NULL; - } else { - reqp->req_prev->req_next = _aio_doneq = reqp->req_next; - _aio_doneq->req_prev = reqp->req_prev; - } - } - if (reqp) { - _aio_doneq_cnt--; - reqp->req_next = reqp->req_prev = reqp; - reqp->req_state = AIO_REQ_DONE; - } - return (reqp); -} - -/* - * An AIO request is identified by an aio_result_t pointer. The library - * maps this aio_result_t pointer to its internal representation using a - * hash table. This function adds an aio_result_t pointer to the hash table. - */ -static int -_aio_hash_insert(aio_result_t *resultp, aio_req_t *reqp) -{ - aio_hash_t *hashp; - aio_req_t **prev; - aio_req_t *next; - - hashp = _aio_hash + AIOHASH(resultp); - sig_mutex_lock(&hashp->hash_lock); - prev = &hashp->hash_ptr; - while ((next = *prev) != NULL) { - if (resultp == next->req_resultp) { - sig_mutex_unlock(&hashp->hash_lock); - return (-1); - } - prev = &next->req_link; - } - *prev = reqp; - ASSERT(reqp->req_link == NULL); - sig_mutex_unlock(&hashp->hash_lock); - return (0); -} - -/* - * Remove an entry from the hash table. - */ -aio_req_t * -_aio_hash_del(aio_result_t *resultp) -{ - aio_hash_t *hashp; - aio_req_t **prev; - aio_req_t *next = NULL; - - if (_aio_hash != NULL) { - hashp = _aio_hash + AIOHASH(resultp); - sig_mutex_lock(&hashp->hash_lock); - prev = &hashp->hash_ptr; - while ((next = *prev) != NULL) { - if (resultp == next->req_resultp) { - *prev = next->req_link; - next->req_link = NULL; - break; - } - prev = &next->req_link; - } - sig_mutex_unlock(&hashp->hash_lock); - } - return (next); -} - -/* - * find an entry in the hash table - */ -aio_req_t * -_aio_hash_find(aio_result_t *resultp) -{ - aio_hash_t *hashp; - aio_req_t **prev; - aio_req_t *next = NULL; - - if (_aio_hash != NULL) { - hashp = _aio_hash + AIOHASH(resultp); - sig_mutex_lock(&hashp->hash_lock); - prev = &hashp->hash_ptr; - while ((next = *prev) != NULL) { - if (resultp == next->req_resultp) - break; - prev = &next->req_link; - } - sig_mutex_unlock(&hashp->hash_lock); - } - return (next); -} - -/* - * AIO interface for POSIX - */ -int -_aio_rw(aiocb_t *aiocbp, aio_lio_t *lio_head, aio_worker_t **nextworker, - int mode, int flg) -{ - aio_req_t *reqp; - aio_args_t *ap; - int kerr; - - if (aiocbp == NULL) { - errno = EINVAL; - return (-1); - } - - /* initialize kaio */ - if (!_kaio_ok) - _kaio_init(); - - aiocbp->aio_state = NOCHECK; - - /* - * If we have been called because a list I/O - * kaio() failed, we dont want to repeat the - * system call - */ - - if (flg & AIO_KAIO) { - /* - * Try kernel aio first. - * If errno is ENOTSUP/EBADFD, - * fall back to the thread implementation. - */ - if (_kaio_ok > 0 && KAIO_SUPPORTED(aiocbp->aio_fildes)) { - aiocbp->aio_resultp.aio_errno = EINPROGRESS; - aiocbp->aio_state = CHECK; - kerr = (int)_kaio(mode, aiocbp); - if (kerr == 0) - return (0); - if (errno != ENOTSUP && errno != EBADFD) { - aiocbp->aio_resultp.aio_errno = errno; - aiocbp->aio_resultp.aio_return = -1; - aiocbp->aio_state = NOCHECK; - return (-1); - } - if (errno == EBADFD) - SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes); - } - } - - aiocbp->aio_resultp.aio_errno = EINPROGRESS; - aiocbp->aio_state = USERAIO; - - if (!__uaio_ok && __uaio_init() == -1) - return (-1); - - if ((reqp = _aio_req_alloc()) == NULL) { - errno = EAGAIN; - return (-1); - } - - /* - * If an LIO request, add the list head to the aio request - */ - reqp->req_head = lio_head; - reqp->req_type = AIO_POSIX_REQ; - reqp->req_op = mode; - reqp->req_largefile = 0; - - if (aiocbp->aio_sigevent.sigev_notify == SIGEV_NONE) { - reqp->req_sigevent.sigev_notify = SIGEV_NONE; - } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_SIGNAL) { - reqp->req_sigevent.sigev_notify = SIGEV_SIGNAL; - reqp->req_sigevent.sigev_signo = - aiocbp->aio_sigevent.sigev_signo; - reqp->req_sigevent.sigev_value.sival_ptr = - aiocbp->aio_sigevent.sigev_value.sival_ptr; - } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_PORT) { - port_notify_t *pn = aiocbp->aio_sigevent.sigev_value.sival_ptr; - reqp->req_sigevent.sigev_notify = SIGEV_PORT; - /* - * Reuse the sigevent structure to contain the port number - * and the user value. Same for SIGEV_THREAD, below. - */ - reqp->req_sigevent.sigev_signo = - pn->portnfy_port; - reqp->req_sigevent.sigev_value.sival_ptr = - pn->portnfy_user; - } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_THREAD) { - reqp->req_sigevent.sigev_notify = SIGEV_THREAD; - /* - * The sigevent structure contains the port number - * and the user value. Same for SIGEV_PORT, above. - */ - reqp->req_sigevent.sigev_signo = - aiocbp->aio_sigevent.sigev_signo; - reqp->req_sigevent.sigev_value.sival_ptr = - aiocbp->aio_sigevent.sigev_value.sival_ptr; - } - - reqp->req_resultp = &aiocbp->aio_resultp; - reqp->req_aiocbp = aiocbp; - ap = &reqp->req_args; - ap->fd = aiocbp->aio_fildes; - ap->buf = (caddr_t)aiocbp->aio_buf; - ap->bufsz = aiocbp->aio_nbytes; - ap->offset = aiocbp->aio_offset; - - if ((flg & AIO_NO_DUPS) && - _aio_hash_insert(&aiocbp->aio_resultp, reqp) != 0) { - _aiopanic("_aio_rw(): request already in hash table"); - _aio_req_free(reqp); - errno = EINVAL; - return (-1); - } - _aio_req_add(reqp, nextworker, mode); - return (0); -} - -#if !defined(_LP64) -/* - * 64-bit AIO interface for POSIX - */ -int -_aio_rw64(aiocb64_t *aiocbp, aio_lio_t *lio_head, aio_worker_t **nextworker, - int mode, int flg) -{ - aio_req_t *reqp; - aio_args_t *ap; - int kerr; - - if (aiocbp == NULL) { - errno = EINVAL; - return (-1); - } - - /* initialize kaio */ - if (!_kaio_ok) - _kaio_init(); - - aiocbp->aio_state = NOCHECK; - - /* - * If we have been called because a list I/O - * kaio() failed, we dont want to repeat the - * system call - */ - - if (flg & AIO_KAIO) { - /* - * Try kernel aio first. - * If errno is ENOTSUP/EBADFD, - * fall back to the thread implementation. - */ - if (_kaio_ok > 0 && KAIO_SUPPORTED(aiocbp->aio_fildes)) { - aiocbp->aio_resultp.aio_errno = EINPROGRESS; - aiocbp->aio_state = CHECK; - kerr = (int)_kaio(mode, aiocbp); - if (kerr == 0) - return (0); - if (errno != ENOTSUP && errno != EBADFD) { - aiocbp->aio_resultp.aio_errno = errno; - aiocbp->aio_resultp.aio_return = -1; - aiocbp->aio_state = NOCHECK; - return (-1); - } - if (errno == EBADFD) - SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes); - } - } - - aiocbp->aio_resultp.aio_errno = EINPROGRESS; - aiocbp->aio_state = USERAIO; - - if (!__uaio_ok && __uaio_init() == -1) - return (-1); - - if ((reqp = _aio_req_alloc()) == NULL) { - errno = EAGAIN; - return (-1); - } - - /* - * If an LIO request, add the list head to the aio request - */ - reqp->req_head = lio_head; - reqp->req_type = AIO_POSIX_REQ; - reqp->req_op = mode; - reqp->req_largefile = 1; - - if (aiocbp->aio_sigevent.sigev_notify == SIGEV_NONE) { - reqp->req_sigevent.sigev_notify = SIGEV_NONE; - } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_SIGNAL) { - reqp->req_sigevent.sigev_notify = SIGEV_SIGNAL; - reqp->req_sigevent.sigev_signo = - aiocbp->aio_sigevent.sigev_signo; - reqp->req_sigevent.sigev_value.sival_ptr = - aiocbp->aio_sigevent.sigev_value.sival_ptr; - } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_PORT) { - port_notify_t *pn = aiocbp->aio_sigevent.sigev_value.sival_ptr; - reqp->req_sigevent.sigev_notify = SIGEV_PORT; - reqp->req_sigevent.sigev_signo = - pn->portnfy_port; - reqp->req_sigevent.sigev_value.sival_ptr = - pn->portnfy_user; - } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_THREAD) { - reqp->req_sigevent.sigev_notify = SIGEV_THREAD; - reqp->req_sigevent.sigev_signo = - aiocbp->aio_sigevent.sigev_signo; - reqp->req_sigevent.sigev_value.sival_ptr = - aiocbp->aio_sigevent.sigev_value.sival_ptr; - } - - reqp->req_resultp = &aiocbp->aio_resultp; - reqp->req_aiocbp = aiocbp; - ap = &reqp->req_args; - ap->fd = aiocbp->aio_fildes; - ap->buf = (caddr_t)aiocbp->aio_buf; - ap->bufsz = aiocbp->aio_nbytes; - ap->offset = aiocbp->aio_offset; - - if ((flg & AIO_NO_DUPS) && - _aio_hash_insert(&aiocbp->aio_resultp, reqp) != 0) { - _aiopanic("_aio_rw64(): request already in hash table"); - _aio_req_free(reqp); - errno = EINVAL; - return (-1); - } - _aio_req_add(reqp, nextworker, mode); - return (0); -} -#endif /* !defined(_LP64) */ diff --git a/usr/src/lib/libaio/common/libaio.h b/usr/src/lib/libaio/common/libaio.h deleted file mode 100644 index dfbc1dd19f..0000000000 --- a/usr/src/lib/libaio/common/libaio.h +++ /dev/null @@ -1,396 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LIBAIO_H -#define _LIBAIO_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "c_synonyms.h" -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <string.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <thread.h> -#include <pthread.h> -#include <asynch.h> -#include <setjmp.h> -#include <signal.h> -#include <siginfo.h> -#include <aio.h> -#include <limits.h> -#include <ucontext.h> -#include <sys/mman.h> - -#if defined(DEBUG) -extern int assfail(char *, char *, int); -#define ASSERT(EX) ((void)((EX) || assfail(#EX, __FILE__, __LINE__))) -#else -#define ASSERT(EX) -#endif - -#if !defined(_LP64) -#define AIOSTKSIZE (64 * 1024) -#else -#define AIOSTKSIZE (128 * 1024) -#endif - -#define SIGAIOCANCEL SIGLWP /* special aio cancelation signal */ - -#define AIO_WAITN_MAXIOCBS 32768 /* max. iocbs per system call */ - -/* - * Declare structure types. The structures themselves are defined below. - */ -typedef struct aio_args aio_args_t; -typedef struct aio_lio aio_lio_t; -typedef struct notif_param notif_param_t; -typedef struct aio_req aio_req_t; -typedef struct aio_worker aio_worker_t; -typedef struct aio_hash aio_hash_t; - -struct aio_args { - int fd; - caddr_t buf; - size_t bufsz; - offset_t offset; -}; - -/* - * list head for UFS list I/O - */ -struct aio_lio { - mutex_t lio_mutex; /* list mutex */ - cond_t lio_cond_cv; /* list notification for I/O done */ - aio_lio_t *lio_next; /* pointer to next on freelist */ - char lio_mode; /* LIO_WAIT/LIO_NOWAIT */ - char lio_canned; /* lio was canceled */ - char lio_largefile; /* largefile operation */ - char lio_waiting; /* waiting in __lio_listio() */ - int lio_nent; /* Number of list I/O's */ - int lio_refcnt; /* outstanding I/O's */ - int lio_event; /* Event number for notification */ - int lio_port; /* Port number for notification */ - int lio_signo; /* Signal number for notification */ - union sigval lio_sigval; /* Signal parameter */ - uintptr_t lio_object; /* for SIGEV_THREAD or SIGEV_PORT */ - struct sigevent *lio_sigevent; /* Notification function and attr. */ -}; - -/* - * Notification parameters - */ -struct notif_param { - int np_signo; /* SIGEV_SIGNAL */ - int np_port; /* SIGEV_THREAD or SIGEV_PORT */ - void *np_user; - int np_event; - uintptr_t np_object; - int np_lio_signo; /* listio: SIGEV_SIGNAL */ - int np_lio_port; /* listio: SIGEV_THREAD or SIGEV_PORT */ - void *np_lio_user; - int np_lio_event; - uintptr_t np_lio_object; -}; - -struct aio_req { - /* - * fields protected by _aio_mutex lock. - */ - aio_req_t *req_link; /* hash/freelist chain link */ - /* - * when req is on the doneq, then req_next is protected by - * the _aio_mutex lock. when the req is on a work q, then - * req_next is protected by a worker's work_qlock1 lock. - */ - aio_req_t *req_next; /* request/done queue link */ - aio_req_t *req_prev; /* double linked list */ - /* - * fields protected by a worker's work_qlock1 lock. - */ - char req_state; /* AIO_REQ_QUEUED, ... */ - /* - * fields require no locking. - */ - char req_type; /* AIO_POSIX_REQ or not */ - char req_largefile; /* largefile operation */ - char req_op; /* AIOREAD, etc. */ - aio_worker_t *req_worker; /* associate request with worker */ - aio_result_t *req_resultp; /* address of result buffer */ - aio_args_t req_args; /* arglist */ - aio_lio_t *req_head; /* list head for LIO */ - struct sigevent req_sigevent; - void *req_aiocbp; /* ptr to aiocb or aiocb64 */ - notif_param_t req_notify; /* notification parameters */ -}; - -/* special lio type that destroys itself when lio refcnt becomes zero */ -#define LIO_FSYNC LIO_WAIT+1 -#define LIO_DESTROY LIO_FSYNC+1 - -/* lio flags */ -#define LIO_FSYNC_CANCELED 0x1 - -/* values for aio_state */ - -#define AIO_REQ_QUEUED 1 -#define AIO_REQ_INPROGRESS 2 -#define AIO_REQ_CANCELED 3 -#define AIO_REQ_DONE 4 -#define AIO_REQ_FREE 5 -#define AIO_REQ_DONEQ 6 - -/* use KAIO in _aio_rw() */ -#define AIO_NO_KAIO 0x0 -#define AIO_KAIO 0x1 -#define AIO_NO_DUPS 0x2 - -#define AIO_POSIX_REQ 0x1 - -#define CHECK 1 -#define NOCHECK 2 -#define CHECKED 3 -#define USERAIO 4 -#define USERAIO_DONE 5 - -/* values for _aio_flags */ - -/* - * if set, _aiodone() notifies aio_waitn about done requests - * from the threads - */ -#define AIO_WAIT_INPROGRESS 0x1 - -/* - * if set, _aiodone() wakes up functions waiting for completed I/Os - */ -#define AIO_IO_WAITING 0x2 - -#define AIO_LIB_WAITN 0x4 /* aio_waitn in progress */ -#define AIO_LIB_WAITN_PENDING 0x8 /* aio_waitn requests pending */ - -/* - * Before a kaio() system call, the fd will be checked - * to ensure that kernel async. I/O is supported for this file. - * The only way to find out is if a kaio() call returns ENOTSUP, - * so the default will always be to try the kaio() call. Only in - * the specific instance of a kaio() call returning ENOTSUP - * will we stop submitting kaio() calls for that fd. - * If the fd is outside the array bounds, we will allow the kaio() - * call. - * - * The only way that an fd entry can go from ENOTSUP to supported - * is if that fd is freed up by a close(), and close will clear - * the entry for that fd. - * - * Each fd gets a bit in the array _kaio_supported[]. - * - * uint32_t _kaio_supported[MAX_KAIO_FDARRAY_SIZE]; - * - * Array is MAX_KAIO_ARRAY_SIZE of 32-bit elements, for 8kb. - * If more than (MAX_KAIO_FDARRAY_SIZE * KAIO_FDARRAY_ELEM_SIZE) - * files are open, this can be expanded. - */ - -#define MAX_KAIO_FDARRAY_SIZE 2048 -#define KAIO_FDARRAY_ELEM_SIZE WORD_BIT /* uint32_t */ - -#define MAX_KAIO_FDS (MAX_KAIO_FDARRAY_SIZE * KAIO_FDARRAY_ELEM_SIZE) - -#define VALID_FD(fdes) ((fdes) >= 0 && (fdes) < MAX_KAIO_FDS) - -#define KAIO_SUPPORTED(fdes) \ - (!VALID_FD(fdes) || \ - ((_kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] & \ - (uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE))) == 0)) - -#define SET_KAIO_NOT_SUPPORTED(fdes) \ - if (VALID_FD(fdes)) \ - _kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] |= \ - (uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE)) - -#define CLEAR_KAIO_SUPPORTED(fdes) \ - if (VALID_FD(fdes)) \ - _kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] &= \ - ~(uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE)) - -struct aio_worker { - aio_worker_t *work_forw; /* forward link in list of workers */ - aio_worker_t *work_backw; /* backwards link in list of workers */ - mutex_t work_qlock1; /* lock for work queue 1 */ - cond_t work_idle_cv; /* place to sleep when idle */ - aio_req_t *work_head1; /* head of work request queue 1 */ - aio_req_t *work_tail1; /* tail of work request queue 1 */ - aio_req_t *work_next1; /* work queue one's next pointer */ - aio_req_t *work_prev1; /* last request done from queue 1 */ - aio_req_t *work_req; /* active work request */ - thread_t work_tid; /* worker's thread-id */ - int work_count1; /* length of work queue one */ - int work_done1; /* number of requests done */ - int work_minload1; /* min length of queue */ - int work_idleflg; /* when set, worker is idle */ - sigjmp_buf work_jmp_buf; /* cancellation point */ -}; - -struct aio_hash { /* resultp hash table */ - mutex_t hash_lock; - aio_req_t *hash_ptr; -#if !defined(_LP64) - void *hash_pad; /* ensure sizeof (aio_hash_t) == 32 */ -#endif -}; - -extern aio_hash_t *_aio_hash; - -#define HASHSZ 2048 /* power of 2 */ -#define AIOHASH(resultp) ((((uintptr_t)(resultp) >> 17) ^ \ - ((uintptr_t)(resultp) >> 2)) & (HASHSZ - 1)) -#define POSIX_AIO(x) ((x)->req_type == AIO_POSIX_REQ) - -/* - * _sigoff(), _sigon(), and _sigdeferred() are consolidation-private - * interfaces in libc that defer signals, enable signals, and return - * the deferred signal number (if any), respectively. - * Calls to _sigoff() and _sigon() can nest but must be balanced, - * so nested calls to these functions work properly. - */ -extern void _sigoff(void); -extern void _sigon(void); -extern int _sigdeferred(void); - -/* - * The following five functions are the same as the corresponding - * libc functions without the 'sig_' prefix, except that all signals - * are deferred while the lock is held. Their use in the library - * makes the aio interfaces async-signal safe. - */ -extern void sig_mutex_lock(mutex_t *); -extern void sig_mutex_unlock(mutex_t *); -extern int sig_mutex_trylock(mutex_t *); -extern int sig_cond_wait(cond_t *, mutex_t *); -extern int sig_cond_reltimedwait(cond_t *, mutex_t *, const timespec_t *); - -extern int __uaio_init(void); -extern void _kaio_init(void); -extern intptr_t _kaio(int, ...); -extern int _aiorw(int, caddr_t, int, offset_t, int, aio_result_t *, int); -extern int _aio_rw(aiocb_t *, aio_lio_t *, aio_worker_t **, int, int); -#if !defined(_LP64) -extern int _aio_rw64(aiocb64_t *, aio_lio_t *, aio_worker_t **, int, int); -#endif -extern int _aio_create_worker(aio_req_t *, int); - -extern int _aio_cancel_req(aio_worker_t *, aio_req_t *, int *, int *); -extern int aiocancel_all(int); -extern void init_signals(void); - -extern void _aiopanic(char *); -extern aio_req_t *_aio_hash_find(aio_result_t *); -extern aio_req_t *_aio_hash_del(aio_result_t *); -extern void _aio_req_mark_done(aio_req_t *); -extern void _aio_waitn_wakeup(void); - -extern aio_worker_t *_aio_worker_alloc(void); -extern void _aio_worker_free(void *); -extern aio_req_t *_aio_req_alloc(void); -extern void _aio_req_free(aio_req_t *); -extern aio_lio_t *_aio_lio_alloc(void); -extern void _aio_lio_free(aio_lio_t *); - -extern void _aio_idle(aio_worker_t *); -extern void *_aio_do_request(void *); -extern void *_aio_do_notify(void *); -extern void _lio_remove(aio_req_t *); -extern aio_req_t *_aio_req_remove(aio_req_t *); -extern int _aio_get_timedelta(timespec_t *, timespec_t *); - -extern int _close(int); -extern int __sigqueue(pid_t pid, int signo, - /* const union sigval */ void *value, int si_code); -extern int _sigaction(int sig, const struct sigaction *act, - struct sigaction *oact); -extern int _sigemptyset(sigset_t *set); -extern int _sigaddset(sigset_t *set, int signo); -extern int _sigismember(const sigset_t *set, int signo); - -extern aio_result_t *_aio_req_done(void); -extern void _aio_set_result(aio_req_t *, ssize_t, int); - -extern aio_worker_t *_kaiowp; /* points to kaio cleanup thread */ -extern aio_worker_t *__workers_rw; /* list of all rw workers */ -extern aio_worker_t *__nextworker_rw; /* worker chosen for next rw request */ -extern int __rw_workerscnt; /* number of rw workers */ -extern aio_worker_t *__workers_no; /* list of all notification workers */ -extern aio_worker_t *__nextworker_no; /* worker chosen, next notification */ -extern int __no_workerscnt; /* number of notification workers */ -extern mutex_t __aio_initlock; /* makes aio initialization atomic */ -extern mutex_t __aio_mutex; /* global aio lock */ -extern cond_t _aio_iowait_cv; /* wait for userland I/Os */ -extern cond_t _aio_waitn_cv; /* wait for end of aio_waitn */ -extern int _max_workers; /* max number of workers permitted */ -extern int _min_workers; /* min number of workers */ -extern sigset_t _worker_set; /* worker's signal mask */ -extern sigset_t _full_set; /* all signals (sigfillset()) */ -extern int _aio_worker_cnt; /* number of AIO workers */ -extern int _sigio_enabled; /* when set, send SIGIO signal */ -extern pid_t __pid; /* process's PID */ -extern int __uaio_ok; /* indicates if aio is initialized */ -extern int _kaio_ok; /* indicates if kaio is initialized */ -extern pthread_key_t _aio_key; /* for thread-specific data */ - -extern aio_req_t *_aio_done_tail; /* list of done requests */ -extern aio_req_t *_aio_done_head; -extern aio_req_t *_aio_doneq; -extern int _aio_freelist_cnt; -extern int _aio_allocated_cnt; -extern int _aio_donecnt; -extern int _aio_doneq_cnt; -extern int _aio_waitncnt; /* # of requests for aio_waitn */ -extern int _aio_outstand_cnt; /* # of outstanding requests */ -extern int _kaio_outstand_cnt; /* # of outstanding kaio requests */ -extern int _aio_req_done_cnt; /* req. done but not in "done queue" */ -extern int _aio_kernel_suspend; /* active kernel kaio calls */ -extern int _aio_suscv_cnt; /* aio_suspend calls waiting on cv's */ -extern int _aiowait_flag; /* when set, aiowait() is inprogress */ -extern int _aio_flags; /* see libaio.h defines for */ - -/* - * Array for determining whether or not a file supports kaio - */ -extern uint32_t *_kaio_supported; - -#ifdef __cplusplus -} -#endif - -#endif /* _LIBAIO_H */ diff --git a/usr/src/lib/libaio/common/llib-laio b/usr/src/lib/libaio/common/llib-laio index e3737a4b41..02d00ba1db 100644 --- a/usr/src/lib/libaio/common/llib-laio +++ b/usr/src/lib/libaio/common/llib-laio @@ -19,58 +19,27 @@ * CDDL HEADER END */ -/* LINTLIBRARY */ -/* PROTOLIB1 */ - /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* LINTLIBRARY */ +/* PROTOLIB1 */ -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <signal.h> -#include <libaio.h> +#pragma ident "%Z%%M% %I% %E% SMI" -/* - * usr/src/lib/libaio/common - */ +#include <sys/asynch.h> -/* aio.c */ int aioread(int fd, caddr_t buf, int bufsz, off_t offset, int whence, aio_result_t *resultp); int aiowrite(int fd, caddr_t buf, int bufsz, off_t offset, int whence, aio_result_t *resultp); +#if !defined(_LP64) int aioread64(int fd, caddr_t buf, int bufsz, off64_t offset, int whence, aio_result_t *resultp); int aiowrite64(int fd, caddr_t buf, int bufsz, off64_t offset, int whence, aio_result_t *resultp); +#endif /* !_LP64 */ int aiocancel(aio_result_t *resultp); aio_result_t *aiowait(struct timeval *uwait); - -/* scalls.c */ -int _libaio_close(int fd); - -/* posix_aio.c */ -int __aio_read(aiocb_t *cb); -int __aio_write(aiocb_t *cb); -int __lio_listio(int mode, aiocb_t * const list[], - int nent, struct sigevent *sig); -int __aio_suspend(void **list, int nent, const timespec_t *timo, int lf); -int __aio_error(aiocb_t *cb); -ssize_t __aio_return(aiocb_t *cb); -int __aio_fsync(int op, aiocb_t *aiocbp); -int __aio_cancel(int fd, aiocb_t *aiocbp); -int __aio_waitn(void **list, uint_t nent, uint_t *nwait, - const struct timespec *timeout); -int __aio_read64(aiocb64_t *cb); -int __aio_write64(aiocb64_t *cb); -int __lio_listio64(int mode, aiocb64_t *const list[], - int nent, struct sigevent *sig); -int __aio_error64(aiocb64_t *cb); -ssize_t __aio_return64(aiocb64_t *cb); -int __aio_fsync64(int op, aiocb64_t *aiocbp); -int __aio_cancel64(int fd, aiocb64_t *aiocbp); diff --git a/usr/src/lib/libaio/common/ma.c b/usr/src/lib/libaio/common/ma.c deleted file mode 100644 index e5b26be616..0000000000 --- a/usr/src/lib/libaio/common/ma.c +++ /dev/null @@ -1,449 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "libaio.h" - -/* - * libaio memory allocation strategy: - * - * For each of the structure types we wish to allocate/free - * (aio_worker_t, aio_req_t, aio_lio_t), we use mmap() to allocate - * chunks of memory which are then subdivided into individual - * elements which are put into a free list from which allocations - * are made and to which frees are returned. - * - * Chunks start small (8 Kbytes) and get larger (size doubling) - * as more chunks are needed. This keeps memory usage small for - * light use and fragmentation small for heavy use. - * - * Chunks are never unmapped except as an aftermath of fork() - * in the child process, when they are all unmapped (because - * all of the worker threads disappear in the child). - */ - -#define INITIAL_CHUNKSIZE (8 * 1024) - -/* - * The header structure for each chunk. - * A pointer and a size_t ensures proper alignment for whatever follows. - */ -typedef struct chunk { - struct chunk *chunk_next; /* linked list */ - size_t chunk_size; /* size of this chunk */ -} chunk_t; - -chunk_t *chunk_list = NULL; /* list of all chunks */ -mutex_t chunk_lock = DEFAULTMUTEX; - -chunk_t * -chunk_alloc(size_t size) -{ - chunk_t *chp = NULL; - void *ptr; - - ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, -1, (off_t)0); - if (ptr != MAP_FAILED) { - sig_mutex_lock(&chunk_lock); - chp = ptr; - chp->chunk_next = chunk_list; - chunk_list = chp; - chp->chunk_size = size; - sig_mutex_unlock(&chunk_lock); - } - - return (chp); -} - -aio_worker_t *worker_freelist = NULL; /* free list of worker structures */ -aio_worker_t *worker_freelast = NULL; -size_t worker_chunksize = 0; -mutex_t worker_lock = DEFAULTMUTEX; - -/* - * Allocate a worker control block. - */ -aio_worker_t * -_aio_worker_alloc(void) -{ - aio_worker_t *aiowp; - chunk_t *chp; - size_t chunksize; - int nelem; - int i; - - sig_mutex_lock(&worker_lock); - if ((aiowp = worker_freelist) == NULL) { - if ((chunksize = 2 * worker_chunksize) == 0) - chunksize = INITIAL_CHUNKSIZE; - if ((chp = chunk_alloc(chunksize)) == NULL) { - sig_mutex_unlock(&worker_lock); - return (NULL); - } - worker_chunksize = chunksize; - worker_freelist = (aio_worker_t *)(uintptr_t)(chp + 1); - nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_worker_t); - for (i = 0, aiowp = worker_freelist; i < nelem; i++, aiowp++) - aiowp->work_forw = aiowp + 1; - worker_freelast = aiowp - 1; - worker_freelast->work_forw = NULL; - aiowp = worker_freelist; - } - if ((worker_freelist = aiowp->work_forw) == NULL) - worker_freelast = NULL; - sig_mutex_unlock(&worker_lock); - - aiowp->work_forw = NULL; - (void) mutex_init(&aiowp->work_qlock1, USYNC_THREAD, NULL); - (void) cond_init(&aiowp->work_idle_cv, USYNC_THREAD, NULL); - - return (aiowp); -} - -/* - * Free a worker control block. - * Declared with void *arg so it can be a pthread_key_create() destructor. - */ -void -_aio_worker_free(void *arg) -{ - aio_worker_t *aiowp = arg; - - (void) mutex_destroy(&aiowp->work_qlock1); - (void) cond_destroy(&aiowp->work_idle_cv); - (void) memset(aiowp, 0, sizeof (*aiowp)); - - sig_mutex_lock(&worker_lock); - if (worker_freelast == NULL) { - worker_freelist = worker_freelast = aiowp; - } else { - worker_freelast->work_forw = aiowp; - worker_freelast = aiowp; - } - sig_mutex_unlock(&worker_lock); -} - -aio_req_t *_aio_freelist = NULL; /* free list of request structures */ -aio_req_t *_aio_freelast = NULL; -size_t request_chunksize = 0; -int _aio_freelist_cnt = 0; -int _aio_allocated_cnt = 0; -mutex_t __aio_cache_lock = DEFAULTMUTEX; - -/* - * Allocate an aio request structure. - */ -aio_req_t * -_aio_req_alloc(void) -{ - aio_req_t *reqp; - chunk_t *chp; - size_t chunksize; - int nelem; - int i; - - sig_mutex_lock(&__aio_cache_lock); - if ((reqp = _aio_freelist) == NULL) { - if ((chunksize = 2 * request_chunksize) == 0) - chunksize = INITIAL_CHUNKSIZE; - if ((chp = chunk_alloc(chunksize)) == NULL) { - sig_mutex_unlock(&__aio_cache_lock); - return (NULL); - } - request_chunksize = chunksize; - _aio_freelist = (aio_req_t *)(uintptr_t)(chp + 1); - nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_req_t); - for (i = 0, reqp = _aio_freelist; i < nelem; i++, reqp++) { - reqp->req_state = AIO_REQ_FREE; - reqp->req_link = reqp + 1; - } - _aio_freelast = reqp - 1; - _aio_freelast->req_link = NULL; - _aio_freelist_cnt = nelem; - reqp = _aio_freelist; - } - if ((_aio_freelist = reqp->req_link) == NULL) - _aio_freelast = NULL; - _aio_freelist_cnt--; - _aio_allocated_cnt++; - sig_mutex_unlock(&__aio_cache_lock); - - ASSERT(reqp->req_state == AIO_REQ_FREE); - reqp->req_state = 0; - reqp->req_link = NULL; - reqp->req_sigevent.sigev_notify = SIGEV_NONE; - - return (reqp); -} - -/* - * Free an aio request structure. - */ -void -_aio_req_free(aio_req_t *reqp) -{ - ASSERT(reqp->req_state != AIO_REQ_FREE && - reqp->req_state != AIO_REQ_DONEQ); - (void) memset(reqp, 0, sizeof (*reqp)); - reqp->req_state = AIO_REQ_FREE; - - sig_mutex_lock(&__aio_cache_lock); - if (_aio_freelast == NULL) { - _aio_freelist = _aio_freelast = reqp; - } else { - _aio_freelast->req_link = reqp; - _aio_freelast = reqp; - } - _aio_freelist_cnt++; - _aio_allocated_cnt--; - sig_mutex_unlock(&__aio_cache_lock); -} - -aio_lio_t *_lio_head_freelist = NULL; /* free list of lio head structures */ -aio_lio_t *_lio_head_freelast = NULL; -size_t lio_head_chunksize = 0; -int _lio_alloc = 0; -int _lio_free = 0; -mutex_t __lio_mutex = DEFAULTMUTEX; - -/* - * Allocate a listio head structure. - */ -aio_lio_t * -_aio_lio_alloc(void) -{ - aio_lio_t *head; - chunk_t *chp; - size_t chunksize; - int nelem; - int i; - - sig_mutex_lock(&__lio_mutex); - if ((head = _lio_head_freelist) == NULL) { - if ((chunksize = 2 * lio_head_chunksize) == 0) - chunksize = INITIAL_CHUNKSIZE; - if ((chp = chunk_alloc(chunksize)) == NULL) { - sig_mutex_unlock(&__lio_mutex); - return (NULL); - } - lio_head_chunksize = chunksize; - _lio_head_freelist = (aio_lio_t *)(uintptr_t)(chp + 1); - nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_lio_t); - for (i = 0, head = _lio_head_freelist; i < nelem; i++, head++) - head->lio_next = head + 1; - _lio_head_freelast = head - 1; - _lio_head_freelast->lio_next = NULL; - _lio_alloc += nelem; - _lio_free = nelem; - head = _lio_head_freelist; - } - if ((_lio_head_freelist = head->lio_next) == NULL) - _lio_head_freelast = NULL; - _lio_free--; - sig_mutex_unlock(&__lio_mutex); - - ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0); - head->lio_next = NULL; - head->lio_port = -1; - (void) mutex_init(&head->lio_mutex, USYNC_THREAD, NULL); - (void) cond_init(&head->lio_cond_cv, USYNC_THREAD, NULL); - - return (head); -} - -/* - * Free a listio head structure. - */ -void -_aio_lio_free(aio_lio_t *head) -{ - ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0); - (void) mutex_destroy(&head->lio_mutex); - (void) cond_destroy(&head->lio_cond_cv); - (void) memset(head, 0, sizeof (*head)); - - sig_mutex_lock(&__lio_mutex); - if (_lio_head_freelast == NULL) { - _lio_head_freelist = _lio_head_freelast = head; - } else { - _lio_head_freelast->lio_next = head; - _lio_head_freelast = head; - } - _lio_free++; - sig_mutex_unlock(&__lio_mutex); -} - -static void -_aio_prepare_fork(void) -{ - /* acquire locks */ - sig_mutex_lock(&chunk_lock); -} - -static void -_aio_parent_fork(void) -{ - /* release locks */ - sig_mutex_unlock(&chunk_lock); -} - -static void -_aio_child_fork(void) -{ - chunk_t *chp; - - _aio_parent_fork(); /* release locks */ - - /* - * All of the workers are gone; free their structures. - */ - if (_kaio_supported != NULL) { - (void) munmap((void *)_kaio_supported, - MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t)); - _kaio_supported = NULL; - } - if (_aio_hash != NULL) { - (void) munmap((void *)_aio_hash, HASHSZ * sizeof (aio_hash_t)); - _aio_hash = NULL; - } - for (chp = chunk_list; chp != NULL; chp = chunk_list) { - chunk_list = chp->chunk_next; - (void) munmap((void *)chp, chp->chunk_size); - } - - /* - * Reinitialize global variables - */ - - worker_freelist = NULL; - worker_freelast = NULL; - worker_chunksize = 0; - (void) mutex_init(&worker_lock, USYNC_THREAD, NULL); - - _aio_freelist = NULL; - _aio_freelast = NULL; - request_chunksize = 0; - _aio_freelist_cnt = 0; - _aio_allocated_cnt = 0; - (void) mutex_init(&__aio_cache_lock, USYNC_THREAD, NULL); - - _lio_head_freelist = NULL; - _lio_head_freelast = NULL; - lio_head_chunksize = 0; - _lio_alloc = 0; - _lio_free = 0; - (void) mutex_init(&__lio_mutex, USYNC_THREAD, NULL); - - (void) mutex_init(&__aio_initlock, USYNC_THREAD, NULL); - (void) mutex_init(&__aio_mutex, USYNC_THREAD, NULL); - (void) cond_init(&_aio_iowait_cv, USYNC_THREAD, NULL); - (void) cond_init(&_aio_waitn_cv, USYNC_THREAD, NULL); - - _kaio_ok = 0; - __uaio_ok = 0; - - _kaiowp = NULL; - - __workers_rw = NULL; - __nextworker_rw = NULL; - __rw_workerscnt = 0; - - __workers_no = NULL; - __nextworker_no = NULL; - __no_workerscnt = 0; - - _aio_worker_cnt = 0; - - _aio_done_head = NULL; - _aio_done_tail = NULL; - _aio_donecnt = 0; - - _aio_doneq = NULL; - _aio_doneq_cnt = 0; - - _aio_waitncnt = 0; - _aio_outstand_cnt = 0; - _kaio_outstand_cnt = 0; - _aio_req_done_cnt = 0; - _aio_kernel_suspend = 0; - _aio_suscv_cnt = 0; - - _aiowait_flag = 0; - _aio_flags = 0; -} - -#define DISPLAY(var) \ - (void) fprintf(stderr, #var "\t= %d\n", var) - -static void -_aio_exit_info(void) -{ - if ((_kaio_ok | __uaio_ok) == 0) - return; - (void) fprintf(stderr, "\n"); - DISPLAY(_aio_freelist_cnt); - DISPLAY(_aio_allocated_cnt); - DISPLAY(_lio_alloc); - DISPLAY(_lio_free); - DISPLAY(__rw_workerscnt); - DISPLAY(__no_workerscnt); - DISPLAY(_aio_worker_cnt); - DISPLAY(_aio_donecnt); - DISPLAY(_aio_doneq_cnt); - DISPLAY(_aio_waitncnt); - DISPLAY(_aio_outstand_cnt); - DISPLAY(_kaio_outstand_cnt); - DISPLAY(_aio_req_done_cnt); - DISPLAY(_aio_kernel_suspend); - DISPLAY(_aio_suscv_cnt); - DISPLAY(_aiowait_flag); - DISPLAY(_aio_flags); -} - -#pragma init(_aio_init) -static void -_aio_init(void) -{ - char *str; - - (void) pthread_key_create(&_aio_key, _aio_worker_free); - (void) pthread_atfork(_aio_prepare_fork, - _aio_parent_fork, _aio_child_fork); - if ((str = getenv("_AIO_MIN_WORKERS")) != NULL) { - if ((_min_workers = atoi(str)) <= 0) - _min_workers = 8; - } - if ((str = getenv("_AIO_MAX_WORKERS")) != NULL) { - if ((_max_workers = atoi(str)) <= 0) - _max_workers = 256; - if (_max_workers < _min_workers + 1) - _max_workers = _min_workers + 1; - } - if ((str = getenv("_AIO_EXIT_INFO")) != NULL && atoi(str) != 0) - (void) atexit(_aio_exit_info); -} diff --git a/usr/src/lib/libaio/common/posix_aio.c b/usr/src/lib/libaio/common/posix_aio.c deleted file mode 100644 index c72acbd5ac..0000000000 --- a/usr/src/lib/libaio/common/posix_aio.c +++ /dev/null @@ -1,1717 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * posix_aio.c implements the POSIX async. I/O - * functions for librt - * - * aio_read - * aio_write - * aio_error - * aio_return - * aio_suspend - * lio_listio - * aio_fsync - * aio_cancel - */ - -#include "libaio.h" -#include <atomic.h> -#include <sys/file.h> -#include <sys/port.h> - -extern int __fdsync(int, int); - -cond_t _aio_waitn_cv = DEFAULTCV; /* wait for end of aio_waitn */ - -static int _aio_check_timeout(const timespec_t *, timespec_t *, int *); - -/* defines for timedwait in __aio_waitn() and __aio_suspend() */ -#define AIO_TIMEOUT_INDEF -1 -#define AIO_TIMEOUT_POLL 0 -#define AIO_TIMEOUT_WAIT 1 -#define AIO_TIMEOUT_UNDEF 2 - -/* - * List I/O stuff - */ -static void _lio_list_decr(aio_lio_t *); -static long aio_list_max = 0; - -int -__aio_read(aiocb_t *aiocbp) -{ - if (aiocbp == NULL || aiocbp->aio_reqprio < 0) { - errno = EINVAL; - return (-1); - } - if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { - errno = EBUSY; - return (-1); - } - aiocbp->aio_lio_opcode = LIO_READ; - return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD, - (AIO_KAIO | AIO_NO_DUPS))); -} - -int -__aio_write(aiocb_t *aiocbp) -{ - if (aiocbp == NULL || aiocbp->aio_reqprio < 0) { - errno = EINVAL; - return (-1); - } - if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { - errno = EBUSY; - return (-1); - } - aiocbp->aio_lio_opcode = LIO_WRITE; - return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE, - (AIO_KAIO | AIO_NO_DUPS))); -} - -/* - * __lio_listio() cancellation handler. - */ -/* ARGSUSED */ -static void -_lio_listio_cleanup(aio_lio_t *head) -{ - int freeit = 0; - - ASSERT(MUTEX_HELD(&head->lio_mutex)); - if (head->lio_refcnt == 0) { - ASSERT(head->lio_nent == 0); - freeit = 1; - } - head->lio_waiting = 0; - sig_mutex_unlock(&head->lio_mutex); - if (freeit) - _aio_lio_free(head); -} - -int -__lio_listio(int mode, aiocb_t *const list[], - int nent, struct sigevent *sigev) -{ - int aio_ufs = 0; - int oerrno = 0; - aio_lio_t *head = NULL; - aiocb_t *aiocbp; - int state = 0; - int EIOflg = 0; - int rw; - int do_kaio = 0; - int error; - int i; - - if (!_kaio_ok) - _kaio_init(); - - if (aio_list_max == 0) - aio_list_max = sysconf(_SC_AIO_LISTIO_MAX); - - if (nent <= 0 || nent > aio_list_max) { - errno = EINVAL; - return (-1); - } - - switch (mode) { - case LIO_WAIT: - state = NOCHECK; - break; - case LIO_NOWAIT: - state = CHECK; - break; - default: - errno = EINVAL; - return (-1); - } - - for (i = 0; i < nent; i++) { - if ((aiocbp = list[i]) == NULL) - continue; - if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { - errno = EBUSY; - return (-1); - } - if (aiocbp->aio_lio_opcode == LIO_NOP) - aiocbp->aio_state = NOCHECK; - else { - aiocbp->aio_state = state; - if (KAIO_SUPPORTED(aiocbp->aio_fildes)) - do_kaio++; - else - aiocbp->aio_resultp.aio_errno = ENOTSUP; - } - } - - if (do_kaio) { - error = (int)_kaio(AIOLIO, mode, list, nent, sigev); - if (error == 0) - return (0); - oerrno = errno; - } else { - oerrno = errno = ENOTSUP; - error = -1; - } - - if (error == -1 && errno == ENOTSUP) { - error = errno = 0; - /* - * If LIO_WAIT, or notification required, allocate a list head. - */ - if (mode == LIO_WAIT || - (sigev != NULL && - (sigev->sigev_notify == SIGEV_SIGNAL || - sigev->sigev_notify == SIGEV_THREAD || - sigev->sigev_notify == SIGEV_PORT))) - head = _aio_lio_alloc(); - if (head) { - sig_mutex_lock(&head->lio_mutex); - head->lio_mode = mode; - head->lio_largefile = 0; - if (mode == LIO_NOWAIT && sigev != NULL) { - if (sigev->sigev_notify == SIGEV_THREAD) { - head->lio_port = sigev->sigev_signo; - head->lio_event = AIOLIO; - head->lio_sigevent = sigev; - head->lio_sigval.sival_ptr = - sigev->sigev_value.sival_ptr; - } else if (sigev->sigev_notify == SIGEV_PORT) { - port_notify_t *pn = - sigev->sigev_value.sival_ptr; - head->lio_port = pn->portnfy_port; - head->lio_event = AIOLIO; - head->lio_sigevent = sigev; - head->lio_sigval.sival_ptr = - pn->portnfy_user; - } else { /* SIGEV_SIGNAL */ - head->lio_signo = sigev->sigev_signo; - head->lio_sigval.sival_ptr = - sigev->sigev_value.sival_ptr; - } - } - head->lio_nent = head->lio_refcnt = nent; - sig_mutex_unlock(&head->lio_mutex); - } - /* - * find UFS requests, errno == ENOTSUP/EBADFD, - */ - for (i = 0; i < nent; i++) { - if ((aiocbp = list[i]) == NULL || - aiocbp->aio_lio_opcode == LIO_NOP || - (aiocbp->aio_resultp.aio_errno != ENOTSUP && - aiocbp->aio_resultp.aio_errno != EBADFD)) { - if (head) - _lio_list_decr(head); - continue; - } - if (aiocbp->aio_resultp.aio_errno == EBADFD) - SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes); - if (aiocbp->aio_reqprio < 0) { - aiocbp->aio_resultp.aio_errno = EINVAL; - aiocbp->aio_resultp.aio_return = -1; - EIOflg = 1; - if (head) - _lio_list_decr(head); - continue; - } - /* - * submit an AIO request with flags AIO_NO_KAIO - * to avoid the kaio() syscall in _aio_rw() - */ - switch (aiocbp->aio_lio_opcode) { - case LIO_READ: - rw = AIOAREAD; - break; - case LIO_WRITE: - rw = AIOAWRITE; - break; - } - error = _aio_rw(aiocbp, head, &__nextworker_rw, rw, - (AIO_NO_KAIO | AIO_NO_DUPS)); - if (error == 0) - aio_ufs++; - else { - if (head) - _lio_list_decr(head); - aiocbp->aio_resultp.aio_errno = error; - EIOflg = 1; - } - } - } - if (EIOflg) { - errno = EIO; - return (-1); - } - if (mode == LIO_WAIT && oerrno == ENOTSUP) { - /* - * call kaio(AIOLIOWAIT) to get all outstanding - * kernel AIO requests - */ - if ((nent - aio_ufs) > 0) - (void) _kaio(AIOLIOWAIT, mode, list, nent, sigev); - if (head != NULL && head->lio_nent > 0) { - sig_mutex_lock(&head->lio_mutex); - while (head->lio_refcnt > 0) { - int err; - head->lio_waiting = 1; - pthread_cleanup_push(_lio_listio_cleanup, head); - err = sig_cond_wait(&head->lio_cond_cv, - &head->lio_mutex); - pthread_cleanup_pop(0); - head->lio_waiting = 0; - if (err && head->lio_nent > 0) { - sig_mutex_unlock(&head->lio_mutex); - errno = err; - return (-1); - } - } - sig_mutex_unlock(&head->lio_mutex); - ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0); - _aio_lio_free(head); - for (i = 0; i < nent; i++) { - if ((aiocbp = list[i]) != NULL && - aiocbp->aio_resultp.aio_errno) { - errno = EIO; - return (-1); - } - } - } - return (0); - } - return (error); -} - -static void -_lio_list_decr(aio_lio_t *head) -{ - sig_mutex_lock(&head->lio_mutex); - head->lio_nent--; - head->lio_refcnt--; - sig_mutex_unlock(&head->lio_mutex); -} - -extern void _cancel_prologue(void); -extern void _cancel_epilogue(void); - -/* - * __aio_suspend() cancellation handler. - */ -/* ARGSUSED */ -static void -_aio_suspend_cleanup(int *counter) -{ - ASSERT(MUTEX_HELD(&__aio_mutex)); - (*counter)--; /* _aio_kernel_suspend or _aio_suscv_cnt */ - sig_mutex_unlock(&__aio_mutex); -} - -int -__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile) -{ - int cv_err; /* error code from cond_xxx() */ - int kerr; /* error code from _kaio(AIOSUSPEND) */ - int i; - timespec_t twait; /* copy of timo for internal calculations */ - timespec_t *wait = NULL; - int timedwait; - int req_outstanding; - aiocb_t **listp; - aiocb_t *aiocbp; -#if !defined(_LP64) - aiocb64_t **listp64; - aiocb64_t *aiocbp64; -#endif - hrtime_t hrtstart; - hrtime_t hrtend; - hrtime_t hrtres; - -#if defined(_LP64) - if (largefile) - _aiopanic("__aio_suspend: largefile set when _LP64 defined"); -#endif - - if (nent <= 0) { - errno = EINVAL; - return (-1); - } - - if (timo) { - if (timo->tv_sec < 0 || timo->tv_nsec < 0 || - timo->tv_nsec >= NANOSEC) { - errno = EINVAL; - return (-1); - } - /* Initialize start time if time monitoring desired */ - if (timo->tv_sec > 0 || timo->tv_nsec > 0) { - timedwait = AIO_TIMEOUT_WAIT; - hrtstart = gethrtime(); - } else { - /* content of timeout = 0 : polling */ - timedwait = AIO_TIMEOUT_POLL; - } - } else { - /* timeout pointer = NULL : wait indefinitely */ - timedwait = AIO_TIMEOUT_INDEF; - } - -#if !defined(_LP64) - if (largefile) { - listp64 = (aiocb64_t **)list; - for (i = 0; i < nent; i++) { - if ((aiocbp64 = listp64[i]) != NULL && - aiocbp64->aio_state == CHECK) - aiocbp64->aio_state = CHECKED; - } - } else -#endif /* !_LP64 */ - { - listp = (aiocb_t **)list; - for (i = 0; i < nent; i++) { - if ((aiocbp = listp[i]) != NULL && - aiocbp->aio_state == CHECK) - aiocbp->aio_state = CHECKED; - } - } - - sig_mutex_lock(&__aio_mutex); - - /* - * The next "if -case" is required to accelerate the - * access to completed RAW-IO requests. - */ - if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) { - /* Only kernel requests pending */ - - /* - * _aio_kernel_suspend is used to detect completed non RAW-IO - * requests. - * As long as this thread resides in the kernel (_kaio) further - * asynchronous non RAW-IO requests could be submitted. - */ - _aio_kernel_suspend++; - - /* - * Always do the kaio() call without using the KAIO_SUPPORTED() - * checks because it is not mandatory to have a valid fd - * set in the list entries, only the resultp must be set. - * - * _kaio(AIOSUSPEND ...) return values : - * 0: everythink ok, completed request found - * -1: error - * 1: no error : _aiodone awaked the _kaio(AIOSUSPEND,,) - * system call using _kaio(AIONOTIFY). It means, that some - * non RAW-IOs completed inbetween. - */ - - pthread_cleanup_push(_aio_suspend_cleanup, - &_aio_kernel_suspend); - pthread_cleanup_push(sig_mutex_lock, &__aio_mutex); - sig_mutex_unlock(&__aio_mutex); - _cancel_prologue(); - kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND, - list, nent, timo, -1); - _cancel_epilogue(); - pthread_cleanup_pop(1); /* sig_mutex_lock(&__aio_mutex) */ - pthread_cleanup_pop(0); - - _aio_kernel_suspend--; - - if (!kerr) { - sig_mutex_unlock(&__aio_mutex); - return (0); - } - } else { - kerr = 1; /* simulation: _kaio detected AIONOTIFY */ - } - - /* - * Return kernel error code if no other IOs are outstanding. - */ - req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt; - - sig_mutex_unlock(&__aio_mutex); - - if (req_outstanding == 0) { - /* no IOs outstanding in the thread pool */ - if (kerr == 1) - /* return "no IOs completed" */ - errno = EAGAIN; - return (-1); - } - - /* - * IOs using the thread pool are outstanding. - */ - if (timedwait == AIO_TIMEOUT_WAIT) { - /* time monitoring */ - hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC + - (hrtime_t)timo->tv_nsec; - hrtres = hrtend - gethrtime(); - if (hrtres <= 0) - hrtres = 1; - twait.tv_sec = hrtres / (hrtime_t)NANOSEC; - twait.tv_nsec = hrtres % (hrtime_t)NANOSEC; - wait = &twait; - } else if (timedwait == AIO_TIMEOUT_POLL) { - twait = *timo; /* content of timo = 0 : polling */ - wait = &twait; - } - - for (;;) { - int error; - int inprogress; - - /* first scan file system requests */ - inprogress = 0; - for (i = 0; i < nent; i++) { -#if !defined(_LP64) - if (largefile) { - if ((aiocbp64 = listp64[i]) == NULL) - continue; - error = aiocbp64->aio_resultp.aio_errno; - } else -#endif - { - if ((aiocbp = listp[i]) == NULL) - continue; - error = aiocbp->aio_resultp.aio_errno; - } - if (error == EINPROGRESS) - inprogress = 1; - else if (error != ECANCELED) { - errno = 0; - return (0); - } - } - - sig_mutex_lock(&__aio_mutex); - - /* - * If there aren't outstanding I/Os in the thread pool then - * we have to return here, provided that all kernel RAW-IOs - * also completed. - * If the kernel was notified to return, then we have to check - * possible pending RAW-IOs. - */ - if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) { - sig_mutex_unlock(&__aio_mutex); - errno = EAGAIN; - break; - } - - /* - * There are outstanding IOs in the thread pool or the kernel - * was notified to return. - * Check pending RAW-IOs first. - */ - if (kerr == 1) { - /* - * _aiodone just notified the kernel about - * completed non RAW-IOs (AIONOTIFY was detected). - */ - if (timedwait == AIO_TIMEOUT_WAIT) { - /* Update remaining timeout for the kernel */ - hrtres = hrtend - gethrtime(); - if (hrtres <= 0) { - /* timer expired */ - sig_mutex_unlock(&__aio_mutex); - errno = EAGAIN; - break; - } - wait->tv_sec = hrtres / (hrtime_t)NANOSEC; - wait->tv_nsec = hrtres % (hrtime_t)NANOSEC; - } - _aio_kernel_suspend++; - - pthread_cleanup_push(_aio_suspend_cleanup, - &_aio_kernel_suspend); - pthread_cleanup_push(sig_mutex_lock, &__aio_mutex); - sig_mutex_unlock(&__aio_mutex); - _cancel_prologue(); - kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND, - list, nent, wait, -1); - _cancel_epilogue(); - pthread_cleanup_pop(1); - pthread_cleanup_pop(0); - - _aio_kernel_suspend--; - - if (!kerr) { - sig_mutex_unlock(&__aio_mutex); - return (0); - } - } - - if (timedwait == AIO_TIMEOUT_POLL) { - sig_mutex_unlock(&__aio_mutex); - errno = EAGAIN; - break; - } - - if (timedwait == AIO_TIMEOUT_WAIT) { - /* Update remaining timeout */ - hrtres = hrtend - gethrtime(); - if (hrtres <= 0) { - /* timer expired */ - sig_mutex_unlock(&__aio_mutex); - errno = EAGAIN; - break; - } - wait->tv_sec = hrtres / (hrtime_t)NANOSEC; - wait->tv_nsec = hrtres % (hrtime_t)NANOSEC; - } - - if (_aio_outstand_cnt == 0) { - sig_mutex_unlock(&__aio_mutex); - continue; - } - - _aio_suscv_cnt++; /* ID for _aiodone (wake up) */ - - pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt); - if (timedwait == AIO_TIMEOUT_WAIT) { - cv_err = sig_cond_reltimedwait(&_aio_iowait_cv, - &__aio_mutex, wait); - if (cv_err == ETIME) - cv_err = EAGAIN; - } else { - /* wait indefinitely */ - cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex); - } - /* this decrements _aio_suscv_cnt and drops __aio_mutex */ - pthread_cleanup_pop(1); - - if (cv_err) { - errno = cv_err; - break; - } - } - return (-1); -} - -int -__aio_error(aiocb_t *aiocbp) -{ - aio_result_t *resultp = &aiocbp->aio_resultp; - int error; - - if ((error = resultp->aio_errno) == EINPROGRESS) { - if (aiocbp->aio_state == CHECK) { - /* - * Always do the kaio() call without using the - * KAIO_SUPPORTED() checks because it is not - * mandatory to have a valid fd set in the - * aiocb, only the resultp must be set. - */ - if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) { - errno = EINVAL; - return (-1); - } - error = resultp->aio_errno; - } else if (aiocbp->aio_state == CHECKED) { - aiocbp->aio_state = CHECK; - } - } - return (error); -} - -ssize_t -__aio_return(aiocb_t *aiocbp) -{ - aio_result_t *resultp = &aiocbp->aio_resultp; - aio_req_t *reqp; - int error; - ssize_t retval; - - /* - * The _aiodone() function stores resultp->aio_return before - * storing resultp->aio_errno (with an membar_producer() in - * between). We use membar_consumer() below to ensure proper - * memory ordering between _aiodone() and ourself. - */ - error = resultp->aio_errno; - membar_consumer(); - retval = resultp->aio_return; - - /* - * we use this condition to indicate either that - * aio_return() has been called before or should - * not have been called yet. - */ - if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) { - errno = error; - return (-1); - } - - /* - * Before we return, mark the result as being returned so that later - * calls to aio_return() will return the fact that the result has - * already been returned. - */ - sig_mutex_lock(&__aio_mutex); - /* retest, in case more than one thread actually got in here */ - if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) { - sig_mutex_unlock(&__aio_mutex); - errno = EINVAL; - return (-1); - } - resultp->aio_return = -1; - resultp->aio_errno = EINVAL; - if ((reqp = _aio_hash_del(resultp)) == NULL) - sig_mutex_unlock(&__aio_mutex); - else { - aiocbp->aio_state = NOCHECK; - ASSERT(reqp->req_head == NULL); - (void) _aio_req_remove(reqp); - sig_mutex_unlock(&__aio_mutex); - _aio_req_free(reqp); - } - - if (retval == -1) - errno = error; - return (retval); -} - -void -_lio_remove(aio_req_t *reqp) -{ - aio_lio_t *head; - int refcnt; - - if ((head = reqp->req_head) != NULL) { - sig_mutex_lock(&head->lio_mutex); - ASSERT(head->lio_refcnt == head->lio_nent); - refcnt = --head->lio_nent; - head->lio_refcnt--; - sig_mutex_unlock(&head->lio_mutex); - if (refcnt == 0) - _aio_lio_free(head); - reqp->req_head = NULL; - } -} - -/* - * This function returns the number of asynchronous I/O requests submitted. - */ -static int -__aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp, - int workerscnt) -{ - int i; - int error; - aio_worker_t *next = aiowp; - - for (i = 0; i < workerscnt; i++) { - error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO); - if (error != 0) { - sig_mutex_lock(&head->lio_mutex); - head->lio_mode = LIO_DESTROY; /* ignore fsync */ - head->lio_nent -= workerscnt - i; - head->lio_refcnt -= workerscnt - i; - sig_mutex_unlock(&head->lio_mutex); - errno = EAGAIN; - return (i); - } - next = next->work_forw; - } - return (i); -} - -/* - * This function is called from aio_fsync(3RT). - */ -int -__aio_fsync(int op, aiocb_t *aiocbp) -{ - aio_lio_t *head; - struct stat statb; - int fret; - - if (aiocbp == NULL) - return (0); - - if (aiocbp->aio_reqprio < 0 || (op != O_DSYNC && op != O_SYNC)) { - errno = EINVAL; - return (-1); - } - if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { - errno = EBUSY; - return (-1); - } - if (fstat(aiocbp->aio_fildes, &statb) < 0) - return (-1); - - /* - * Kernel aio_fsync() is not supported. - * We force user-level aio_fsync() just - * for the notification side-effect. - */ - if (!__uaio_ok && __uaio_init() == -1) - return (-1); - - /* - * The first asynchronous I/O request in the current process will - * create a bunch of workers (via __uaio_init()). If the number - * of workers is zero then the number of pending asynchronous I/O - * requests is zero. In such a case only execute the standard - * fsync(3C) or fdatasync(3RT) as appropriate. - */ - if (__rw_workerscnt == 0) { - if (op == O_DSYNC) - return (__fdsync(aiocbp->aio_fildes, FDSYNC)); - else - return (__fdsync(aiocbp->aio_fildes, FSYNC)); - } - - /* - * re-use aio_offset as the op field. - * O_DSYNC - fdatasync() - * O_SYNC - fsync() - */ - aiocbp->aio_offset = op; - aiocbp->aio_lio_opcode = AIOFSYNC; - - /* - * Create a list of fsync requests. The worker that - * gets the last request will do the fsync request. - */ - head = _aio_lio_alloc(); - if (head == NULL) { - errno = EAGAIN; - return (-1); - } - head->lio_mode = LIO_FSYNC; - head->lio_nent = head->lio_refcnt = __rw_workerscnt; - head->lio_largefile = 0; - - /* - * Insert an fsync request on every worker's queue. - */ - fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt); - if (fret != __rw_workerscnt) { - /* - * Fewer fsync requests than workers means that it was - * not possible to submit fsync requests to all workers. - * Actions: - * a) number of fsync requests submitted is 0: - * => free allocated memory (aio_lio_t). - * b) number of fsync requests submitted is > 0: - * => the last worker executing the fsync request - * will free the aio_lio_t struct. - */ - if (fret == 0) - _aio_lio_free(head); - return (-1); - } - return (0); -} - -int -__aio_cancel(int fd, aiocb_t *aiocbp) -{ - aio_req_t *reqp; - aio_worker_t *aiowp; - int done = 0; - int canceled = 0; - struct stat buf; - - if (fstat(fd, &buf) < 0) - return (-1); - - if (aiocbp != NULL) { - if (fd != aiocbp->aio_fildes) { - errno = EINVAL; - return (-1); - } - if (aiocbp->aio_state == USERAIO) { - sig_mutex_lock(&__aio_mutex); - reqp = _aio_hash_find(&aiocbp->aio_resultp); - if (reqp == NULL) { - sig_mutex_unlock(&__aio_mutex); - return (AIO_ALLDONE); - } - aiowp = reqp->req_worker; - sig_mutex_lock(&aiowp->work_qlock1); - (void) _aio_cancel_req(aiowp, reqp, &canceled, &done); - sig_mutex_unlock(&aiowp->work_qlock1); - sig_mutex_unlock(&__aio_mutex); - if (done) - return (AIO_ALLDONE); - if (canceled) - return (AIO_CANCELED); - return (AIO_NOTCANCELED); - } - if (aiocbp->aio_state == USERAIO_DONE) - return (AIO_ALLDONE); - return ((int)_kaio(AIOCANCEL, fd, aiocbp)); - } - - return (aiocancel_all(fd)); -} - -/* - * __aio_waitn() cancellation handler. - */ -/* ARGSUSED */ -static void -_aio_waitn_cleanup(void *arg) -{ - ASSERT(MUTEX_HELD(&__aio_mutex)); - - /* check for pending aio_waitn() calls */ - _aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING); - if (_aio_flags & AIO_LIB_WAITN_PENDING) { - _aio_flags &= ~AIO_LIB_WAITN_PENDING; - (void) cond_signal(&_aio_waitn_cv); - } - - sig_mutex_unlock(&__aio_mutex); -} - -/* - * aio_waitn can be used to reap the results of several I/O operations that - * were submitted asynchronously. The submission of I/Os can be done using - * existing POSIX interfaces: lio_listio, aio_write or aio_read. - * aio_waitn waits until "nwait" I/Os (supplied as a parameter) have - * completed and it returns the descriptors for these I/Os in "list". The - * maximum size of this list is given by "nent" and the actual number of I/Os - * completed is returned in "nwait". Otherwise aio_waitn might also - * return if the timeout expires. Additionally, aio_waitn returns 0 if - * successful or -1 if an error occurred. - */ -int -__aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo) -{ - int error = 0; - uint_t dnwait = 0; /* amount of requests in the waitn-done list */ - uint_t kwaitcnt; /* expected "done" requests from kernel */ - uint_t knentcnt; /* max. expected "done" requests from kernel */ - int uerrno = 0; - int kerrno = 0; /* save errno from _kaio() call */ - int timedwait = AIO_TIMEOUT_UNDEF; - aio_req_t *reqp; - timespec_t end; - timespec_t twait; /* copy of utimo for internal calculations */ - timespec_t *wait = NULL; - - if (nent == 0 || *nwait == 0 || *nwait > nent) { - errno = EINVAL; - return (-1); - } - - /* - * Only one running aio_waitn call per process allowed. - * Further calls will be blocked here until the running - * call finishes. - */ - - sig_mutex_lock(&__aio_mutex); - - while (_aio_flags & AIO_LIB_WAITN) { - if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) { - sig_mutex_unlock(&__aio_mutex); - *nwait = 0; - return (0); - } - _aio_flags |= AIO_LIB_WAITN_PENDING; - pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex); - error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex); - pthread_cleanup_pop(0); - if (error != 0) { - sig_mutex_unlock(&__aio_mutex); - *nwait = 0; - errno = error; - return (-1); - } - } - - pthread_cleanup_push(_aio_waitn_cleanup, NULL); - - _aio_flags |= AIO_LIB_WAITN; - - if (*nwait >= AIO_WAITN_MAXIOCBS) { - if (_aio_check_timeout(utimo, &end, &timedwait) != 0) { - error = -1; - dnwait = 0; - goto out; - } - if (timedwait != AIO_TIMEOUT_INDEF) { - twait = *utimo; - wait = &twait; - } - } - - /* - * If both counters are still set to zero, then only - * kernel requests are currently outstanding (raw-I/Os). - */ - if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) { - for (;;) { - kwaitcnt = *nwait - dnwait; - knentcnt = nent - dnwait; - if (knentcnt > AIO_WAITN_MAXIOCBS) - knentcnt = AIO_WAITN_MAXIOCBS; - kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt; - - pthread_cleanup_push(sig_mutex_lock, &__aio_mutex); - sig_mutex_unlock(&__aio_mutex); - _cancel_prologue(); - error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt, - &kwaitcnt, wait); - _cancel_epilogue(); - pthread_cleanup_pop(1); - - if (error == 0) { - dnwait += kwaitcnt; - if (dnwait >= *nwait || - *nwait < AIO_WAITN_MAXIOCBS) - break; - if (timedwait == AIO_TIMEOUT_WAIT) { - error = _aio_get_timedelta(&end, wait); - if (error == -1) { - /* timer expired */ - errno = ETIME; - break; - } - } - continue; - } - if (errno == EAGAIN) { - if (dnwait > 0) - error = 0; - break; - } - if (errno == ETIME || errno == EINTR) { - dnwait += kwaitcnt; - break; - } - /* fatal error */ - break; - } - - goto out; - } - - /* File system I/Os outstanding ... */ - - if (timedwait == AIO_TIMEOUT_UNDEF) { - if (_aio_check_timeout(utimo, &end, &timedwait) != 0) { - error = -1; - dnwait = 0; - goto out; - } - if (timedwait != AIO_TIMEOUT_INDEF) { - twait = *utimo; - wait = &twait; - } - } - - for (;;) { - uint_t sum_reqs; - - /* - * Calculate sum of active non RAW-IO requests (sum_reqs). - * If the expected amount of completed requests (*nwait) is - * greater than the calculated sum (sum_reqs) then - * use _kaio to check pending RAW-IO requests. - */ - sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt; - kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0; - - if (kwaitcnt != 0) { - /* possibly some kernel I/Os outstanding */ - knentcnt = nent - dnwait; - if (knentcnt > AIO_WAITN_MAXIOCBS) - knentcnt = AIO_WAITN_MAXIOCBS; - kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt; - - _aio_flags |= AIO_WAIT_INPROGRESS; - - pthread_cleanup_push(sig_mutex_lock, &__aio_mutex); - sig_mutex_unlock(&__aio_mutex); - _cancel_prologue(); - error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt, - &kwaitcnt, wait); - _cancel_epilogue(); - pthread_cleanup_pop(1); - - _aio_flags &= ~AIO_WAIT_INPROGRESS; - - if (error == 0) { - dnwait += kwaitcnt; - } else { - switch (errno) { - case EINVAL: - case EAGAIN: - /* don't wait for kernel I/Os */ - kerrno = 0; /* ignore _kaio() errno */ - *nwait = _aio_doneq_cnt + - _aio_outstand_cnt + dnwait; - error = 0; - break; - case EINTR: - case ETIME: - /* just scan for completed LIB I/Os */ - dnwait += kwaitcnt; - timedwait = AIO_TIMEOUT_POLL; - kerrno = errno; /* save _kaio() errno */ - error = 0; - break; - default: - kerrno = errno; /* save _kaio() errno */ - break; - } - } - if (error) - break; /* fatal kernel error */ - } - - /* check completed FS requests in the "done" queue */ - - while (_aio_doneq_cnt && dnwait < nent) { - /* get done requests */ - if ((reqp = _aio_req_remove(NULL)) != NULL) { - (void) _aio_hash_del(reqp->req_resultp); - list[dnwait++] = reqp->req_aiocbp; - _aio_req_mark_done(reqp); - _lio_remove(reqp); - _aio_req_free(reqp); - } - } - - if (dnwait >= *nwait) { - /* min. requested amount of completed I/Os satisfied */ - break; - } - if (timedwait == AIO_TIMEOUT_WAIT && - (error = _aio_get_timedelta(&end, wait)) == -1) { - /* timer expired */ - uerrno = ETIME; - break; - } - - /* - * If some I/Os are outstanding and we have to wait for them, - * then sleep here. _aiodone() will call _aio_waitn_wakeup() - * to wakeup this thread as soon as the required amount of - * completed I/Os is done. - */ - if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) { - /* - * _aio_waitn_wakeup() will wake up this thread when: - * - _aio_waitncnt requests are completed or - * - _aio_outstand_cnt becomes zero. - * cond_reltimedwait() could also return with - * a timeout error (ETIME). - */ - if (*nwait < _aio_outstand_cnt) - _aio_waitncnt = *nwait; - else - _aio_waitncnt = _aio_outstand_cnt; - - _aio_flags |= AIO_IO_WAITING; - - if (wait) - uerrno = sig_cond_reltimedwait(&_aio_iowait_cv, - &__aio_mutex, wait); - else - uerrno = sig_cond_wait(&_aio_iowait_cv, - &__aio_mutex); - - _aio_flags &= ~AIO_IO_WAITING; - - if (uerrno == ETIME) { - timedwait = AIO_TIMEOUT_POLL; - continue; - } - if (uerrno != 0) - timedwait = AIO_TIMEOUT_POLL; - } - - if (timedwait == AIO_TIMEOUT_POLL) { - /* polling or timer expired */ - break; - } - } - - errno = uerrno == 0 ? kerrno : uerrno; - if (errno) - error = -1; - else - error = 0; - -out: - *nwait = dnwait; - - pthread_cleanup_pop(1); /* drops __aio_mutex */ - - return (error); -} - -void -_aio_waitn_wakeup(void) -{ - /* - * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that - * it is waiting for completed I/Os. The number of required - * completed I/Os is stored into "_aio_waitncnt". - * aio_waitn() is woken up when - * - there are no further outstanding I/Os - * (_aio_outstand_cnt == 0) or - * - the expected number of I/Os has completed. - * Only one __aio_waitn() function waits for completed I/Os at - * a time. - * - * __aio_suspend() increments "_aio_suscv_cnt" to notify - * _aiodone() that at least one __aio_suspend() call is - * waiting for completed I/Os. - * There could be more than one __aio_suspend() function - * waiting for completed I/Os. Because every function should - * be waiting for different I/Os, _aiodone() has to wake up all - * __aio_suspend() functions each time. - * Every __aio_suspend() function will compare the recently - * completed I/O with its own list. - */ - ASSERT(MUTEX_HELD(&__aio_mutex)); - if (_aio_flags & AIO_IO_WAITING) { - if (_aio_waitncnt > 0) - _aio_waitncnt--; - if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 || - _aio_suscv_cnt > 0) - (void) cond_broadcast(&_aio_iowait_cv); - } else { - /* Wake up waiting aio_suspend calls */ - if (_aio_suscv_cnt > 0) - (void) cond_broadcast(&_aio_iowait_cv); - } -} - -/* - * timedwait values : - * AIO_TIMEOUT_POLL : polling - * AIO_TIMEOUT_WAIT : timeout - * AIO_TIMEOUT_INDEF : wait indefinitely - */ -static int -_aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait) -{ - struct timeval curtime; - - if (utimo) { - if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 || - utimo->tv_nsec >= NANOSEC) { - errno = EINVAL; - return (-1); - } - if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) { - (void) gettimeofday(&curtime, NULL); - end->tv_sec = utimo->tv_sec + curtime.tv_sec; - end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec; - if (end->tv_nsec >= NANOSEC) { - end->tv_nsec -= NANOSEC; - end->tv_sec += 1; - } - *timedwait = AIO_TIMEOUT_WAIT; - } else { - /* polling */ - *timedwait = AIO_TIMEOUT_POLL; - } - } else { - *timedwait = AIO_TIMEOUT_INDEF; /* wait indefinitely */ - } - return (0); -} - -#if !defined(_LP64) - -int -__aio_read64(aiocb64_t *aiocbp) -{ - if (aiocbp == NULL || aiocbp->aio_reqprio < 0) { - errno = EINVAL; - return (-1); - } - if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { - errno = EBUSY; - return (-1); - } - aiocbp->aio_lio_opcode = LIO_READ; - return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64, - (AIO_KAIO | AIO_NO_DUPS))); -} - -int -__aio_write64(aiocb64_t *aiocbp) -{ - if (aiocbp == NULL || aiocbp->aio_reqprio < 0) { - errno = EINVAL; - return (-1); - } - if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { - errno = EBUSY; - return (-1); - } - aiocbp->aio_lio_opcode = LIO_WRITE; - return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64, - (AIO_KAIO | AIO_NO_DUPS))); -} - -int -__lio_listio64(int mode, aiocb64_t *const list[], - int nent, struct sigevent *sigev) -{ - int aio_ufs = 0; - int oerrno = 0; - aio_lio_t *head = NULL; - aiocb64_t *aiocbp; - int state = 0; - int EIOflg = 0; - int rw; - int do_kaio = 0; - int error; - int i; - - if (!_kaio_ok) - _kaio_init(); - - if (aio_list_max == 0) - aio_list_max = sysconf(_SC_AIO_LISTIO_MAX); - - if (nent <= 0 || nent > aio_list_max) { - errno = EINVAL; - return (-1); - } - - switch (mode) { - case LIO_WAIT: - state = NOCHECK; - break; - case LIO_NOWAIT: - state = CHECK; - break; - default: - errno = EINVAL; - return (-1); - } - - for (i = 0; i < nent; i++) { - if ((aiocbp = list[i]) == NULL) - continue; - if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { - errno = EBUSY; - return (-1); - } - if (aiocbp->aio_lio_opcode == LIO_NOP) - aiocbp->aio_state = NOCHECK; - else { - aiocbp->aio_state = state; - if (KAIO_SUPPORTED(aiocbp->aio_fildes)) - do_kaio++; - else - aiocbp->aio_resultp.aio_errno = ENOTSUP; - } - } - - if (do_kaio) { - error = (int)_kaio(AIOLIO64, mode, list, nent, sigev); - if (error == 0) - return (0); - oerrno = errno; - } else { - oerrno = errno = ENOTSUP; - error = -1; - } - - if (error == -1 && errno == ENOTSUP) { - error = errno = 0; - /* - * If LIO_WAIT, or notification required, allocate a list head. - */ - if (mode == LIO_WAIT || - (sigev != NULL && - (sigev->sigev_notify == SIGEV_SIGNAL || - sigev->sigev_notify == SIGEV_THREAD || - sigev->sigev_notify == SIGEV_PORT))) - head = _aio_lio_alloc(); - if (head) { - sig_mutex_lock(&head->lio_mutex); - head->lio_mode = mode; - head->lio_largefile = 1; - if (mode == LIO_NOWAIT && sigev != NULL) { - if (sigev->sigev_notify == SIGEV_THREAD) { - head->lio_port = sigev->sigev_signo; - head->lio_event = AIOLIO64; - head->lio_sigevent = sigev; - head->lio_sigval.sival_ptr = - sigev->sigev_value.sival_ptr; - } else if (sigev->sigev_notify == SIGEV_PORT) { - port_notify_t *pn = - sigev->sigev_value.sival_ptr; - head->lio_port = pn->portnfy_port; - head->lio_event = AIOLIO64; - head->lio_sigevent = sigev; - head->lio_sigval.sival_ptr = - pn->portnfy_user; - } else { /* SIGEV_SIGNAL */ - head->lio_signo = sigev->sigev_signo; - head->lio_sigval.sival_ptr = - sigev->sigev_value.sival_ptr; - } - } - head->lio_nent = head->lio_refcnt = nent; - sig_mutex_unlock(&head->lio_mutex); - } - /* - * find UFS requests, errno == ENOTSUP/EBADFD, - */ - for (i = 0; i < nent; i++) { - if ((aiocbp = list[i]) == NULL || - aiocbp->aio_lio_opcode == LIO_NOP || - (aiocbp->aio_resultp.aio_errno != ENOTSUP && - aiocbp->aio_resultp.aio_errno != EBADFD)) { - if (head) - _lio_list_decr(head); - continue; - } - if (aiocbp->aio_resultp.aio_errno == EBADFD) - SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes); - if (aiocbp->aio_reqprio < 0) { - aiocbp->aio_resultp.aio_errno = EINVAL; - aiocbp->aio_resultp.aio_return = -1; - EIOflg = 1; - if (head) - _lio_list_decr(head); - continue; - } - /* - * submit an AIO request with flags AIO_NO_KAIO - * to avoid the kaio() syscall in _aio_rw() - */ - switch (aiocbp->aio_lio_opcode) { - case LIO_READ: - rw = AIOAREAD64; - break; - case LIO_WRITE: - rw = AIOAWRITE64; - break; - } - error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw, - (AIO_NO_KAIO | AIO_NO_DUPS)); - if (error == 0) - aio_ufs++; - else { - if (head) - _lio_list_decr(head); - aiocbp->aio_resultp.aio_errno = error; - EIOflg = 1; - } - } - } - if (EIOflg) { - errno = EIO; - return (-1); - } - if (mode == LIO_WAIT && oerrno == ENOTSUP) { - /* - * call kaio(AIOLIOWAIT) to get all outstanding - * kernel AIO requests - */ - if ((nent - aio_ufs) > 0) - (void) _kaio(AIOLIOWAIT, mode, list, nent, sigev); - if (head != NULL && head->lio_nent > 0) { - sig_mutex_lock(&head->lio_mutex); - while (head->lio_refcnt > 0) { - int err; - head->lio_waiting = 1; - pthread_cleanup_push(_lio_listio_cleanup, head); - err = sig_cond_wait(&head->lio_cond_cv, - &head->lio_mutex); - pthread_cleanup_pop(0); - head->lio_waiting = 0; - if (err && head->lio_nent > 0) { - sig_mutex_unlock(&head->lio_mutex); - errno = err; - return (-1); - } - } - sig_mutex_unlock(&head->lio_mutex); - ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0); - _aio_lio_free(head); - for (i = 0; i < nent; i++) { - if ((aiocbp = list[i]) != NULL && - aiocbp->aio_resultp.aio_errno) { - errno = EIO; - return (-1); - } - } - } - return (0); - } - return (error); -} - -int -__aio_error64(aiocb64_t *aiocbp) -{ - aio_result_t *resultp = &aiocbp->aio_resultp; - int error; - - if ((error = resultp->aio_errno) == EINPROGRESS) { - if (aiocbp->aio_state == CHECK) { - /* - * Always do the kaio() call without using the - * KAIO_SUPPORTED() checks because it is not - * mandatory to have a valid fd set in the - * aiocb, only the resultp must be set. - */ - if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) { - errno = EINVAL; - return (-1); - } - error = resultp->aio_errno; - } else if (aiocbp->aio_state == CHECKED) { - aiocbp->aio_state = CHECK; - } - } - return (error); -} - -ssize_t -__aio_return64(aiocb64_t *aiocbp) -{ - aio_result_t *resultp = &aiocbp->aio_resultp; - aio_req_t *reqp; - int error; - ssize_t retval; - - /* - * The _aiodone() function stores resultp->aio_return before - * storing resultp->aio_errno (with an membar_producer() in - * between). We use membar_consumer() below to ensure proper - * memory ordering between _aiodone() and ourself. - */ - error = resultp->aio_errno; - membar_consumer(); - retval = resultp->aio_return; - - /* - * we use this condition to indicate either that - * aio_return() has been called before or should - * not have been called yet. - */ - if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) { - errno = error; - return (-1); - } - - /* - * Before we return, mark the result as being returned so that later - * calls to aio_return() will return the fact that the result has - * already been returned. - */ - sig_mutex_lock(&__aio_mutex); - /* retest, in case more than one thread actually got in here */ - if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) { - sig_mutex_unlock(&__aio_mutex); - errno = EINVAL; - return (-1); - } - resultp->aio_return = -1; - resultp->aio_errno = EINVAL; - if ((reqp = _aio_hash_del(resultp)) == NULL) - sig_mutex_unlock(&__aio_mutex); - else { - aiocbp->aio_state = NOCHECK; - ASSERT(reqp->req_head == NULL); - (void) _aio_req_remove(reqp); - sig_mutex_unlock(&__aio_mutex); - _aio_req_free(reqp); - } - - if (retval == -1) - errno = error; - return (retval); -} - -static int -__aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp, - int workerscnt) -{ - int i; - int error; - aio_worker_t *next = aiowp; - - for (i = 0; i < workerscnt; i++) { - error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO); - if (error != 0) { - sig_mutex_lock(&head->lio_mutex); - head->lio_mode = LIO_DESTROY; /* ignore fsync */ - head->lio_nent -= workerscnt - i; - head->lio_refcnt -= workerscnt - i; - sig_mutex_unlock(&head->lio_mutex); - errno = EAGAIN; - return (i); - } - next = next->work_forw; - } - return (i); -} - -int -__aio_fsync64(int op, aiocb64_t *aiocbp) -{ - aio_lio_t *head; - struct stat statb; - int fret; - - if (aiocbp == NULL) - return (0); - - if (aiocbp->aio_reqprio < 0 || (op != O_DSYNC && op != O_SYNC)) { - errno = EINVAL; - return (-1); - } - if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { - errno = EBUSY; - return (-1); - } - if (fstat(aiocbp->aio_fildes, &statb) < 0) - return (-1); - - /* - * Kernel aio_fsync() is not supported. - * We force user-level aio_fsync() just - * for the notification side-effect. - */ - if (!__uaio_ok && __uaio_init() == -1) - return (-1); - - /* - * The first asynchronous I/O request in the current process will - * create a bunch of workers (via __uaio_init()). If the number - * of workers is zero then the number of pending asynchronous I/O - * requests is zero. In such a case only execute the standard - * fsync(3C) or fdatasync(3RT) as appropriate. - */ - if (__rw_workerscnt == 0) { - if (op == O_DSYNC) - return (__fdsync(aiocbp->aio_fildes, FDSYNC)); - else - return (__fdsync(aiocbp->aio_fildes, FSYNC)); - } - - /* - * re-use aio_offset as the op field. - * O_DSYNC - fdatasync() - * O_SYNC - fsync() - */ - aiocbp->aio_offset = op; - aiocbp->aio_lio_opcode = AIOFSYNC; - - /* - * Create a list of fsync requests. The worker that - * gets the last request will do the fsync request. - */ - head = _aio_lio_alloc(); - if (head == NULL) { - errno = EAGAIN; - return (-1); - } - head->lio_mode = LIO_FSYNC; - head->lio_nent = head->lio_refcnt = __rw_workerscnt; - head->lio_largefile = 1; - - /* - * Insert an fsync request on every worker's queue. - */ - fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt); - if (fret != __rw_workerscnt) { - /* - * Fewer fsync requests than workers means that it was - * not possible to submit fsync requests to all workers. - * Actions: - * a) number of fsync requests submitted is 0: - * => free allocated memory (aio_lio_t). - * b) number of fsync requests submitted is > 0: - * => the last worker executing the fsync request - * will free the aio_lio_t struct. - */ - if (fret == 0) - _aio_lio_free(head); - return (-1); - } - return (0); -} - -int -__aio_cancel64(int fd, aiocb64_t *aiocbp) -{ - aio_req_t *reqp; - aio_worker_t *aiowp; - int done = 0; - int canceled = 0; - struct stat buf; - - if (fstat(fd, &buf) < 0) - return (-1); - - if (aiocbp != NULL) { - if (fd != aiocbp->aio_fildes) { - errno = EINVAL; - return (-1); - } - if (aiocbp->aio_state == USERAIO) { - sig_mutex_lock(&__aio_mutex); - reqp = _aio_hash_find(&aiocbp->aio_resultp); - if (reqp == NULL) { - sig_mutex_unlock(&__aio_mutex); - return (AIO_ALLDONE); - } - aiowp = reqp->req_worker; - sig_mutex_lock(&aiowp->work_qlock1); - (void) _aio_cancel_req(aiowp, reqp, &canceled, &done); - sig_mutex_unlock(&aiowp->work_qlock1); - sig_mutex_unlock(&__aio_mutex); - if (done) - return (AIO_ALLDONE); - if (canceled) - return (AIO_CANCELED); - return (AIO_NOTCANCELED); - } - if (aiocbp->aio_state == USERAIO_DONE) - return (AIO_ALLDONE); - return ((int)_kaio(AIOCANCEL, fd, aiocbp)); - } - - return (aiocancel_all(fd)); -} - -#endif /* !defined(_LP64) */ diff --git a/usr/src/lib/libaio/common/scalls.c b/usr/src/lib/libaio/common/scalls.c deleted file mode 100644 index adb7cdceb5..0000000000 --- a/usr/src/lib/libaio/common/scalls.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#pragma weak close = _libaio_close - -#include "libaio.h" - -extern void _cancel_prologue(void); -extern void _cancel_epilogue(void); - -int -_libaio_close(int fd) -{ - int rc; - - /* - * Cancel all outstanding aio requests for this file descriptor. - */ - if (fd >= 0 && __uaio_ok) - (void) aiocancel_all(fd); - /* - * If we have allocated the bit array, clear the bit for this file. - * The next open may re-use this file descriptor and the new file - * may have different kaio() behaviour. - */ - if (_kaio_supported != NULL) - CLEAR_KAIO_SUPPORTED(fd); - - _cancel_prologue(); - rc = _close(fd); - _cancel_epilogue(); - - return (rc); -} diff --git a/usr/src/lib/libaio/common/sig.c b/usr/src/lib/libaio/common/sig.c deleted file mode 100644 index ea75bf9513..0000000000 --- a/usr/src/lib/libaio/common/sig.c +++ /dev/null @@ -1,296 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "libaio.h" - -void -sig_mutex_lock(mutex_t *mp) -{ - _sigoff(); - (void) mutex_lock(mp); -} - -void -sig_mutex_unlock(mutex_t *mp) -{ - (void) mutex_unlock(mp); - _sigon(); -} - -int -sig_mutex_trylock(mutex_t *mp) -{ - int error; - - _sigoff(); - if ((error = mutex_trylock(mp)) != 0) - _sigon(); - return (error); -} - -/* - * sig_cond_wait() is a cancellation point. - */ -int -sig_cond_wait(cond_t *cv, mutex_t *mp) -{ - int error; - - pthread_testcancel(); - error = cond_wait(cv, mp); - if (error == EINTR && _sigdeferred() != 0) { - sig_mutex_unlock(mp); - /* take the deferred signal here */ - sig_mutex_lock(mp); - } - pthread_testcancel(); - return (error); -} - -/* - * sig_cond_reltimedwait() is a cancellation point. - */ -int -sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts) -{ - int error; - - pthread_testcancel(); - error = cond_reltimedwait(cv, mp, ts); - if (error == EINTR && _sigdeferred() != 0) { - sig_mutex_unlock(mp); - /* take the deferred signal here */ - sig_mutex_lock(mp); - } - pthread_testcancel(); - return (error); -} - -int -_aio_create_worker(aio_req_t *reqp, int mode) -{ - aio_worker_t *aiowp, **workers, **nextworker; - int *aio_workerscnt; - void *(*func)(void *); - sigset_t oset; - int error; - - /* - * Put the new worker thread in the right queue. - */ - switch (mode) { - case AIOREAD: - case AIOWRITE: - case AIOAREAD: - case AIOAWRITE: -#if !defined(_LP64) - case AIOAREAD64: - case AIOAWRITE64: -#endif - workers = &__workers_rw; - nextworker = &__nextworker_rw; - aio_workerscnt = &__rw_workerscnt; - func = _aio_do_request; - break; - case AIONOTIFY: - workers = &__workers_no; - nextworker = &__nextworker_no; - func = _aio_do_notify; - aio_workerscnt = &__no_workerscnt; - break; - default: - _aiopanic("_aio_create_worker: invalid mode"); - break; - } - - if ((aiowp = _aio_worker_alloc()) == NULL) - return (-1); - - if (reqp) { - reqp->req_state = AIO_REQ_QUEUED; - reqp->req_worker = aiowp; - aiowp->work_head1 = reqp; - aiowp->work_tail1 = reqp; - aiowp->work_next1 = reqp; - aiowp->work_count1 = 1; - aiowp->work_minload1 = 1; - } - - (void) pthread_sigmask(SIG_SETMASK, &_full_set, &oset); - error = thr_create(NULL, AIOSTKSIZE, func, aiowp, - THR_DAEMON | THR_SUSPENDED, &aiowp->work_tid); - (void) pthread_sigmask(SIG_SETMASK, &oset, NULL); - if (error) { - if (reqp) { - reqp->req_state = 0; - reqp->req_worker = NULL; - } - _aio_worker_free(aiowp); - return (-1); - } - - sig_mutex_lock(&__aio_mutex); - (*aio_workerscnt)++; - if (*workers == NULL) { - aiowp->work_forw = aiowp; - aiowp->work_backw = aiowp; - *nextworker = aiowp; - *workers = aiowp; - } else { - aiowp->work_backw = (*workers)->work_backw; - aiowp->work_forw = (*workers); - (*workers)->work_backw->work_forw = aiowp; - (*workers)->work_backw = aiowp; - } - _aio_worker_cnt++; - sig_mutex_unlock(&__aio_mutex); - - (void) thr_continue(aiowp->work_tid); - - return (0); -} - -/* - * This is the application's AIOSIGCANCEL sigaction setting. - */ -static struct sigaction sigcanact; - -/* - * This is our AIOSIGCANCEL handler. - * If the signal is not meant for us, call the application's handler. - */ -void -aiosigcancelhndlr(int sig, siginfo_t *sip, void *uap) -{ - aio_worker_t *aiowp; - void (*func)(int, siginfo_t *, void *); - - if (sip != NULL && sip->si_code == SI_LWP && - (aiowp = pthread_getspecific(_aio_key)) != NULL) { - /* - * Only aio worker threads get here (with aiowp != NULL). - */ - siglongjmp(aiowp->work_jmp_buf, 1); - } else if (sigcanact.sa_handler != SIG_IGN && - sigcanact.sa_handler != SIG_DFL) { - /* - * Call the application signal handler. - */ - func = sigcanact.sa_sigaction; - if (sigcanact.sa_flags & SA_RESETHAND) - sigcanact.sa_handler = SIG_DFL; - if (!(sigcanact.sa_flags & SA_SIGINFO)) - sip = NULL; - (void) func(sig, sip, uap); - } - /* - * SIGLWP is ignored by default. - */ -} - -/* consolidation private interface in libc */ -extern int _libc_sigaction(int sig, const struct sigaction *act, - struct sigaction *oact); - -#pragma weak sigaction = _sigaction -int -_sigaction(int sig, const struct sigaction *nact, struct sigaction *oact) -{ - struct sigaction tact; - struct sigaction oldact; - - /* - * We detect SIGIO just to set the _sigio_enabled flag. - */ - if (sig == SIGIO && nact != NULL) - _sigio_enabled = - (nact->sa_handler != SIG_DFL && - nact->sa_handler != SIG_IGN); - - /* - * We interpose on SIGAIOCANCEL (aka SIGLWP). Although SIGLWP - * is a 'reserved' signal that no application should be using, we - * honor the application's handler (see aiosigcancelhndlr(), above). - */ - if (sig == SIGAIOCANCEL) { - oldact = sigcanact; - if (nact != NULL) { - sigcanact = tact = *nact; - if (tact.sa_handler == SIG_DFL || - tact.sa_handler == SIG_IGN) { - tact.sa_flags = SA_SIGINFO; - (void) sigemptyset(&tact.sa_mask); - } else { - tact.sa_flags |= SA_SIGINFO; - tact.sa_flags &= ~(SA_NODEFER | SA_RESETHAND); - } - tact.sa_sigaction = aiosigcancelhndlr; - if (_libc_sigaction(sig, &tact, NULL) == -1) { - sigcanact = oldact; - return (-1); - } - } - if (oact) - *oact = oldact; - return (0); - } - - /* - * Everything else, just call the real sigaction(). - */ - return (_libc_sigaction(sig, nact, oact)); -} - -void -init_signals(void) -{ - struct sigaction act; - - /* - * See if the application has set up a handler for SIGIO. - */ - (void) _libc_sigaction(SIGIO, NULL, &act); - _sigio_enabled = - (act.sa_handler != SIG_DFL && act.sa_handler != SIG_IGN); - - /* - * Arrange to catch SIGAIOCANCEL (SIGLWP). - * If the application has already set up a handler, preserve it. - */ - (void) _libc_sigaction(SIGAIOCANCEL, NULL, &sigcanact); - act = sigcanact; - if (act.sa_handler == SIG_DFL || act.sa_handler == SIG_IGN) { - act.sa_flags = SA_SIGINFO; - (void) sigemptyset(&act.sa_mask); - } else { - act.sa_flags |= SA_SIGINFO; - act.sa_flags &= ~(SA_NODEFER | SA_RESETHAND); - } - act.sa_sigaction = aiosigcancelhndlr; - (void) _libc_sigaction(SIGAIOCANCEL, &act, NULL); -} diff --git a/usr/src/lib/libaio/common/subr.c b/usr/src/lib/libaio/common/subr.c deleted file mode 100644 index add99471d4..0000000000 --- a/usr/src/lib/libaio/common/subr.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "libaio.h" - -void -_aiopanic(char *s) -{ - sigset_t sigmask; - char buf[256]; - - (void) snprintf(buf, sizeof (buf), - "AIO PANIC (thread = %d): %s\n", thr_self(), s); - (void) write(2, buf, strlen(buf)); - (void) sigset(SIGABRT, SIG_DFL); - (void) sigemptyset(&sigmask); - (void) sigaddset(&sigmask, SIGABRT); - (void) sigprocmask(SIG_UNBLOCK, &sigmask, NULL); - (void) thr_kill(thr_self(), SIGABRT); - (void) kill(getpid(), SIGABRT); - _exit(127); -} - -int -assfail(char *a, char *f, int l) -{ - char buf[256]; - - (void) snprintf(buf, sizeof (buf), - "assertion failed: %s, file: %s, line:%d", a, f, l); - _aiopanic(buf); - return (0); -} |