diff options
author | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
---|---|---|
committer | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
commit | 7c478bd95313f5f23a4c958a745db2134aa03244 (patch) | |
tree | c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/lib/libaio | |
download | illumos-joyent-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz |
OpenSolaris Launch
Diffstat (limited to 'usr/src/lib/libaio')
24 files changed, 5798 insertions, 0 deletions
diff --git a/usr/src/lib/libaio/Makefile b/usr/src/lib/libaio/Makefile new file mode 100644 index 0000000000..cb0baf37ea --- /dev/null +++ b/usr/src/lib/libaio/Makefile @@ -0,0 +1,79 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 1997-2003 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.lib + +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET= all +clean := TARGET= clean +clobber := TARGET= clobber +install := TARGET= install +lint := TARGET= lint + +.KEEP_STATE: + +all clean clobber install: spec .WAIT $(SUBDIRS) + +lint: $(SUBDIRS) + +LIBRARY= libaio.a +XGETFLAGS= -a +POFILE= $(LIBRARY:.a=.po) +POFILES= generic.po + +# definitions for install_h target +HDRS= asynch.h +HDRDIR= . 
+ROOTHDRDIR= $(ROOT)/usr/include/sys + +install_h: $(ROOTHDRS) + +check: $(CHECKHDRS) + +spec $(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +_msg: $(MSGDOMAIN) $(POFILE) + $(RM) $(MSGDOMAIN)/$(POFILE) + $(CP) $(POFILE) $(MSGDOMAIN) + +$(POFILE): $(POFILES) + $(RM) $@ + $(CAT) $(POFILES) > $@ + +$(POFILES): + $(RM) messages.po + $(XGETTEXT) $(XGETFLAGS) *.[ch]* */*.[ch]* + $(SED) -e '/^# msg/d' -e '/^domain/d' messages.po > $@ + $(RM) messages.po + +FRC: + +include ../Makefile.targ diff --git a/usr/src/lib/libaio/Makefile.com b/usr/src/lib/libaio/Makefile.com new file mode 100644 index 0000000000..21f50e208b --- /dev/null +++ b/usr/src/lib/libaio/Makefile.com @@ -0,0 +1,67 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +LIBRARY= libaio.a +VERS= .1 + +COBJS= aio.o \ + posix_aio.o \ + scalls.o \ + sig.o \ + subr.o \ + ma.o + +OBJECTS= $(COBJS) $(MOBJS) + +include ../../Makefile.lib +include ../../Makefile.rootfs + +SRCS= $(COBJS:%.o=../common/%.c) + +LIBS = $(DYNLIB) $(LINTLIB) +LDLIBS += -lc +$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC) + +SRCDIR = ../common +MAPDIR = ../spec/$(TRANSMACH) +SPECMAPFILE = $(MAPDIR)/mapfile + +CFLAGS += $(CCVERBOSE) +CPPFLAGS += -I. -Iinc -I.. -I$(SRCDIR) + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include ../../Makefile.targ + +pics/%.o: $(MDIR)/%.s + $(BUILD.s) + $(POST_PROCESS_O) diff --git a/usr/src/lib/libaio/amd64/Makefile b/usr/src/lib/libaio/amd64/Makefile new file mode 100644 index 0000000000..cb39a2beff --- /dev/null +++ b/usr/src/lib/libaio/amd64/Makefile @@ -0,0 +1,32 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com +include ../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) diff --git a/usr/src/lib/libaio/asynch.h b/usr/src/lib/libaio/asynch.h new file mode 100644 index 0000000000..48dc3dc622 --- /dev/null +++ b/usr/src/lib/libaio/asynch.h @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1991 by Sun Microsystems, Inc. 
+ */ + +#ifndef _SYS_ASYNCH_H +#define _SYS_ASYNCH_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/feature_tests.h> +#include <sys/types.h> +#include <sys/aio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define AIO_INPROGRESS -2 /* values not set by the system */ + +/* large file compilation environment setup */ +#if !defined(_LP64) && _FILE_OFFSET_BITS == 64 +#ifdef __PRAGMA_REDEFINE_EXTNAME +#pragma redefine_extname aioread aioread64 +#pragma redefine_extname aiowrite aiowrite64 +#else +#define aioread aioread64 +#define aiowrite aiowrite64 +#endif +#endif /* _FILE_OFFSET_BITS */ + +#if defined(_LP64) && defined(_LARGEFILE64_SOURCE) +#ifdef __PRAGMA_REDEFINE_EXTNAME +#pragma redefine_extname aioread64 aioread +#pragma redefine_extname aiowrite64 aiowrite +#else +#define aioread64 aioread +#define aiowrite64 aiowrite +#endif +#endif /* _LP64 && _LARGEFILE64_SOURCE */ +extern int aioread(int, caddr_t, int, off_t, int, aio_result_t *); +extern int aiowrite(int, caddr_t, int, off_t, int, aio_result_t *); +extern int aiocancel(aio_result_t *); +extern aio_result_t *aiowait(struct timeval *); + +/* transitional large file interfaces */ +#if defined(_LARGEFILE64_SOURCE) && !((_FILE_OFFSET_BITS == 64) && \ + !defined(__PRAGMA_REDEFINE_EXTNAME)) +extern int aioread64(int, caddr_t, int, off64_t, int, aio_result_t *); +extern int aiowrite64(int, caddr_t, int, off64_t, int, aio_result_t *); +#endif /* _LARGEFILE64_SOURCE... */ + +#define MAXASYNCHIO 200 /* maxi.number of outstanding i/o's */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ASYNCH_H */ diff --git a/usr/src/lib/libaio/common/Makefile b/usr/src/lib/libaio/common/Makefile new file mode 100644 index 0000000000..5a58f96bc0 --- /dev/null +++ b/usr/src/lib/libaio/common/Makefile @@ -0,0 +1,49 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). 
You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# lib/libaio/common/Makefile + +LINTSRC32= lintsrc32 +LINTOUT32= lint32.out +LINTLIB32= $(LIBNAME)32 +$(LINTSRC32):= LINTFLAGS += + +LINTSRC64= lintsrc64 +LINTOUT64= lint64.out +LINTLIB64= $(LIBNAME)64 +$(LINTSRC64):= LINTFLAGS64 += -fd -Xtransition=yes + +lints : $(LINTSRC32) $(LINTSRC64) + +$(LINTSRC32): $$(SRCS) + $(LINT.c) -o $(LINTLIB32) $(SRCS) > $(LINTOUT32) 2>&1 + +$(LINTSRC64): $$(SRCS) + $(LINT64.c) -o $(LINTLIB64) $(SRCS) > $(LINTOUT64) 2>&1 + +include ../Makefile.com + diff --git a/usr/src/lib/libaio/common/aio.c b/usr/src/lib/libaio/common/aio.c new file mode 100644 index 0000000000..34f66f8824 --- /dev/null +++ b/usr/src/lib/libaio/common/aio.c @@ -0,0 +1,2252 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "libaio.h" +#include <sys/param.h> +#include <sys/file.h> +#include <sys/port.h> + +static int _aio_hash_insert(aio_result_t *, aio_req_t *); +static aio_req_t *_aio_req_alloc(void); +static aio_req_t *_aio_req_get(aio_worker_t *); +static void _aio_req_add(aio_req_t *, aio_worker_t **, int); +static void _aio_req_del(aio_worker_t *, aio_req_t *, int); +static aio_result_t *_aio_req_done(void); +static void _aio_work_done(aio_worker_t *); +aio_req_t *_aio_req_remove(aio_req_t *reqp); +static void _aio_enq_doneq(aio_req_t *reqp); +int _aio_get_timedelta(struct timespec *end, struct timespec *wait); + +aio_req_t *_aio_hash_find(aio_result_t *); +void _aio_req_free(aio_req_t *); +void _aio_lock(void); +void _aio_unlock(void); + +extern int __fdsync(int fd, int mode); +extern int _sigprocmask(int, const sigset_t *, sigset_t *); +extern int _port_dispatch(int, int, int, int, uintptr_t, void *); + +static int _aio_fsync_del(aio_req_t *, aio_lio_t *); +static int _aiodone(aio_req_t *, aio_lio_t *, int, ssize_t, int); +static void _aio_cancel_work(aio_worker_t *, int, int *, int *); + +#ifdef DEBUG +void _aio_stats(void); +#endif + +int _pagesize; + +#define AIOREQSZ (sizeof (struct aio_req)) +#define AIOCLICKS ((_pagesize)/AIOREQSZ) +#define HASHSZ 8192L /* power of 2 */ +#define AIOHASH(resultp) ((((uintptr_t)(resultp) >> 13) ^ \ + ((uintptr_t)(resultp))) & (HASHSZ-1)) +#define POSIX_AIO(x) ((x)->req_type == AIO_POSIX_REQ) + +/* + * switch 
for kernel async I/O + */ +int _kaio_ok = 0; /* 0 = disabled, 1 = on, -1 = error */ + +/* + * Key for thread-specific data + */ +thread_key_t _aio_key = 0; + +/* + * Array for determining whether or not a file supports kaio + */ +uint32_t _kaio_supported[MAX_KAIO_FDARRAY_SIZE]; + +int _aioreqsize = AIOREQSZ; + +#ifdef DEBUG +int *_donecnt; /* per worker AIO done count */ +int *_idlecnt; /* per worker idle count */ +int *_qfullcnt; /* per worker full q count */ +int *_firstqcnt; /* num times queue one is used */ +int *_newworker; /* num times new worker is created */ +int _clogged = 0; /* num times all queues are locked */ +int _qlocked = 0; /* num times submitter finds q locked */ +int _aio_submitcnt = 0; +int _aio_submitcnt2 = 0; +int _submitcnt = 0; +int _avesubmitcnt = 0; +int _aiowaitcnt = 0; +int _startaiowaitcnt = 1; +int _avedone = 0; +int _new_workers = 0; +#endif + +/* + * workers for read requests. + * (__aio_mutex lock protects circular linked list of workers.) + */ +aio_worker_t *__workers_rd; /* circular list of AIO workers */ +aio_worker_t *__nextworker_rd; /* next worker in list of workers */ +int __rd_workerscnt; /* number of read workers */ + +/* + * workers for write requests. + * (__aio_mutex lock protects circular linked list of workers.) + */ +aio_worker_t *__workers_wr; /* circular list of AIO workers */ +aio_worker_t *__nextworker_wr; /* next worker in list of workers */ +int __wr_workerscnt; /* number of write workers */ + +/* + * worker for sigevent requests. 
+ */ +aio_worker_t *__workers_si; /* circular list of AIO workers */ +aio_worker_t *__nextworker_si; /* next worker in list of workers */ +int __si_workerscnt; /* number of write workers */ + +struct aio_req *_aio_done_tail; /* list of done requests */ +struct aio_req *_aio_done_head; + +mutex_t __aio_initlock = DEFAULTMUTEX; /* makes aio initialization atomic */ +mutex_t __aio_mutex = DEFAULTMUTEX; /* protects counts, and linked lists */ +mutex_t __aio_cachefillock = DEFAULTMUTEX; /* single-thread aio cache filling */ +cond_t _aio_iowait_cv = DEFAULTCV; /* wait for userland I/Os */ +cond_t __aio_cachefillcv = DEFAULTCV; /* sleep cv for cache filling */ + +mutex_t __lio_mutex = DEFAULTMUTEX; /* protects lio lists */ + +int __aiostksz; /* aio worker's stack size */ +int __aio_cachefilling = 0; /* set when aio cache is filling */ +int __sigio_masked = 0; /* bit mask for SIGIO signal */ +int __sigio_maskedcnt = 0; /* mask count for SIGIO signal */ +pid_t __pid = (pid_t)-1; /* initialize as invalid pid */ +static struct aio_req **_aio_hash; +static struct aio_req *_aio_freelist; +static struct aio_req *_aio_doneq; /* double linked done queue list */ +static int _aio_freelist_cnt; + +static struct sigaction act; + +cond_t _aio_done_cv = DEFAULTCV; + +/* + * Input queue of requests which is serviced by the aux. threads. + */ +cond_t _aio_idle_cv = DEFAULTCV; + +int _aio_cnt = 0; +int _aio_donecnt = 0; +int _aio_waitncnt = 0; /* # fs requests for aio_waitn */ +int _aio_doneq_cnt = 0; +int _aio_outstand_cnt = 0; /* number of outstanding requests */ +int _aio_outstand_waitn = 0; /* # of queued requests for aio_waitn */ +int _aio_req_done_cnt = 0; /* req. 
done but not in "done queue" */ +int _aio_kernel_suspend = 0; /* active kernel kaio calls */ +int _aio_suscv_cnt = 0; /* aio_suspend calls waiting on cv's */ + +int _max_workers = 256; /* max number of workers permitted */ +int _min_workers = 8; /* min number of workers */ +int _maxworkload = 32; /* max length of worker's request q */ +int _minworkload = 2; /* min number of request in q */ +int _aio_worker_cnt = 0; /* number of workers to do requests */ +int _idle_workers = 0; /* number of idle workers */ +int __uaio_ok = 0; /* AIO has been enabled */ +sigset_t _worker_set; /* worker's signal mask */ + +int _aiowait_flag = 0; /* when set, aiowait() is inprogress */ +int _aio_flags = 0; /* see libaio.h defines for */ + +struct aio_worker *_kaiowp; /* points to kaio cleanup thread */ + +/* + * called by the child when the main thread forks. the child is + * cleaned up so that it can use libaio. + */ +void +_aio_forkinit(void) +{ + __uaio_ok = 0; + __workers_rd = NULL; + __nextworker_rd = NULL; + __workers_wr = NULL; + __nextworker_wr = NULL; + _aio_done_tail = NULL; + _aio_done_head = NULL; + _aio_hash = NULL; + _aio_freelist = NULL; + _aio_freelist_cnt = 0; + _aio_doneq = NULL; + _aio_doneq_cnt = 0; + _aio_waitncnt = 0; + _aio_outstand_cnt = 0; + _aio_outstand_waitn = 0; + _aio_req_done_cnt = 0; + _aio_kernel_suspend = 0; + _aio_suscv_cnt = 0; + _aio_flags = 0; + _aio_worker_cnt = 0; + _idle_workers = 0; + _kaio_ok = 0; +#ifdef DEBUG + _clogged = 0; + _qlocked = 0; +#endif +} + +#ifdef DEBUG +/* + * print out a bunch of interesting statistics when the process + * exits. 
+ */ +void +_aio_stats() +{ + int i; + char *fmt; + int cnt; + FILE *fp; + + fp = fopen("/tmp/libaio.log", "w+a"); + if (fp == NULL) + return; + fprintf(fp, "size of AIO request struct = %d bytes\n", _aioreqsize); + fprintf(fp, "number of AIO workers = %d\n", _aio_worker_cnt); + cnt = _aio_worker_cnt + 1; + for (i = 2; i <= cnt; i++) { + fmt = "%d done %d, idle = %d, qfull = %d, newworker = %d\n"; + fprintf(fp, fmt, i, _donecnt[i], _idlecnt[i], _qfullcnt[i], + _newworker[i]); + } + fprintf(fp, "num times submitter found next work queue locked = %d\n", + _qlocked); + fprintf(fp, "num times submitter found all work queues locked = %d\n", + _clogged); + fprintf(fp, "average submit request = %d\n", _avesubmitcnt); + fprintf(fp, "average number of submit requests per new worker = %d\n", + _avedone); +} +#endif + +/* + * libaio is initialized when an AIO request is made. important + * constants are initialized like the max number of workers that + * libaio can create, and the minimum number of workers permitted before + * imposing some restrictions. also, some workers are created. 
+ */ +int +__uaio_init(void) +{ + int i; + size_t size; + extern sigset_t __sigiomask; + struct sigaction oact; + + (void) mutex_lock(&__aio_initlock); + if (_aio_key == 0 && + thr_keycreate(&_aio_key, _aio_free_worker) != 0) + _aiopanic("__uaio_init, thr_keycreate()\n"); + if (!__uaio_ok) { + __pid = getpid(); + + if (_sigaction(SIGAIOCANCEL, NULL, &oact) == -1) { + (void) mutex_unlock(&__aio_initlock); + return (-1); + } + + if (oact.sa_handler != aiosigcancelhndlr) { + act.sa_handler = aiosigcancelhndlr; + act.sa_flags = SA_SIGINFO; + if (_sigaction(SIGAIOCANCEL, &act, &sigcanact) == -1) { + (void) mutex_unlock(&__aio_initlock); + return (-1); + } + } + + /* + * Constant sigiomask, used by _aiosendsig() + */ + (void) sigaddset(&__sigiomask, SIGIO); +#ifdef DEBUG + size = _max_workers * (sizeof (int) * 5 + + sizeof (int)); + _donecnt = malloc(size); + (void) memset((caddr_t)_donecnt, 0, size); + _idlecnt = _donecnt + _max_workers; + _qfullcnt = _idlecnt + _max_workers; + _firstqcnt = _qfullcnt + _max_workers; + _newworker = _firstqcnt + _max_workers; + atexit(_aio_stats); +#endif + size = HASHSZ * sizeof (struct aio_req *); + _aio_hash = malloc(size); + if (_aio_hash == NULL) { + (void) mutex_unlock(&__aio_initlock); + return (-1); + } + (void) memset((caddr_t)_aio_hash, 0, size); + + /* initialize worker's signal mask to only catch SIGAIOCANCEL */ + (void) sigfillset(&_worker_set); + (void) sigdelset(&_worker_set, SIGAIOCANCEL); + + /* + * Create equal number of READ and WRITE workers. + */ + i = 0; + while (i++ < (_min_workers/2)) + (void) _aio_create_worker(NULL, AIOREAD); + i = 0; + while (i++ < (_min_workers/2)) + (void) _aio_create_worker(NULL, AIOWRITE); + + /* create one worker to send completion signals. 
*/ + (void) _aio_create_worker(NULL, AIOSIGEV); + (void) mutex_unlock(&__aio_initlock); + __uaio_ok = 1; + return (0); + } + + (void) mutex_unlock(&__aio_initlock); + return (0); +} + +/* + * special kaio cleanup thread sits in a loop in the + * kernel waiting for pending kaio requests to complete. + */ +void * +_kaio_cleanup_thread(void *arg) +{ + if (thr_setspecific(_aio_key, arg) != 0) + _aiopanic("_kaio_cleanup_thread, thr_setspecific()\n"); + (void) _kaio(AIOSTART); + return (arg); +} + +/* + * initialize kaio. + */ +void +_kaio_init() +{ + int error; + sigset_t set, oset; + + (void) mutex_lock(&__aio_initlock); + if (_aio_key == 0 && + thr_keycreate(&_aio_key, _aio_free_worker) != 0) + _aiopanic("_kaio_init, thr_keycreate()\n"); + if (!_kaio_ok) { + _pagesize = (int)PAGESIZE; + __aiostksz = 8 * _pagesize; + if ((_kaiowp = _aio_alloc_worker()) == NULL) { + error = ENOMEM; + } else { + if ((error = (int)_kaio(AIOINIT)) == 0) { + (void) sigfillset(&set); + (void) _sigprocmask(SIG_SETMASK, &set, &oset); + error = thr_create(NULL, __aiostksz, + _kaio_cleanup_thread, _kaiowp, + THR_BOUND | THR_DAEMON, &_kaiowp->work_tid); + (void) _sigprocmask(SIG_SETMASK, &oset, NULL); + } + if (error) { + _aio_free_worker(_kaiowp); + _kaiowp = NULL; + } + } + if (error) + _kaio_ok = -1; + else + _kaio_ok = 1; + } + (void) mutex_unlock(&__aio_initlock); +} + +int +aioread(int fd, caddr_t buf, int bufsz, off_t offset, int whence, + aio_result_t *resultp) +{ + return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOREAD)); +} + +int +aiowrite(int fd, caddr_t buf, int bufsz, off_t offset, int whence, + aio_result_t *resultp) +{ + return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOWRITE)); +} + +#if defined(_LARGEFILE64_SOURCE) && !defined(_LP64) +int +aioread64(int fd, caddr_t buf, int bufsz, off64_t offset, int whence, + aio_result_t *resultp) +{ + return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOAREAD64)); +} + +int +aiowrite64(int fd, caddr_t buf, int bufsz, 
off64_t offset, int whence, + aio_result_t *resultp) +{ + return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOAWRITE64)); +} +#endif /* (_LARGEFILE64_SOURCE) && !defined(_LP64) */ + +int +_aiorw(int fd, caddr_t buf, int bufsz, offset_t offset, int whence, + aio_result_t *resultp, int mode) +{ + aio_worker_t **nextworker; + aio_req_t *aiorp = NULL; + aio_args_t *ap = NULL; + offset_t loffset = 0; + struct stat stat; + int err = 0; + int kerr; + int umode; + + switch (whence) { + + case SEEK_SET: + loffset = offset; + break; + case SEEK_CUR: + if ((loffset = llseek(fd, 0, SEEK_CUR)) == -1) + err = -1; + else + loffset += offset; + break; + case SEEK_END: + if (fstat(fd, &stat) == -1) + err = -1; + else + loffset = offset + stat.st_size; + break; + default: + errno = EINVAL; + err = -1; + } + + if (err) + return (err); + + /* initialize kaio */ + if (!_kaio_ok) + _kaio_init(); + + /* + * _aio_do_request() needs the original request code (mode) to be able + * to choose the appropiate 32/64 bit function. All other functions + * only require the difference between READ and WRITE (umode). + */ + if (mode == AIOAREAD64 || mode == AIOAWRITE64) + umode = mode - AIOAREAD64; + else + umode = mode; + + /* + * Try kernel aio first. + * If errno is ENOTSUP/EBADFD, fall back to the thread implementation. + */ + if ((_kaio_ok > 0) && (KAIO_SUPPORTED(fd))) { + resultp->aio_errno = 0; + kerr = (int)_kaio(((resultp->aio_return == AIO_INPROGRESS) ? + (umode | AIO_POLL_BIT) : umode), + fd, buf, bufsz, loffset, resultp); + if (kerr == 0) + return (0); + else if ((errno != ENOTSUP) && (errno != EBADFD)) + return (-1); + if (errno == EBADFD) + SET_KAIO_NOT_SUPPORTED(fd); + } + if (!__uaio_ok) { + if (__uaio_init() == -1) + return (-1); + } + + aiorp = _aio_req_alloc(); + if (aiorp == (aio_req_t *)-1) { + errno = EAGAIN; + return (-1); + } + + /* + * _aio_do_request() checks aiorp->req_op to differentiate + * between 32 and 64 bit access. 
+ */ + aiorp->req_op = mode; + aiorp->req_resultp = resultp; + ap = &(aiorp->req_args); + ap->fd = fd; + ap->buf = buf; + ap->bufsz = bufsz; + ap->offset = loffset; + + nextworker = ((umode == AIOWRITE) ? &__nextworker_wr : + &__nextworker_rd); + _aio_lock(); + if (_aio_hash_insert(resultp, aiorp)) { + _aio_req_free(aiorp); + _aio_unlock(); + errno = EINVAL; + return (-1); + } else { + _aio_unlock(); + + /* + * _aio_req_add() only needs the difference between READ and + * WRITE to choose the right worker queue. + */ + _aio_req_add(aiorp, nextworker, umode); + return (0); + } +} + +int +aiocancel(aio_result_t *resultp) +{ + aio_req_t *aiorp; + struct aio_worker *aiowp; + int done = 0, canceled = 0; + + if (!__uaio_ok) { + errno = EINVAL; + return (-1); + } + + _aio_lock(); + aiorp = _aio_hash_find(resultp); + if (aiorp == NULL) { + if (_aio_outstand_cnt == _aio_req_done_cnt) + errno = EINVAL; + else + errno = EACCES; + + _aio_unlock(); + return (-1); + } else { + aiowp = aiorp->req_worker; + (void) mutex_lock(&aiowp->work_qlock1); + (void) _aio_cancel_req(aiowp, aiorp, &canceled, &done); + (void) mutex_unlock(&aiowp->work_qlock1); + + if (canceled) { + _aio_unlock(); + return (0); + } + + if (_aio_outstand_cnt == 0) { + _aio_unlock(); + errno = EINVAL; + return (-1); + } + + if (_aio_outstand_cnt == _aio_req_done_cnt) { + errno = EINVAL; + } else { + errno = EACCES; + } + + _aio_unlock(); + return (-1); + + } +} + +/* + * This must be asynch safe + */ +aio_result_t * +aiowait(struct timeval *uwait) +{ + aio_result_t *uresultp, *kresultp, *resultp; + int dontblock; + int timedwait = 0; + int kaio_errno = 0; + struct timeval twait, *wait = NULL; + hrtime_t hrtend; + hrtime_t hres; + + if (uwait) { + /* + * Check for valid specified wait time. If they are invalid + * fail the call right away. 
+ */ + if (uwait->tv_sec < 0 || uwait->tv_usec < 0 || + uwait->tv_usec >= MICROSEC) { + errno = EINVAL; + return ((aio_result_t *)-1); + } + + if ((uwait->tv_sec > 0) || (uwait->tv_usec > 0)) { + hrtend = gethrtime() + + (hrtime_t)uwait->tv_sec * NANOSEC + + (hrtime_t)uwait->tv_usec * (NANOSEC / MICROSEC); + twait = *uwait; + wait = &twait; + timedwait++; + } else { + /* polling */ + kresultp = (aio_result_t *)_kaio(AIOWAIT, + (struct timeval *)-1, 1); + if (kresultp != (aio_result_t *)-1 && + kresultp != NULL && kresultp != (aio_result_t *)1) + return (kresultp); + _aio_lock(); + uresultp = _aio_req_done(); + if (uresultp != NULL && uresultp != + (aio_result_t *)-1) { + _aio_unlock(); + return (uresultp); + } + _aio_unlock(); + if (uresultp == (aio_result_t *)-1 && + kresultp == (aio_result_t *)-1) { + errno = EINVAL; + return ((aio_result_t *)-1); + } else + return (NULL); + } + } + + for (;;) { + _aio_lock(); + uresultp = _aio_req_done(); + if (uresultp != NULL && uresultp != (aio_result_t *)-1) { + _aio_unlock(); + resultp = uresultp; + break; + } + _aiowait_flag++; + _aio_unlock(); + dontblock = (uresultp == (aio_result_t *)-1); + kresultp = (aio_result_t *)_kaio(AIOWAIT, wait, dontblock); + kaio_errno = errno; + _aio_lock(); + _aiowait_flag--; + _aio_unlock(); + if (kresultp == (aio_result_t *)1) { + /* aiowait() awakened by an aionotify() */ + continue; + } else if (kresultp != NULL && kresultp != (aio_result_t *)-1) { + resultp = kresultp; + break; + } else if (kresultp == (aio_result_t *)-1 && kaio_errno == + EINVAL && uresultp == (aio_result_t *)-1) { + errno = kaio_errno; + resultp = (aio_result_t *)-1; + break; + } else if (kresultp == (aio_result_t *)-1 && + kaio_errno == EINTR) { + errno = kaio_errno; + resultp = (aio_result_t *)-1; + break; + } else if (timedwait) { + hres = hrtend - gethrtime(); + if (hres <= 0) { + /* time is up. Return */ + resultp = NULL; + break; + } else { + /* + * some time left. 
Round up the remaining time + * in nanoseconds to microsec. Retry the call. + */ + hres += (NANOSEC / MICROSEC)-1; + wait->tv_sec = hres / NANOSEC; + wait->tv_usec = + (hres % NANOSEC) / (NANOSEC / MICROSEC); + } + } else { + ASSERT((kresultp == NULL && uresultp == NULL)); + resultp = NULL; + continue; + } + } + return (resultp); +} + +/* + * _aio_get_timedelta calculates the remaining time and stores the result + * into struct timespec *wait. + */ + +int +_aio_get_timedelta(struct timespec *end, struct timespec *wait) +{ + + int ret = 0; + struct timeval cur; + struct timespec curtime; + + (void) gettimeofday(&cur, NULL); + curtime.tv_sec = cur.tv_sec; + curtime.tv_nsec = cur.tv_usec * 1000; /* convert us to ns */ + + if (end->tv_sec >= curtime.tv_sec) { + wait->tv_sec = end->tv_sec - curtime.tv_sec; + if (end->tv_nsec >= curtime.tv_nsec) { + wait->tv_nsec = end->tv_nsec - curtime.tv_nsec; + if (wait->tv_sec == 0 && wait->tv_nsec == 0) + ret = -1; /* timer expired */ + } else { + if (end->tv_sec > curtime.tv_sec) { + wait->tv_sec -= 1; + wait->tv_nsec = NANOSEC - + (curtime.tv_nsec - end->tv_nsec); + } else { + ret = -1; /* timer expired */ + } + } + } else { + ret = -1; + } + return (ret); +} + +/* + * If closing by file descriptor: we will simply cancel all the outstanding + * aio`s and return. Those aio's in question will have either noticed the + * cancellation notice before, during, or after initiating io. + */ +int +aiocancel_all(int fd) +{ + aio_req_t *aiorp; + aio_req_t **aiorpp; + struct aio_worker *first, *next; + int canceled = 0; + int done = 0; + int cancelall = 0; + + if (_aio_outstand_cnt == 0) + return (AIO_ALLDONE); + + _aio_lock(); + /* + * cancel read requests from the read worker's queue. + */ + first = __nextworker_rd; + next = first; + do { + _aio_cancel_work(next, fd, &canceled, &done); + } while ((next = next->work_forw) != first); + + /* + * cancel write requests from the write workers queue. 
+ */ + + first = __nextworker_wr; + next = first; + do { + _aio_cancel_work(next, fd, &canceled, &done); + } while ((next = next->work_forw) != first); + + /* + * finally, check if there are requests on the done queue that + * should be canceled. + */ + if (fd < 0) + cancelall = 1; + aiorpp = &_aio_done_tail; + while ((aiorp = *aiorpp) != NULL) { + if (cancelall || aiorp->req_args.fd == fd) { + *aiorpp = aiorp->req_next; + _aio_donecnt--; + (void) _aio_hash_del(aiorp->req_resultp); + _aio_req_free(aiorp); + } else + aiorpp = &aiorp->req_next; + } + if (cancelall) { + ASSERT(_aio_donecnt == 0); + _aio_done_head = NULL; + } + _aio_unlock(); + + if (canceled && done == 0) + return (AIO_CANCELED); + else if (done && canceled == 0) + return (AIO_ALLDONE); + else if ((canceled + done == 0) && KAIO_SUPPORTED(fd)) + return ((int)_kaio(AIOCANCEL, fd, NULL)); + return (AIO_NOTCANCELED); +} + +/* + * cancel requests from a given work queue. if the file descriptor + * parameter, fd, is non NULL, then only cancel those requests in + * this queue that are to this file descriptor. if the "fd" + * parameter is -1, then cancel all requests. + */ +static void +_aio_cancel_work(aio_worker_t *aiowp, int fd, int *canceled, int *done) +{ + aio_req_t *aiorp; + + (void) mutex_lock(&aiowp->work_qlock1); + /* + * cancel queued requests first. + */ + aiorp = aiowp->work_tail1; + while (aiorp != NULL) { + if (fd < 0 || aiorp->req_args.fd == fd) { + if (_aio_cancel_req(aiowp, aiorp, canceled, done)) { + /* + * callers locks were dropped. aiorp is + * invalid, start traversing the list from + * the beginning. + */ + aiorp = aiowp->work_tail1; + continue; + } + } + aiorp = aiorp->req_next; + } + /* + * since the queued requests have been canceled, there can + * only be one inprogress request that shoule be canceled. 
+ */ + if ((aiorp = aiowp->work_req) != NULL) { + if (fd < 0 || aiorp->req_args.fd == fd) { + (void) _aio_cancel_req(aiowp, aiorp, canceled, done); + aiowp->work_req = NULL; + } + } + (void) mutex_unlock(&aiowp->work_qlock1); +} + +/* + * cancel a request. return 1 if the callers locks were temporarily + * dropped, otherwise return 0. + */ +int +_aio_cancel_req(aio_worker_t *aiowp, aio_req_t *aiorp, int *canceled, int *done) +{ + int ostate; + int rwflg = 1; + int siqueued; + int canned; + + ASSERT(MUTEX_HELD(&__aio_mutex)); + ASSERT(MUTEX_HELD(&aiowp->work_qlock1)); + ostate = aiorp->req_state; + if (ostate == AIO_REQ_CANCELED) { + return (0); + } + if (ostate == AIO_REQ_DONE || ostate == AIO_REQ_DONEQ) { + (*done)++; + return (0); + } + if (ostate == AIO_REQ_FREE) + return (0); + if (aiorp->req_op == AIOFSYNC) { + canned = aiorp->lio_head->lio_canned; + aiorp->lio_head->lio_canned = 1; + rwflg = 0; + if (canned) + return (0); + } + aiorp->req_state = AIO_REQ_CANCELED; + _aio_req_del(aiowp, aiorp, ostate); + if (ostate == AIO_REQ_INPROGRESS) + (void) thr_kill(aiowp->work_tid, SIGAIOCANCEL); + (void) mutex_unlock(&aiowp->work_qlock1); + (void) _aio_hash_del(aiorp->req_resultp); + (void) mutex_unlock(&__aio_mutex); + siqueued = _aiodone(aiorp, aiorp->lio_head, rwflg, -1, ECANCELED); + (void) mutex_lock(&__aio_mutex); + (void) mutex_lock(&aiowp->work_qlock1); + _lio_remove(aiorp->lio_head); + if (!siqueued) + _aio_req_free(aiorp); + (*canceled)++; + return (1); +} + +/* + * This is the worker's main routine. + * The task of this function is to execute all queued requests; + * once the last pending request is executed this function will block + * in _aio_idle(). A new incoming request must wakeup this thread to + * restart the work. + * Every worker has an own work queue. The queue lock is required + * to synchronize the addition of new requests for this worker or + * cancellation of pending/running requests. 
+ * + * Cancellation scenarios: + * The cancellation of a request is being done asynchronously using + * _aio_cancel_req() from another thread context. + * A queued request can be cancelled in different manners : + * a) request is queued but not "in progress" or "done" (AIO_REQ_QUEUED): + * - lock the queue -> remove the request -> unlock the queue + * - this function/thread does not detect this cancellation process + * b) request is in progress (AIO_REQ_INPROGRESS) : + * - this function first allow the cancellation of the running + * request with the flag "work_cancel_flg=1" + * see _aio_req_get() -> _aio_cancel_on() + * During this phase, it is allowed to interrupt the worker + * thread running the request (this thread) using the SIGAIOCANCEL + * signal. + * Once this thread returns from the kernel (because the request + * is just done), then it must disable a possible cancellation + * and proceed to finish the request. To disable the cancellation + * this thread must use _aio_cancel_off() to set "work_cancel_flg=0". + * c) request is already done (AIO_REQ_DONE || AIO_REQ_DONEQ): + * same procedure as in a) + * + * To b) + * This thread uses sigsetjmp() to define the position in the code, where + * it wish to continue working in the case that a SIGAIOCANCEL signal + * is detected. + * Normally this thread should get the cancellation signal during the + * kernel phase (reading or writing). In that case the signal handler + * aiosigcancelhndlr() is activated using the worker thread context, + * which again will use the siglongjmp() function to break the standard + * code flow and jump to the "sigsetjmp" position, provided that + * "work_cancel_flg" is set to "1". + * Because the "work_cancel_flg" is only manipulated by this worker + * thread and it can only run on one CPU at a given time, it is not + * necessary to protect that flag with the queue lock. 
+ * Returning from the kernel (read or write system call) we must + * first disable the use of the SIGAIOCANCEL signal and accordingly + * the use of the siglongjmp() function to prevent a possible deadlock: + * - It can happens that this worker thread returns from the kernel and + * blocks in "work_qlock1", + * - then a second thread cancels the apparently "in progress" request + * and sends the SIGAIOCANCEL signal to the worker thread, + * - the worker thread gets assigned the "work_qlock1" and will returns + * from the kernel, + * - the kernel detects the pending signal and activates the signal + * handler instead, + * - if the "work_cancel_flg" is still set then the signal handler + * should use siglongjmp() to cancel the "in progress" request and + * it would try to acquire the same work_qlock1 in _aio_req_get() + * for a second time => deadlock. + * To avoid that situation we disable the cancellation of the request + * in progress BEFORE we try to acquire the work_qlock1. + * In that case the signal handler will not call siglongjmp() and the + * worker thread will continue running the standard code flow. + * Then this thread must check the AIO_REQ_CANCELED flag to emulate + * an eventually required siglongjmp() freeing the work_qlock1 and + * avoiding a deadlock. + */ +void * +_aio_do_request(void *arglist) +{ + aio_worker_t *aiowp = (aio_worker_t *)arglist; + struct aio_args *arg; + aio_req_t *aiorp; /* current AIO request */ + int ostate; + ssize_t retval; + int rwflg; + + aiowp->work_tid = thr_self(); + if (thr_setspecific(_aio_key, aiowp) != 0) + _aiopanic("_aio_do_request, thr_setspecific()\n"); + +cancelit: + if (sigsetjmp(aiowp->work_jmp_buf, 0)) { + _sigprocmask(SIG_SETMASK, &_worker_set, NULL); + goto cancelit; + } + + for (;;) { + int err = 0; + + /* + * Put completed requests on aio_done_list. This has + * to be done as part of the main loop to ensure that + * we don't artificially starve any aiowait'ers. 
+ */ + if (aiowp->work_done1) + _aio_work_done(aiowp); + + while ((aiorp = _aio_req_get(aiowp)) == NULL) { + _aio_idle(aiowp); + } +#ifdef DEBUG + _donecnt[aiowp->work_tid]++; +#endif + arg = &aiorp->req_args; + + err = 0; + rwflg = 1; + switch (aiorp->req_op) { + case AIOREAD: + retval = pread(arg->fd, arg->buf, + arg->bufsz, arg->offset); + if (retval == -1) { + if (errno == ESPIPE) { + retval = read(arg->fd, + arg->buf, arg->bufsz); + if (retval == -1) + err = errno; + } else { + err = errno; + } + } + break; + case AIOWRITE: + retval = pwrite(arg->fd, arg->buf, + arg->bufsz, arg->offset); + if (retval == -1) { + if (errno == ESPIPE) { + retval = write(arg->fd, + arg->buf, arg->bufsz); + if (retval == -1) + err = errno; + } else { + err = errno; + } + } + break; +#if defined(_LARGEFILE64_SOURCE) && !defined(_LP64) + case AIOAREAD64: + retval = pread64(arg->fd, arg->buf, + arg->bufsz, arg->offset); + if (retval == -1) { + if (errno == ESPIPE) { + retval = read(arg->fd, + arg->buf, arg->bufsz); + if (retval == -1) + err = errno; + } else { + err = errno; + } + } + break; + case AIOAWRITE64: + retval = pwrite64(arg->fd, arg->buf, + arg->bufsz, arg->offset); + if (retval == -1) { + if (errno == ESPIPE) { + retval = write(arg->fd, + arg->buf, arg->bufsz); + if (retval == -1) + err = errno; + } else { + err = errno; + } + } + break; +#endif /* (_LARGEFILE64_SOURCE) && !defined(_LP64) */ + case AIOFSYNC: + if (_aio_fsync_del(aiorp, aiorp->lio_head)) + continue; + (void) mutex_lock(&aiowp->work_qlock1); + ostate = aiorp->req_state; + (void) mutex_unlock(&aiowp->work_qlock1); + if (ostate == AIO_REQ_CANCELED) { + (void) mutex_lock(&aiorp->req_lock); + aiorp->req_canned = 1; + (void) cond_broadcast( + &aiorp->req_cancv); + (void) mutex_unlock(&aiorp->req_lock); + continue; + } + rwflg = 0; + /* + * all writes for this fsync request are + * now acknowledged. now, make these writes + * visible. 
+ */ + if (arg->offset == O_SYNC) + retval = __fdsync(arg->fd, FSYNC); + else + retval = __fdsync(arg->fd, FDSYNC); + if (retval == -1) + err = errno; + break; + default: + rwflg = 0; + _aiopanic("_aio_do_request, bad op\n"); + } + + /* + * Disable the cancellation of the "in progress" + * request before trying to acquire the lock of the queue. + * + * It is not necessary to protect "work_cancel_flg" with + * work_qlock1, because this thread can only run on one + * CPU at a time. + */ + + _aio_cancel_off(aiowp); + (void) mutex_lock(&aiowp->work_qlock1); + + /* + * if we return here either + * - we got the lock and can close the transaction + * as usual or + * - the current transaction was cancelled, but siglongjmp + * was not executed + */ + + if (aiorp->req_state == AIO_REQ_CANCELED) { + (void) mutex_unlock(&aiowp->work_qlock1); + continue; + } + + aiorp->req_state = AIO_REQ_DONE; + _aio_req_done_cnt++; + (void) mutex_unlock(&aiowp->work_qlock1); + (void) _aiodone(aiorp, aiorp->lio_head, rwflg, retval, err); + } + /* NOTREACHED */ + return (NULL); +} + +/* + * posix supports signal notification for completed aio requests. + * when aio_do_requests() notices that an aio requests should send + * a signal, the aio request is moved to the signal notification + * queue. this routine drains this queue, and guarentees that the + * signal notification is sent. 
+ */ +void * +_aio_send_sigev(void *arg) +{ + aio_req_t *rp; + aio_worker_t *aiowp = (aio_worker_t *)arg; + + aiowp->work_tid = thr_self(); + if (thr_setspecific(_aio_key, aiowp) != 0) + _aiopanic("_aio_send_sigev, thr_setspecific()\n"); + + for (;;) { + while ((rp = _aio_req_get(aiowp)) == NULL) { + _aio_idle(aiowp); + } + if (rp->aio_sigevent.sigev_notify == SIGEV_SIGNAL) { + while (__sigqueue(__pid, rp->aio_sigevent.sigev_signo, + rp->aio_sigevent.sigev_value.sival_ptr, + SI_ASYNCIO) == -1) + thr_yield(); + } + if (rp->lio_signo) { + while (__sigqueue(__pid, rp->lio_signo, + rp->lio_sigval.sival_ptr, SI_ASYNCIO) == -1) + thr_yield(); + } + _aio_lock(); + _lio_remove(rp->lio_head); + _aio_req_free(rp); + _aio_unlock(); + } + /* NOTREACHED */ + return (NULL); +} + +/* + * do the completion semantic for a request that was either canceled + * by _aio_cancel_req(), or was completed by _aio_do_request(). return + * the value 1 when a sigevent was queued, otherwise return 0. + */ + +static int +_aiodone(aio_req_t *rp, aio_lio_t *head, int rwflg, ssize_t retval, int err) +{ + volatile aio_result_t *resultp; +#if defined(_LARGEFILE64_SOURCE) && !defined(_LP64) + aiocb64_t *aiop64; +#endif + int sigev; + + _aio_lock(); + + if (POSIX_AIO(rp)) { + void *user; + int port; + int error; + + if (rp->aio_sigevent.sigev_notify == SIGEV_PORT) { + resultp = rp->req_resultp; + resultp->aio_return = retval; + resultp->aio_errno = err; + + if (err == ECANCELED || rwflg) + _aio_outstand_cnt--; + +#if defined(_LARGEFILE64_SOURCE) && !defined(_LP64) + if (rp->req_op == AIOAREAD64 || + rp->req_op == AIOAWRITE64) { + aiop64 = (void *)rp->req_iocb; + aiop64->aio_state = USERAIO_DONE; + } else +#endif + rp->req_iocb->aio_state = USERAIO_DONE; + + port = rp->aio_sigevent.sigev_signo; + user = rp->aio_sigevent.sigev_value.sival_ptr; + error = _port_dispatch(port, 0, PORT_SOURCE_AIO, 0, + (uintptr_t)rp->req_iocb, user); + if (error == 0) { + (void) _aio_hash_del(rp->req_resultp); + 
_aio_req_free(rp); + _aio_unlock(); + return (1); + } + /* + * Can not submit the I/O completion to the port, + * set status of transaction to NONE + */ + rp->aio_sigevent.sigev_notify = SIGEV_NONE; + if (err == ECANCELED || rwflg) + _aio_outstand_cnt++; + } + + sigev = (rp->aio_sigevent.sigev_notify == SIGEV_SIGNAL || + (head && head->lio_signo)); + if (sigev) + (void) _aio_hash_del(rp->req_resultp); + + resultp = rp->req_resultp; + /* + * resultp is declared "volatile" (above) to avoid + * optimization by compiler ie. switching order which could + * lead aio_return getting checked by aio_error() following + * a particular aio_errno value (aio_return would not have been + * set yet) + */ + resultp->aio_return = retval; + resultp->aio_errno = err; + + if (err == ECANCELED) { + _aio_outstand_cnt--; + } else { + if (rwflg) { + if (!sigev) + _aio_enq_doneq(rp); + _aio_outstand_cnt--; + } + + } + + /* + * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that + * it is waiting for completed I/Os. The number of required + * completed I/Os is stored into "_aio_waitncnt". + * aio_waitn() is woken up when + * - there are no further outstanding I/Os + * (_aio_outstand_cnt == 0) or + * - the expected number of I/Os has completed. + * Only one __aio_waitn() function waits for completed I/Os at + * a time. + * + * __aio_suspend() increments "_aio_suscv_cnt" to notify + * _aiodone() that at least one __aio_suspend() call is + * waiting for completed I/Os. + * There could be more than one __aio_suspend() function + * waiting for completed I/Os. Because every function should + * be waiting for different I/Os, _aiodone() has to wake up all + * __aio_suspend() functions each time. + * Every __aio_suspend() function will compare the recently + * completed I/O with its own list. 
+ */ + if (_aio_flags & AIO_IO_WAITING) { + if (_aio_waitncnt > 0) + _aio_waitncnt--; + if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 || + _aio_suscv_cnt > 0) + (void) cond_broadcast(&_aio_iowait_cv); + } else { + /* Wake up waiting aio_suspend calls */ + if (_aio_suscv_cnt > 0) + (void) cond_broadcast(&_aio_iowait_cv); + } + + _aio_unlock(); + + /* + * __aio_waitn() sets AIO_WAIT_INPROGRESS and + * __aio_suspend() increments "_aio_kernel_suspend" + * when they are waiting in the kernel for completed I/Os. + * + * _kaio(AIONOTIFY) awakes the corresponding function + * in the kernel; then the corresponding __aio_waitn() or + * __aio_suspend() function could reap the recently + * completed I/Os (_aiodone()). + */ + if (err != ECANCELED) { + if (_aio_flags & AIO_WAIT_INPROGRESS || + _aio_kernel_suspend > 0) { + (void) _kaio(AIONOTIFY); + } + } + + rp->lio_signo = 0; + rp->lio_sigval.sival_int = 0; + if (head) { + /* + * If all the lio requests have completed, + * signal the waiting process + */ + (void) mutex_lock(&head->lio_mutex); + if (--head->lio_refcnt == 0) { + if (head->lio_mode == LIO_WAIT) + (void) cond_signal(&head->lio_cond_cv); + else { + rp->lio_signo = head->lio_signo; + rp->lio_sigval = head->lio_sigval; + } + } + (void) mutex_unlock(&head->lio_mutex); + } + if (sigev) { + _aio_req_add(rp, &__workers_si, AIOSIGEV); + return (1); + } + } else { + /* Solaris I/O */ + if (err == ECANCELED) + _aio_outstand_cnt--; + + _aio_unlock(); + + resultp = rp->req_resultp; + resultp->aio_return = retval; + resultp->aio_errno = err; + } + return (0); +} + +/* + * delete fsync requests from list head until there is + * only one left. return 0 when there is only one, otherwise + * return a non-zero value. 
+ */ +static int +_aio_fsync_del(aio_req_t *rp, aio_lio_t *head) +{ + int refcnt; + + (void) mutex_lock(&head->lio_mutex); + if (head->lio_refcnt > 1 || head->lio_mode == LIO_DESTROY || + head->lio_canned) { + refcnt = --head->lio_refcnt; + if (refcnt || head->lio_canned) { + head->lio_nent--; + (void) mutex_unlock(&head->lio_mutex); + (void) mutex_lock(&__aio_mutex); + _aio_req_free(rp); + (void) mutex_unlock(&__aio_mutex); + if (head->lio_canned) { + ASSERT(refcnt >= 0); + return (0); + } + return (1); + } + ASSERT(head->lio_mode == LIO_DESTROY); + ASSERT(head->lio_nent == 1 && head->lio_refcnt == 0); + (void) mutex_unlock(&head->lio_mutex); + _aio_remove(rp); + return (0); + } + ASSERT(head->lio_refcnt == head->lio_nent); + (void) mutex_unlock(&head->lio_mutex); + return (0); +} + +/* + * worker is set idle when its work queue is empty. + * The worker checks again that it has no more work and then + * goes to sleep waiting for more work. + */ +void +_aio_idle(aio_worker_t *aiowp) +{ + (void) mutex_lock(&aiowp->work_lock); + if (aiowp->work_cnt1 == 0) { +#ifdef DEBUG + _idlecnt[aiowp->work_tid]++; +#endif + aiowp->work_idleflg = 1; + (void) cond_wait(&aiowp->work_idle_cv, &aiowp->work_lock); + /* + * idle flag is cleared before worker is awakened + * by aio_req_add(). + */ + } + (void) mutex_unlock(&aiowp->work_lock); +} + +/* + * A worker's completed AIO requests are placed onto a global + * done queue. The application is only sent a SIGIO signal if + * the process has a handler enabled and it is not waiting via + * aiowait(). 
+ */ +static void +_aio_work_done(struct aio_worker *aiowp) +{ + struct aio_req *done_req = NULL; + + (void) mutex_lock(&aiowp->work_qlock1); + done_req = aiowp->work_prev1; + done_req->req_next = NULL; + aiowp->work_done1 = 0; + aiowp->work_tail1 = aiowp->work_next1; + if (aiowp->work_tail1 == NULL) + aiowp->work_head1 = NULL; + aiowp->work_prev1 = NULL; + (void) mutex_unlock(&aiowp->work_qlock1); + (void) mutex_lock(&__aio_mutex); + _aio_donecnt++; + _aio_outstand_cnt--; + _aio_req_done_cnt--; + ASSERT(_aio_donecnt > 0 && _aio_outstand_cnt >= 0); + ASSERT(done_req != NULL); + + if (_aio_done_tail == NULL) { + _aio_done_head = _aio_done_tail = done_req; + } else { + _aio_done_head->req_next = done_req; + _aio_done_head = done_req; + } + + if (_aiowait_flag) { + (void) mutex_unlock(&__aio_mutex); + (void) _kaio(AIONOTIFY); + } else { + (void) mutex_unlock(&__aio_mutex); + if (_sigio_enabled) { + (void) kill(__pid, SIGIO); + } + } +} + +/* + * the done queue consists of AIO requests that are in either the + * AIO_REQ_DONE or AIO_REQ_CANCELED state. requests that were cancelled + * are discarded. if the done queue is empty then NULL is returned. + * otherwise the address of a done aio_result_t is returned. + */ +struct aio_result_t * +_aio_req_done(void) +{ + struct aio_req *next; + aio_result_t *resultp; + + ASSERT(MUTEX_HELD(&__aio_mutex)); + + if ((next = _aio_done_tail) != NULL) { + _aio_done_tail = next->req_next; + ASSERT(_aio_donecnt > 0); + _aio_donecnt--; + (void) _aio_hash_del(next->req_resultp); + resultp = next->req_resultp; + ASSERT(next->req_state == AIO_REQ_DONE); + _aio_req_free(next); + return (resultp); + } + /* is queue empty? */ + if (next == NULL && _aio_outstand_cnt == 0) { + return ((aio_result_t *)-1); + } + return (NULL); +} + +/* + * add an AIO request onto the next work queue. a circular list of + * workers is used to choose the next worker. each worker has two + * work queues. 
if the lock for the first queue is busy then the + * request is placed on the second queue. the request is always + * placed on one of the two queues depending on which one is locked. + */ +void +_aio_req_add(aio_req_t *aiorp, aio_worker_t **nextworker, int mode) +{ + struct aio_worker *aiowp; + struct aio_worker *first; + int clogged = 0; + int found = 0; + int load_bal_flg; + int idleflg; + int qactive; + + aiorp->req_next = NULL; + ASSERT(*nextworker != NULL); + aiowp = *nextworker; + /* + * try to acquire the next worker's work queue. if it is locked, + * then search the list of workers until a queue is found unlocked, + * or until the list is completely traversed at which point another + * worker will be created. + */ + first = aiowp; + _aio_lock(); + __sigio_maskedcnt++; /* disable SIGIO */ + if (mode == AIOREAD || mode == AIOWRITE) { + _aio_outstand_cnt++; + load_bal_flg = 1; + } + _aio_unlock(); + switch (mode) { + case AIOREAD: + /* try to find an idle worker. */ + do { + if (mutex_trylock(&aiowp->work_qlock1) == 0) { + if (aiowp->work_idleflg) { + found = 1; + break; + } + (void) mutex_unlock( + &aiowp->work_qlock1); + } + } while ((aiowp = aiowp->work_forw) != first); + if (found) + break; + /*FALLTHROUGH*/ + case AIOWRITE: + while (mutex_trylock(&aiowp->work_qlock1)) { +#ifdef DEBUG + _qlocked++; +#endif + if (((aiowp = aiowp->work_forw)) == first) { + clogged = 1; + break; + } + } + /* + * create more workers when the workers appear + * overloaded. either all the workers are busy + * draining their queues, no worker's queue lock + * could be acquired, or the selected worker has + * exceeded its minimum work load, but has not + * exceeded the max number of workers. 
+ */ + if (clogged) { +#ifdef DEBUG + _new_workers++; + _clogged++; +#endif + if (_aio_worker_cnt < _max_workers) { + if (_aio_create_worker(aiorp, mode)) + _aiopanic( + "_aio_req_add: clogged"); + _aio_lock(); + __sigio_maskedcnt--; + _aio_unlock(); + return; + } + + /* + * No worker available and we have created + * _max_workers, keep going through the + * list until we get a lock + */ + while (mutex_trylock(&aiowp->work_qlock1)) { + /* + * give someone else a chance + */ + thr_yield(); + aiowp = aiowp->work_forw; + } + + } + ASSERT(MUTEX_HELD(&aiowp->work_qlock1)); + aiowp->work_minload1++; + if (_aio_worker_cnt < _max_workers && + aiowp->work_minload1 > _minworkload) { + aiowp->work_minload1 = 0; + (void) mutex_unlock(&aiowp->work_qlock1); +#ifdef DEBUG + _qfullcnt[aiowp->work_tid]++; + _new_workers++; + _newworker[aiowp->work_tid]++; + _avedone = _aio_submitcnt2/_new_workers; +#endif + (void) mutex_lock(&__aio_mutex); + *nextworker = aiowp->work_forw; + (void) mutex_unlock(&__aio_mutex); + if (_aio_create_worker(aiorp, mode)) + _aiopanic("aio_req_add: add worker"); + _aio_lock(); + __sigio_maskedcnt--; /* enable signals again */ + _aio_unlock(); /* send evt. SIGIO signal */ + return; + } + break; + case AIOFSYNC: + aiorp->req_op = mode; + /*FALLTHROUGH*/ + case AIOSIGEV: + load_bal_flg = 0; + (void) mutex_lock(&aiowp->work_qlock1); + break; + } + /* + * Put request onto worker's work queue. + */ + if (aiowp->work_tail1 == NULL) { + ASSERT(aiowp->work_cnt1 == 0); + aiowp->work_tail1 = aiorp; + aiowp->work_next1 = aiorp; + } else { + aiowp->work_head1->req_next = aiorp; + if (aiowp->work_next1 == NULL) + aiowp->work_next1 = aiorp; + } + aiorp->req_state = AIO_REQ_QUEUED; + aiorp->req_worker = aiowp; + aiowp->work_head1 = aiorp; + qactive = aiowp->work_cnt1++; + (void) mutex_unlock(&aiowp->work_qlock1); + if (load_bal_flg) { + _aio_lock(); + *nextworker = aiowp->work_forw; + _aio_unlock(); + } + /* + * Awaken worker if it is not currently active. 
+ */ + if (!qactive) { + (void) mutex_lock(&aiowp->work_lock); + idleflg = aiowp->work_idleflg; + aiowp->work_idleflg = 0; + (void) mutex_unlock(&aiowp->work_lock); + if (idleflg) + (void) cond_signal(&aiowp->work_idle_cv); + } + _aio_lock(); + __sigio_maskedcnt--; /* enable signals again */ + _aio_unlock(); /* send SIGIO signal if pending */ +} + +/* + * get an AIO request for a specified worker. each worker has + * two work queues. find the first one that is not empty and + * remove this request from the queue and return it back to the + * caller. if both queues are empty, then return a NULL. + */ +aio_req_t * +_aio_req_get(aio_worker_t *aiowp) +{ + aio_req_t *next; + int mode; + + (void) mutex_lock(&aiowp->work_qlock1); + if ((next = aiowp->work_next1) != NULL) { + /* + * remove a POSIX request from the queue; the + * request queue is a singularly linked list + * with a previous pointer. The request is removed + * by updating the previous pointer. + * + * non-posix requests are left on the queue to + * eventually be placed on the done queue. 
+ */ + + if (next->req_type == AIO_POSIX_REQ) { + if (aiowp->work_prev1 == NULL) { + aiowp->work_tail1 = next->req_next; + if (aiowp->work_tail1 == NULL) + aiowp->work_head1 = NULL; + } else { + aiowp->work_prev1->req_next = next->req_next; + if (aiowp->work_head1 == next) + aiowp->work_head1 = next->req_next; + } + + } else { + aiowp->work_prev1 = next; + ASSERT(aiowp->work_done1 >= 0); + aiowp->work_done1++; + } + ASSERT(next != next->req_next); + aiowp->work_next1 = next->req_next; + ASSERT(aiowp->work_cnt1 >= 1); + aiowp->work_cnt1--; + mode = next->req_op; + if (mode == AIOWRITE || mode == AIOREAD || mode == AIOAREAD64 || + mode == AIOAWRITE64) + aiowp->work_minload1--; +#ifdef DEBUG + _firstqcnt[aiowp->work_tid]++; +#endif + next->req_state = AIO_REQ_INPROGRESS; + _aio_cancel_on(aiowp); + } + aiowp->work_req = next; + ASSERT(next != NULL || (next == NULL && aiowp->work_cnt1 == 0)); + (void) mutex_unlock(&aiowp->work_qlock1); + return (next); +} + +static void +_aio_req_del(aio_worker_t *aiowp, aio_req_t *rp, int ostate) +{ + aio_req_t **last, *lastrp, *next; + + ASSERT(aiowp != NULL); + ASSERT(MUTEX_HELD(&aiowp->work_qlock1)); + if (POSIX_AIO(rp)) { + if (ostate != AIO_REQ_QUEUED) + return; + } + last = &aiowp->work_tail1; + lastrp = aiowp->work_tail1; + ASSERT(ostate == AIO_REQ_QUEUED || ostate == AIO_REQ_INPROGRESS); + while ((next = *last) != NULL) { + if (next == rp) { + *last = next->req_next; + if (aiowp->work_next1 == next) + aiowp->work_next1 = next->req_next; + + if ((next->req_next != NULL) || + (aiowp->work_done1 == 0)) { + if (aiowp->work_head1 == next) + aiowp->work_head1 = next->req_next; + if (aiowp->work_prev1 == next) + aiowp->work_prev1 = next->req_next; + } else { + if (aiowp->work_head1 == next) + aiowp->work_head1 = lastrp; + if (aiowp->work_prev1 == next) + aiowp->work_prev1 = lastrp; + } + + if (ostate == AIO_REQ_QUEUED) { + ASSERT(aiowp->work_cnt1 >= 1); + aiowp->work_cnt1--; + } else { + ASSERT(ostate == AIO_REQ_INPROGRESS && + 
!POSIX_AIO(rp)); + aiowp->work_done1--; + } + return; + } + last = &next->req_next; + lastrp = next; + } + /* NOTREACHED */ +} + + +static void +_aio_enq_doneq(aio_req_t *reqp) +{ + if (_aio_doneq == NULL) { + _aio_doneq = reqp; + reqp->req_next = reqp; + reqp->req_prev = reqp; + } else { + reqp->req_next = _aio_doneq; + reqp->req_prev = _aio_doneq->req_prev; + reqp->req_prev->req_next = reqp; + _aio_doneq->req_prev = reqp; + } + reqp->req_state = AIO_REQ_DONEQ; + _aio_doneq_cnt++; +} + +/* + * caller owns the _aio_mutex + */ + +aio_req_t * +_aio_req_remove(aio_req_t *reqp) +{ + aio_req_t *head; + + if (reqp && reqp->req_state != AIO_REQ_DONEQ) + return (NULL); + + if (reqp) { + /* request in done queue */ + if (reqp->req_next == reqp) { + /* only one request on queue */ + _aio_doneq = NULL; + } else { + reqp->req_next->req_prev = reqp->req_prev; + reqp->req_prev->req_next = reqp->req_next; + if (reqp == _aio_doneq) + _aio_doneq = reqp->req_next; + } + _aio_doneq_cnt--; + return (reqp); + } + + if (_aio_doneq) { + head = _aio_doneq; + if (head == head->req_next) { + /* only one request on queue */ + _aio_doneq = NULL; + } else { + head->req_prev->req_next = head->req_next; + head->req_next->req_prev = head->req_prev; + _aio_doneq = head->req_next; + } + _aio_doneq_cnt--; + return (head); + } + return (NULL); + +} + +/* + * An AIO request is identified by an aio_result_t pointer. The AIO + * library maps this aio_result_t pointer to its internal representation + * via a hash table. This function adds an aio_result_t pointer to + * the hash table. 
+ */ +static int +_aio_hash_insert(aio_result_t *resultp, aio_req_t *aiorp) +{ + uintptr_t i; + aio_req_t *next, **last; + + ASSERT(MUTEX_HELD(&__aio_mutex)); + i = AIOHASH(resultp); + last = (_aio_hash + i); + while ((next = *last) != NULL) { + if (resultp == next->req_resultp) + return (-1); + last = &next->req_link; + } + *last = aiorp; + ASSERT(aiorp->req_link == NULL); + return (0); +} + +/* + * remove an entry from the hash table. + */ +struct aio_req * +_aio_hash_del(aio_result_t *resultp) +{ + struct aio_req *next, **prev; + uintptr_t i; + + ASSERT(MUTEX_HELD(&__aio_mutex)); + i = AIOHASH(resultp); + prev = (_aio_hash + i); + while ((next = *prev) != NULL) { + if (resultp == next->req_resultp) { + *prev = next->req_link; + return (next); + } + prev = &next->req_link; + } + ASSERT(next == NULL); + return ((struct aio_req *)NULL); +} + +/* + * find an entry on the hash table + */ +struct aio_req * +_aio_hash_find(aio_result_t *resultp) +{ + struct aio_req *next, **prev; + uintptr_t i; + + /* + * no user AIO + */ + if (_aio_hash == NULL) + return (NULL); + + i = AIOHASH(resultp); + prev = (_aio_hash + i); + while ((next = *prev) != NULL) { + if (resultp == next->req_resultp) { + return (next); + } + prev = &next->req_link; + } + return (NULL); +} + +/* + * Allocate and free aios. They are cached. 
+ */ +aio_req_t * +_aio_req_alloc(void) +{ + aio_req_t *aiorp; + int err; + + _aio_lock(); + while (_aio_freelist == NULL) { + _aio_unlock(); + err = 0; + (void) mutex_lock(&__aio_cachefillock); + if (__aio_cachefilling) + (void) cond_wait(&__aio_cachefillcv, + &__aio_cachefillock); + else + err = _fill_aiocache(HASHSZ); + (void) mutex_unlock(&__aio_cachefillock); + if (err) + return ((aio_req_t *)-1); + _aio_lock(); + } + aiorp = _aio_freelist; + _aio_freelist = _aio_freelist->req_link; + aiorp->req_type = 0; + aiorp->req_link = NULL; + aiorp->req_next = NULL; + aiorp->lio_head = NULL; + aiorp->aio_sigevent.sigev_notify = SIGEV_NONE; + _aio_freelist_cnt--; + _aio_unlock(); + return (aiorp); +} + +/* + * fill the aio request cache with empty aio request structures. + */ +int +_fill_aiocache(int n) +{ + aio_req_t *next, *aiorp, *first; + int cnt; + uintptr_t ptr; + int i; + + __aio_cachefilling = 1; + if ((ptr = (uintptr_t)malloc(sizeof (struct aio_req) * n)) == NULL) { + __aio_cachefilling = 0; + (void) cond_broadcast(&__aio_cachefillcv); + return (-1); + } + if (ptr & 0x7) + _aiopanic("_fill_aiocache"); + first = (struct aio_req *)ptr; + next = first; + cnt = n - 1; + for (i = 0; i < cnt; i++) { + aiorp = next++; + aiorp->req_state = AIO_REQ_FREE; + aiorp->req_link = next; + (void) mutex_init(&aiorp->req_lock, USYNC_THREAD, NULL); + (void) cond_init(&aiorp->req_cancv, USYNC_THREAD, NULL); + } + __aio_cachefilling = 0; + (void) cond_broadcast(&__aio_cachefillcv); + next->req_state = AIO_REQ_FREE; + next->req_link = NULL; + (void) mutex_init(&next->req_lock, USYNC_THREAD, NULL); + (void) cond_init(&next->req_cancv, USYNC_THREAD, NULL); + _aio_lock(); + _aio_freelist_cnt = n; + _aio_freelist = first; + _aio_unlock(); + return (0); +} + +/* + * put an aio request back onto the freelist. 
+ */ +void +_aio_req_free(aio_req_t *aiorp) +{ + ASSERT(MUTEX_HELD(&__aio_mutex)); + aiorp->req_state = AIO_REQ_FREE; + aiorp->req_link = _aio_freelist; + _aio_freelist = aiorp; + _aio_freelist_cnt++; +} + +/* + * global aio lock that masks SIGIO signals. + */ +void +_aio_lock(void) +{ + __sigio_masked = 1; + (void) mutex_lock(&__aio_mutex); + __sigio_maskedcnt++; +} + +/* + * release global aio lock. send SIGIO signal if one + * is pending. + */ +void +_aio_unlock(void) +{ + if (--__sigio_maskedcnt == 0) + __sigio_masked = 0; + (void) mutex_unlock(&__aio_mutex); + if (__sigio_pending) + __aiosendsig(); +} + +/* + * AIO interface for POSIX + */ +int +_aio_rw(aiocb_t *cb, aio_lio_t *lio_head, aio_worker_t **nextworker, + int mode, int flg, struct sigevent *sigp) +{ + aio_req_t *aiorp = NULL; + aio_args_t *ap = NULL; + int kerr; + int umode; + + if (cb == NULL) { + errno = EINVAL; + return (-1); + } + + /* initialize kaio */ + if (!_kaio_ok) + _kaio_init(); + + cb->aio_state = NOCHECK; + + /* + * If _aio_rw() is called because a list I/O + * kaio() failed, we dont want to repeat the + * system call + */ + + if (flg & AIO_KAIO) { + /* + * Try kernel aio first. + * If errno is ENOTSUP/EBADFD, + * fall back to the thread implementation. 
+ */ + if ((_kaio_ok > 0) && (KAIO_SUPPORTED(cb->aio_fildes))) { + cb->aio_resultp.aio_errno = EINPROGRESS; + cb->aio_state = CHECK; + kerr = (int)_kaio(mode, cb); + if (kerr == 0) + return (0); + else if ((errno != ENOTSUP) && (errno != EBADFD)) { + cb->aio_resultp.aio_errno = errno; + cb->aio_resultp.aio_return = -1; + cb->aio_state = NOCHECK; + return (-1); + } + if (errno == EBADFD) + SET_KAIO_NOT_SUPPORTED(cb->aio_fildes); + } + } + + cb->aio_resultp.aio_errno = EINPROGRESS; + cb->aio_state = USERAIO; + + if (!__uaio_ok) { + if (__uaio_init() == -1) + return (-1); + } + + aiorp = _aio_req_alloc(); + if (aiorp == (aio_req_t *)-1) { + errno = EAGAIN; + return (-1); + } + + /* + * If an LIO request, add the list head to the + * aio request + */ + aiorp->lio_head = lio_head; + aiorp->req_type = AIO_POSIX_REQ; + umode = ((mode == AIOFSYNC) ? mode : mode - AIOAREAD); + aiorp->req_op = umode; + + if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) { + aiorp->aio_sigevent.sigev_notify = SIGEV_SIGNAL; + aiorp->aio_sigevent.sigev_signo = + cb->aio_sigevent.sigev_signo; + aiorp->aio_sigevent.sigev_value.sival_ptr = + cb->aio_sigevent.sigev_value.sival_ptr; + } + + if (sigp) { + /* SIGEV_PORT */ + port_notify_t *pn = sigp->sigev_value.sival_ptr; + aiorp->aio_sigevent.sigev_notify = SIGEV_PORT; + aiorp->aio_sigevent.sigev_signo = pn->portnfy_port; + aiorp->aio_sigevent.sigev_value.sival_ptr = pn->portnfy_user; + } else if (cb->aio_sigevent.sigev_notify == SIGEV_PORT) { + port_notify_t *pn; + pn = cb->aio_sigevent.sigev_value.sival_ptr; + aiorp->aio_sigevent.sigev_notify = SIGEV_PORT; + aiorp->aio_sigevent.sigev_signo = pn->portnfy_port; + aiorp->aio_sigevent.sigev_value.sival_ptr = pn->portnfy_user; + } + + aiorp->req_resultp = &cb->aio_resultp; + aiorp->req_iocb = cb; + ap = &(aiorp->req_args); + ap->fd = cb->aio_fildes; + ap->buf = (caddr_t)cb->aio_buf; + ap->bufsz = cb->aio_nbytes; + ap->offset = cb->aio_offset; + + _aio_lock(); + if ((flg & AIO_NO_DUPS) && 
_aio_hash_insert(&cb->aio_resultp, aiorp)) { + _aio_req_free(aiorp); + _aio_unlock(); + errno = EINVAL; + return (-1); + } else { + _aio_unlock(); + _aio_req_add(aiorp, nextworker, umode); + return (0); + } +} + +#if defined(_LARGEFILE64_SOURCE) && !defined(_LP64) +/* + * 64-bit AIO interface for POSIX + */ +int +_aio_rw64(aiocb64_t *cb, aio_lio_t *lio_head, aio_worker_t **nextworker, + int mode, int flg, struct sigevent *sigp) +{ + aio_req_t *aiorp = NULL; + aio_args_t *ap = NULL; + int kerr; + int umode; + + if (cb == NULL) { + errno = EINVAL; + return (-1); + } + + /* initialize kaio */ + if (!_kaio_ok) + _kaio_init(); + + cb->aio_state = NOCHECK; + + /* + * If _aio_rw() is called because a list I/O + * kaio() failed, we dont want to repeat the + * system call + */ + + if (flg & AIO_KAIO) { + /* + * Try kernel aio first. + * If errno is ENOTSUP/EBADFD, + * fall back to the thread implementation. + */ + if ((_kaio_ok > 0) && (KAIO_SUPPORTED(cb->aio_fildes))) { + cb->aio_resultp.aio_errno = EINPROGRESS; + cb->aio_state = CHECK; + kerr = (int)_kaio(mode, cb); + if (kerr == 0) + return (0); + else if ((errno != ENOTSUP) && (errno != EBADFD)) { + cb->aio_resultp.aio_errno = errno; + cb->aio_resultp.aio_return = -1; + cb->aio_state = NOCHECK; + return (-1); + } + if (errno == EBADFD) + SET_KAIO_NOT_SUPPORTED(cb->aio_fildes); + } + } + + cb->aio_resultp.aio_errno = EINPROGRESS; + cb->aio_state = USERAIO; + + if (!__uaio_ok) { + if (__uaio_init() == -1) + return (-1); + } + + + aiorp = _aio_req_alloc(); + if (aiorp == (aio_req_t *)-1) { + errno = EAGAIN; + return (-1); + } + + /* + * If an LIO request, add the list head to the + * aio request + */ + aiorp->lio_head = lio_head; + aiorp->req_type = AIO_POSIX_REQ; + + /* + * _aio_do_request() needs the original request code to be able + * to choose the appropriate 32/64 bit function. 
+ */ + aiorp->req_op = mode; + + if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) { + aiorp->aio_sigevent.sigev_notify = SIGEV_SIGNAL; + aiorp->aio_sigevent.sigev_signo = + cb->aio_sigevent.sigev_signo; + aiorp->aio_sigevent.sigev_value.sival_ptr = + cb->aio_sigevent.sigev_value.sival_ptr; + } + + if (sigp) { + /* SIGEV_PORT */ + port_notify_t *pn = sigp->sigev_value.sival_ptr; + aiorp->aio_sigevent.sigev_notify = SIGEV_PORT; + aiorp->aio_sigevent.sigev_signo = pn->portnfy_port; + aiorp->aio_sigevent.sigev_value.sival_ptr = pn->portnfy_user; + } else if (cb->aio_sigevent.sigev_notify == SIGEV_PORT) { + port_notify_t *pn; + pn = cb->aio_sigevent.sigev_value.sival_ptr; + aiorp->aio_sigevent.sigev_notify = SIGEV_PORT; + aiorp->aio_sigevent.sigev_signo = pn->portnfy_port; + aiorp->aio_sigevent.sigev_value.sival_ptr = pn->portnfy_user; + } + + aiorp->req_resultp = &cb->aio_resultp; + aiorp->req_iocb = (aiocb_t *)cb; + ap = &(aiorp->req_args); + ap->fd = cb->aio_fildes; + ap->buf = (caddr_t)cb->aio_buf; + ap->bufsz = cb->aio_nbytes; + ap->offset = cb->aio_offset; + + _aio_lock(); + if ((flg & AIO_NO_DUPS) && _aio_hash_insert(&cb->aio_resultp, aiorp)) { + _aio_req_free(aiorp); + _aio_unlock(); + errno = EINVAL; + return (-1); + } else { + _aio_unlock(); + + /* + * _aio_req_add() only needs the difference between READ, + * WRITE and other to choose the right worker queue. + * AIOAREAD64 is mapped to AIOREAD and + * AIOAWRITE64 is mapped to AIOWRITE. + * mode is AIOAREAD64, AIOAWRITE64 or AIOFSYNC. + */ + umode = ((mode == AIOFSYNC) ? 
mode : mode - AIOAREAD64); + _aio_req_add(aiorp, nextworker, umode); + return (0); + } +} +#endif /* (_LARGEFILE64_SOURCE) && !defined(_LP64) */ diff --git a/usr/src/lib/libaio/common/libaio.h b/usr/src/lib/libaio/common/libaio.h new file mode 100644 index 0000000000..4c3d7a2af1 --- /dev/null +++ b/usr/src/lib/libaio/common/libaio.h @@ -0,0 +1,339 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBAIO_H +#define _LIBAIO_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <thread.h> +#include <asynch.h> +#include <setjmp.h> +#include <signal.h> +#include <siginfo.h> +#include <aio.h> +#include <limits.h> +#include <ucontext.h> + +#ifndef _REENTRANT +#define _REENTRANT +#endif + +#ifdef DEBUG +extern int assfail(char *, char *, int); +#define ASSERT(EX) ((void)((EX) || assfail(#EX, __FILE__, __LINE__))) +#else +#define ASSERT(EX) +#endif + +#define SIGAIOCANCEL SIGPROF /* special aio cancelation signal */ +#define AIO_WAITN_MAXIOCBS 32768 /* max. iocbs per system call */ + +typedef struct aio_args { + int fd; + caddr_t buf; + size_t bufsz; + offset_t offset; +} aio_args_t; + +/* + * list head for UFS list I/O + */ +typedef struct aio_lio { + char lio_mode; /* LIO_WAIT/LIO_NOWAIT */ + int lio_nent; /* Number of list I/O's */ + int lio_refcnt; /* outstanding I/O's */ + cond_t lio_cond_cv; /* list notification for I/O done */ + mutex_t lio_mutex; /* list mutex */ + struct aio_lio *lio_next; /* pointer to next on freelist */ + int lio_signo; /* Signal for LIO_NOWAIT */ + union sigval lio_sigval; /* Signal parameter */ + char lio_canned; /* lio was canceled */ +} aio_lio_t; + +/* + * size of aio_req should be power of 2. this helps to improve the + * effectiveness of the hashing function. + */ +typedef struct aio_req { + /* + * fields protected by _aio_mutex lock. + */ + struct aio_req *req_link; /* hash chain link */ + /* + * when req is on the doneq, then req_next is protected by + * the _aio_mutex lock. when the req is on a work q, then + * req_next is protected by a worker's work_qlock1 lock. 
+ */ + struct aio_req *req_next; /* request/done queue link */ + struct aio_req *req_prev; /* double linked list */ + /* + * condition variable that waits for a request to be + * canceled. + */ + mutex_t req_lock; /* protects the following 2 fields */ + cond_t req_cancv; /* cancel req condition variable */ + char req_canned; /* set when canceled */ + /* + * fields protected by a worker's work_qlock1 lock. + */ + int req_state; /* AIO_REQ_QUEUED, ... */ + /* + * fields require no locking. + */ + int req_type; /* AIO_POSIX_REQ ? */ + struct aio_worker *req_worker; /* associate req. with worker */ + aio_result_t *req_resultp; /* address of result buffer */ + int req_op; /* read or write */ + aio_args_t req_args; /* arglist */ + aio_lio_t *lio_head; /* list head for LIO */ + int req_retval; /* resultp's retval */ + int req_errno; /* resultp's errno */ + char req_canwait; /* waiting for req to be canceled */ + struct sigevent aio_sigevent; + int lio_signo; /* Signal for LIO_NOWAIT */ + union sigval lio_sigval; /* Signal parameter */ + aiocb_t *req_iocb; /* ptr to aiocb */ +} aio_req_t; + +/* special request type for handling sigevent notification */ +#define AIOSIGEV AIOFSYNC+1 + +/* special lio type that destroys itself when lio refcnt becomes zero */ +#define LIO_FSYNC LIO_WAIT+1 +#define LIO_DESTROY LIO_FSYNC+1 + +/* lio flags */ +#define LIO_FSYNC_CANCELED 0x1 + +/* values for aios_state */ + +#define AIO_REQ_QUEUED 1 +#define AIO_REQ_INPROGRESS 2 +#define AIO_REQ_CANCELED 3 +#define AIO_REQ_DONE 4 +#define AIO_REQ_FREE 5 +#define AIO_LIO_DONE 6 +#define AIO_REQ_DONEQ 7 + +/* use KAIO in _aio_rw() */ +#define AIO_NO_KAIO 0x0 +#define AIO_KAIO 0x1 +#define AIO_NO_DUPS 0x2 + +#define AIO_POSIX_REQ 0x1 + +#define CHECK 1 +#define NOCHECK 2 +#define CHECKED 3 +#define USERAIO 4 +#define USERAIO_DONE 5 + +/* values for _aio_flags */ + +/* + * if set, _aiodone() notifies aio_waitn about done requests + * from the threads + */ +#define AIO_WAIT_INPROGRESS 0x1 + +/* + * if 
set, _aiodone() wakes up functions waiting for completed I/Os + */ +#define AIO_IO_WAITING 0x2 + +#define AIO_LIB_WAITN 0x4 /* aio_waitn in progress */ +#define AIO_LIB_WAITN_PENDING 0x8 /* aio_waitn requests pending */ + +/* + * Before a kaio() system call, the fd will be checked + * to ensure that kernel async. I/O is supported for this file. + * The only way to find out is if a kaio() call returns ENOTSUP, + * so the default will always be to try the kaio() call. Only in + * the specific instance of a kaio() call returning ENOTSUP + * will we stop submitting kaio() calls for that fd. + * If the fd is outside the array bounds, we will allow the kaio() + * call. + * + * The only way that an fd entry can go from ENOTSUP to supported + * is if that fd is freed up by a close(), and close will clear + * the entry for that fd. + * + * Each fd gets a bit in the array _kaio_supported[]. + * + * uint32_t _kaio_supported[MAX_KAIO_FDARRAY_SIZE]; + * + * Array is MAX_KAIO_ARRAY_SIZE of 32-bit elements, for 4kb. + * If more than (MAX_KAIO_FDARRAY_SIZE * KAIO_FDARRAY_ELEM_SIZE ) + * files are open, this can be expanded. 
+ */ + +#define MAX_KAIO_FDARRAY_SIZE 1024 +#define KAIO_FDARRAY_ELEM_SIZE WORD_BIT /* uint32_t */ + +#define MAX_KAIO_FDS (MAX_KAIO_FDARRAY_SIZE * KAIO_FDARRAY_ELEM_SIZE) + +#define VALID_FD(fdes) (((fdes) >= 0) && ((fdes) < MAX_KAIO_FDS)) + +#define KAIO_SUPPORTED(fdes) \ + ((!VALID_FD(fdes)) || \ + ((_kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] & \ + (uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE))) == 0)) + +#define SET_KAIO_NOT_SUPPORTED(fdes) \ + if (VALID_FD((fdes))) \ + _kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] |= \ + (uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE)) + +#define CLEAR_KAIO_SUPPORTED(fdes) \ + if (VALID_FD((fdes))) \ + _kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] &= \ + ~(uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE)) + +typedef struct aio_worker { + /* + * fields protected by _aio_mutex lock + */ + struct aio_worker *work_forw; /* forward link in list of workers */ + struct aio_worker *work_backw; /* backwards link in list of workers */ + /* + * fields require no locking. 
+ */ + thread_t work_tid; /* worker's thread-id */ + mutex_t work_qlock1; /* lock for work queue 1 */ + struct aio_req *work_head1; /* head of work request queue 1 */ + struct aio_req *work_tail1; /* tail of work request queue 1 */ + struct aio_req *work_next1; /* work queue one's next pointer */ + struct aio_req *work_prev1; /* last request done from queue 1 */ + int work_cnt1; /* length of work queue one */ + int work_done1; /* number of requests done */ + int work_minload1; /* min length of queue */ + struct aio_req *work_req; /* active work request */ + int work_idleflg; /* when set, worker is idle */ + cond_t work_idle_cv; /* place to sleep when idle */ + mutex_t work_lock; /* protects work flags */ + sigjmp_buf work_jmp_buf; /* cancellation point */ + char work_cancel_flg; /* flag set when at cancellation pt */ +} aio_worker_t; + +extern void _kaio_init(void); +extern intptr_t _kaio(int, ...); +extern int _aiorw(int, caddr_t, int, offset_t, int, aio_result_t *, int); +extern int _aio_rw(aiocb_t *, aio_lio_t *, aio_worker_t **, int, int, + struct sigevent *); +extern int __aio_fsync(int, aiocb_t *); +#if defined(_LARGEFILE64_SOURCE) && !defined(_LP64) +extern int _aio_rw64(aiocb64_t *, aio_lio_t *, aio_worker_t **, int, int, + struct sigevent *); +extern int __aio_fsync64(int, aiocb64_t *); +#endif +extern int aiocancel_all(int); +extern int _aio_create_worker(aio_req_t *, int); +extern void *_aio_send_sigev(void *); + +extern void _aio_cancel_on(aio_worker_t *); +extern void _aio_cancel_off(aio_worker_t *); +extern int _aio_cancel_req(aio_worker_t *, aio_req_t *, int *, int *); + +extern void _aio_forkinit(void); +extern void _aiopanic(char *); +extern void _aio_lock(void); +extern void _aio_unlock(void); +extern void _aio_req_free(aio_req_t *); +extern aio_req_t *_aio_hash_del(aio_result_t *); +extern int _fill_aiocache(int); + +extern aio_worker_t *_aio_alloc_worker(void); +extern void _aio_free_worker(void *); + +extern void _aio_idle(struct aio_worker *); 
+extern void __aiosendsig(void); +extern void *_aio_do_request(void *); +extern void _aio_remove(aio_req_t *); +extern void _lio_remove(aio_lio_t *); +extern aio_req_t *_aio_req_remove(aio_req_t *); +extern int _aio_get_timedelta(struct timespec *, struct timespec *); + +extern int _close(int); +extern int __sigqueue(pid_t pid, int signo, + /* const union sigval */ void *value, int si_code); +extern pid_t _fork(void); +extern int _sigaction(int sig, const struct sigaction *act, + struct sigaction *oact); +extern int _sigemptyset(sigset_t *set); +extern int _sigaddset(sigset_t *set, int signo); +extern int _sigismember(sigset_t *set, int signo); +extern int _sigprocmask(int how, const sigset_t *set, sigset_t *oset); +extern void aiosigcancelhndlr(int, siginfo_t *, void *); + +extern aio_worker_t *__nextworker_rd; /* worker chosen for next rd request */ +extern aio_worker_t *__workers_rd; /* list of all rd workers */ +extern int __rd_workerscnt; /* number of rd workers */ +extern aio_worker_t *__nextworker_wr; /* worker chosen for next wr request */ +extern aio_worker_t *__workers_wr; /* list of all wr workers */ +extern int __wr_workerscnt; /* number of wr workers */ +extern aio_worker_t *__nextworker_si; /* worker chosen for next si request */ +extern aio_worker_t *__workers_si; /* list of all si workers */ +extern int __si_workerscnt; /* number of si workers */ +extern int __aiostksz; /* stack size for workers */ +extern mutex_t __aio_mutex; /* global aio lock that's SIGIO-safe */ +extern mutex_t __lio_mutex; /* global lio lock */ +extern int _max_workers; /* max number of workers permitted */ +extern int _min_workers; /* min number of workers */ +extern sigset_t _worker_set; /* worker's signal mask */ +extern int _aio_worker_cnt; /* number of AIO workers */ +extern int _sigio_enabled; /* when set, send SIGIO signal */ +extern int __sigio_pending; /* count of pending SIGIO signals */ +extern int __sigio_masked; /* when set, SIGIO is masked */ +extern int 
__sigio_maskedcnt; /* count number times bit mask is set */ +extern pid_t __pid; /* process's PID */ +extern int _kaio_ok; /* indicates if kaio is initialized */ +extern thread_key_t _aio_key; /* for thread-specific data */ +extern struct sigaction sigcanact; /* action for SIGAIOCANCEL */ +extern int _pagesize; + +/* + * Array for determining whether or not a file supports kaio + * + */ +extern uint32_t _kaio_supported[]; + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBAIO_H */ diff --git a/usr/src/lib/libaio/common/llib-laio b/usr/src/lib/libaio/common/llib-laio new file mode 100644 index 0000000000..9bf215ab3a --- /dev/null +++ b/usr/src/lib/libaio/common/llib-laio @@ -0,0 +1,84 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <signal.h> +#include <libaio.h> + +/* + * usr/src/lib/libaio/common + */ + +/* aio.c */ +int aioread(int fd, caddr_t buf, int bufsz, off_t offset, int whence, + aio_result_t *resultp); +int aiowrite(int fd, caddr_t buf, int bufsz, off_t offset, int whence, + aio_result_t *resultp); +int aioread64(int fd, caddr_t buf, int bufsz, off64_t offset, int whence, + aio_result_t *resultp); +int aiowrite64(int fd, caddr_t buf, int bufsz, off64_t offset, int whence, + aio_result_t *resultp); +int aiocancel(aio_result_t *resultp); +aio_result_t *aiowait(struct timeval *uwait); + +/* scalls.c */ +int _libaio_close(int fd); +pid_t _libaio_fork(void); + +/* ma.c */ + +/* posix_aio.c */ +int __aio_read(aiocb_t *cb); +int __aio_write(aiocb_t *cb); +int __lio_listio(int mode, aiocb_t * const list[], + int nent, struct sigevent *sig); +int __aio_suspend(void **list, int nent, const timespec_t *timo, int lf); +int __aio_error(aiocb_t *cb); +ssize_t __aio_return(aiocb_t *cb); +int __aio_fsync(int op, aiocb_t *aiocbp); +int __aio_cancel(int fd, aiocb_t *aiocbp); +int __aio_waitn(void **list, uint_t nent, uint_t *nwait, + const struct timespec *timeout, int mode); +int __aio_read64(aiocb64_t *cb); +int __aio_write64(aiocb64_t *cb); +int __lio_listio64(int mode, aiocb64_t *const list[], + int nent, struct sigevent *sig); +int __aio_error64(aiocb64_t *cb); +ssize_t __aio_return64(aiocb64_t *cb); +int __aio_fsync64(int op, aiocb64_t *aiocbp); +int __aio_cancel64(int fd, aiocb64_t *aiocbp); + +/* sig.c */ + +/* subr.c */ +int assfail(char *a, char *f, int l); diff --git a/usr/src/lib/libaio/common/ma.c b/usr/src/lib/libaio/common/ma.c new file mode 100644 index 0000000000..07f540f9c1 --- /dev/null +++ b/usr/src/lib/libaio/common/ma.c @@ -0,0 +1,60 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and 
Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 1992-2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + + +#include "libaio.h" + +/* + * Allocate a worker control block. + * We just use malloc(), like everywhere else in libaio. + * A more sophisticated allocator could be used, but oh well... + */ +aio_worker_t * +_aio_alloc_worker() +{ + aio_worker_t *aiowp; + + aiowp = malloc(sizeof (aio_worker_t)); + if (aiowp != NULL) { + (void) memset(aiowp, 0, sizeof (aio_worker_t)); + (void) mutex_init(&aiowp->work_qlock1, USYNC_THREAD, NULL); + (void) mutex_init(&aiowp->work_lock, USYNC_THREAD, NULL); + (void) cond_init(&aiowp->work_idle_cv, USYNC_THREAD, NULL); + } + return (aiowp); +} + +/* + * Free a worker control block. + * Declared with void *arg so it can be a thr_keycreate() destructor. 
+ */ +void +_aio_free_worker(void *arg) +{ + free(arg); +} diff --git a/usr/src/lib/libaio/common/posix_aio.c b/usr/src/lib/libaio/common/posix_aio.c new file mode 100644 index 0000000000..15155fceeb --- /dev/null +++ b/usr/src/lib/libaio/common/posix_aio.c @@ -0,0 +1,1720 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * posix_aio.c implements the POSIX async. 
I/O + * functions for librt + * + * aio_read + * aio_write + * aio_error + * aio_return + * aio_suspend + * lio_listio + * aio_fsync + * aio_cancel + */ + +#include "libaio.h" +#include <sys/file.h> + +extern int __fdsync(int, int); +extern aio_req_t *_aio_hash_find(aio_result_t *); + +/* __aio_suspend stuff */ + +extern int _aio_kernel_suspend; +extern int _aio_suscv_cnt; + +/* __aio_waitn stuff */ + +static mutex_t __aio_waitn_mutex = DEFAULTMUTEX; /* 1 aio_waitn per process */ +static cond_t _aio_waitn_cv = DEFAULTCV; /* wait for end of aio_waitn */ +extern int _aio_flags; +extern cond_t _aio_iowait_cv; +extern int _aio_doneq_cnt; +extern int _aio_outstand_cnt; +extern int _aio_waitncnt; + +static int _aio_check_timeout(const struct timespec *, struct timespec *, + int *); + +/* defines for timedwait in __aio_waitn() and __aio_suspend() */ +#define AIO_TIMEOUT_INDEF -1 +#define AIO_TIMEOUT_POLL 0 +#define AIO_TIMEOUT_WAIT 1 +#define AIO_TIMEOUT_UNDEF 2 + +/* + * List I/O list head stuff + */ +static aio_lio_t *_lio_head_freelist = NULL; +static int _aio_lio_alloc(aio_lio_t **); +static void _aio_lio_free(aio_lio_t *); +static void _lio_list_decr(aio_lio_t *); + +int +__aio_read(aiocb_t *cb) +{ + aio_lio_t *head = NULL; + + if ((cb == NULL) || cb->aio_reqprio < 0) { + errno = EINVAL; + return (-1); + } + + cb->aio_lio_opcode = LIO_READ; + return (_aio_rw(cb, head, &__nextworker_rd, AIOAREAD, + (AIO_KAIO | AIO_NO_DUPS), NULL)); +} + +int +__aio_write(aiocb_t *cb) +{ + aio_lio_t *head = NULL; + + if ((cb == NULL) || cb->aio_reqprio < 0) { + errno = EINVAL; + return (-1); + } + + cb->aio_lio_opcode = LIO_WRITE; + return (_aio_rw(cb, head, &__nextworker_wr, AIOAWRITE, + (AIO_KAIO | AIO_NO_DUPS), NULL)); +} + + +int +__lio_listio(int mode, aiocb_t * const list[], + int nent, struct sigevent *sig) +{ + int i, err; + int aio_ufs = 0; + int oerrno = 0; + aio_lio_t *head = NULL; + int state = 0; + static long aio_list_max = 0; + aio_worker_t **nextworker; + int EIOflg = 
0; + int rw; + int do_kaio = 0; + + if (!_kaio_ok) + _kaio_init(); + + if (aio_list_max == 0) + aio_list_max = sysconf(_SC_AIO_LISTIO_MAX); + + if (nent < 0 || (long)nent > aio_list_max) { + errno = EINVAL; + return (-1); + } + + switch (mode) { + case LIO_WAIT: + state = NOCHECK; + break; + case LIO_NOWAIT: + state = CHECK; + break; + default: + errno = EINVAL; + return (-1); + } + + for (i = 0; i < nent; i++) { + if (list[i]) { + if (list[i]->aio_lio_opcode != LIO_NOP) { + list[i]->aio_state = state; + if (KAIO_SUPPORTED(list[i]->aio_fildes)) + do_kaio++; + else + list[i]->aio_resultp.aio_errno = + ENOTSUP; + } else + list[i]->aio_state = NOCHECK; + } + } + + if (do_kaio) { + if ((err = (int)_kaio(AIOLIO, mode, list, nent, sig)) == 0) + return (0); + oerrno = errno; + } else { + oerrno = errno = ENOTSUP; + err = -1; + } + if ((err == -1) && (errno == ENOTSUP)) { + err = errno = 0; + /* + * If LIO_WAIT, or signal required, allocate a list head. + */ + if ((mode == LIO_WAIT) || ((sig) && + (sig->sigev_notify == SIGEV_SIGNAL))) + (void) _aio_lio_alloc(&head); + if (head) { + (void) mutex_lock(&head->lio_mutex); + head->lio_mode = (char)mode; + if ((mode == LIO_NOWAIT) && (sig) && + (sig->sigev_notify != SIGEV_NONE) && + (sig->sigev_signo > 0)) { + head->lio_signo = sig->sigev_signo; + head->lio_sigval.sival_ptr = + sig->sigev_value.sival_ptr; + } else + head->lio_signo = 0; + head->lio_nent = head->lio_refcnt = nent; + (void) mutex_unlock(&head->lio_mutex); + } + /* + * find UFS requests, errno == ENOTSUP/EBADFD, + */ + for (i = 0; i < nent; i++) { + if (list[i] && + ((list[i]->aio_resultp.aio_errno == ENOTSUP) || + (list[i]->aio_resultp.aio_errno == EBADFD))) { + if (list[i]->aio_lio_opcode == LIO_NOP) { + if (head) + _lio_list_decr(head); + continue; + } + if (list[i]->aio_resultp.aio_errno == EBADFD) + SET_KAIO_NOT_SUPPORTED( + list[i]->aio_fildes); + if (list[i]->aio_reqprio < 0) { + list[i]->aio_resultp.aio_errno = + EINVAL; + list[i]->aio_resultp.aio_return = 
-1; + EIOflg = 1; + if (head) + _lio_list_decr(head); + continue; + } + /* + * submit an AIO request with flags AIO_NO_KAIO + * to avoid the kaio() syscall in _aio_rw() + */ + switch (list[i]->aio_lio_opcode) { + case LIO_READ: + rw = AIOAREAD; + nextworker = &__nextworker_rd; + break; + case LIO_WRITE: + rw = AIOAWRITE; + nextworker = &__nextworker_wr; + break; + } + if (sig && sig->sigev_notify == SIGEV_PORT) + err = _aio_rw(list[i], head, nextworker, + rw, (AIO_NO_KAIO | AIO_NO_DUPS), + sig); + else + err = _aio_rw(list[i], head, nextworker, + rw, (AIO_NO_KAIO | AIO_NO_DUPS), + NULL); + if (err != 0) { + if (head) + _lio_list_decr(head); + list[i]->aio_resultp.aio_errno = err; + EIOflg = 1; + } else + aio_ufs++; + + } else { + if (head) + _lio_list_decr(head); + continue; + } + } + } + if (EIOflg) { + errno = EIO; + return (-1); + } + if ((mode == LIO_WAIT) && (oerrno == ENOTSUP)) { + /* + * call kaio(AIOLIOWAIT) to get all outstanding + * kernel AIO requests + */ + if ((nent - aio_ufs) > 0) { + (void) _kaio(AIOLIOWAIT, mode, list, nent, sig); + } + if (head && head->lio_nent > 0) { + (void) mutex_lock(&head->lio_mutex); + while (head->lio_refcnt > 0) { + errno = cond_wait(&head->lio_cond_cv, + &head->lio_mutex); + if (errno) { + (void) mutex_unlock(&head->lio_mutex); + return (-1); + } + } + (void) mutex_unlock(&head->lio_mutex); + for (i = 0; i < nent; i++) { + if (list[i] && + list[i]->aio_resultp.aio_errno) { + errno = EIO; + return (-1); + } + } + } + return (0); + } + return (err); +} + +static void +_lio_list_decr(aio_lio_t *head) +{ + (void) mutex_lock(&head->lio_mutex); + head->lio_nent--; + head->lio_refcnt--; + (void) mutex_unlock(&head->lio_mutex); +} + +extern void _cancelon(void); +extern void _canceloff(void); + +int +__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile) +{ + int cv_err; /* error code from cond_xxx() */ + int kerr; /* error code from _kaio(AIOSUSPEND) */ + int i; + struct timespec twait; /* copy of timo for 
internal calculations */ + struct timespec *wait = NULL; + int timedwait; + int req_outstanding; + aiocb_t **listp; + aiocb64_t **listp64; + hrtime_t hrtstart; + hrtime_t hrtend; + hrtime_t hrtres; + + if (nent <= 0) { + errno = EINVAL; + return (-1); + } + + if (timo) { + if (timo->tv_sec < 0 || timo->tv_nsec < 0 || + timo->tv_nsec >= NANOSEC) { + errno = EINVAL; + return (-1); + } + /* Initialize start time if time monitoring desired */ + if (timo->tv_sec > 0 || timo->tv_nsec > 0) { + timedwait = AIO_TIMEOUT_WAIT; + hrtstart = gethrtime(); + } else { + /* content of timeout = 0 : polling */ + timedwait = AIO_TIMEOUT_POLL; + } + } else { + /* timeout pointer = NULL : wait indefinitely */ + timedwait = AIO_TIMEOUT_INDEF; + } + + if (largefile) { + /* _LARGEFILE64_SOURCE && !_LP64 */ + listp64 = (aiocb64_t **)list; + for (i = 0; i < nent; i++) { + if (listp64[i] && listp64[i]->aio_state == CHECK) + listp64[i]->aio_state = CHECKED; + } + } else { + listp = (aiocb_t **)list; + for (i = 0; i < nent; i++) { + if (listp[i] && listp[i]->aio_state == CHECK) + listp[i]->aio_state = CHECKED; + } + } + + /* + * The next "if -case" is required to accelerate the + * access to completed RAW-IO requests. + */ + + if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) { + /* Only kernel requests pending */ + + _cancelon(); + + /* + * _aio_kernel_suspend is used to detect completed non RAW-IO + * requests. + * As long as this thread resides in the kernel (_kaio) further + * asynchronous non RAW-IO requests could be submitted. + */ + _aio_lock(); + _aio_kernel_suspend++; + _aio_unlock(); + + /* + * Always do the kaio() call without using the KAIO_SUPPORTED() + * checks because it is not mandatory to have a valid fd + * set in the list entries, only the resultp must be set. + * + * _kaio(AIOSUSPEND ...) return values : + * 0: everythink ok, completed request found + * -1: error + * 1: no error : _aiodone awaked the _kaio(AIOSUSPEND,,) + * system call using _kaio(AIONOTIFY). 
It means, that some + * non RAW-IOs completed inbetween. + */ + + if (largefile) + kerr = (int)_kaio(AIOSUSPEND64, list, nent, timo, -1); + else + kerr = (int)_kaio(AIOSUSPEND, list, nent, timo, -1); + + _aio_lock(); + _aio_kernel_suspend--; + _aio_unlock(); + + _canceloff(); + if (!kerr) + return (0); + } else { + kerr = 1; /* simulation: _kaio detected AIONOTIFY */ + } + + /* Return kernel error code, if no other IOs are outstanding */ + + _aio_lock(); + req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt; + _aio_unlock(); + + if (req_outstanding == 0) { + /* no IOs outstanding in the thread pool */ + if (kerr == 1) + /* return "no IOs completed" */ + errno = EAGAIN; + return (-1); + } + + /* IOs using the thread pool are outstanding */ + + if (timedwait == AIO_TIMEOUT_WAIT) { + /* time monitoring */ + hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC + + (hrtime_t)timo->tv_nsec; + hrtres = hrtend - gethrtime(); + if (hrtres <= 0) + hrtres = 1; + twait.tv_sec = hrtres / (hrtime_t)NANOSEC; + twait.tv_nsec = hrtres % (hrtime_t)NANOSEC; + wait = &twait; + } else { + if (timedwait == AIO_TIMEOUT_POLL) { + twait = *timo; /* content of timo = 0 : polling */ + wait = &twait; + } + } + + for (;;) { + int aio_errno; + int aio_inprogress; + + /* first scan file system requests */ + aio_inprogress = 0; + if (largefile) { + for (i = 0; i < nent; i++) { + if (listp64[i] == NULL) + continue; + aio_errno = listp64[i]->aio_resultp.aio_errno; + if (aio_errno == EINPROGRESS) { + aio_inprogress = 1; + } else { + if (aio_errno != ECANCELED) { + errno = 0; + return (0); + } + } + } + } else { + for (i = 0; i < nent; i++) { + if (listp[i] == NULL) + continue; + aio_errno = listp[i]->aio_resultp.aio_errno; + if (aio_errno == EINPROGRESS) { + aio_inprogress = 1; + } else { + if (aio_errno != ECANCELED) { + errno = 0; + return (0); + } + } + } + } + + /* + * If there aren't outstanding I/Os in the thread pool then + * we have to return here, provided that all kernel RAW-IOs 
+ * also completed. + * If the kernel was notified to return, then we have to check + * possible pending RAW-IOs. + */ + if (_aio_outstand_cnt == 0 && aio_inprogress == 0 && + kerr != 1) { + errno = EAGAIN; + break; + } + + /* + * There are outstanding IOs in the thread pool or the kernel + * was notified to return. + * Check pending RAW-IOs first. + */ + if (kerr == 1) { + /* + * _aiodone just notified the kernel about + * completed non RAW-IOs (AIONOTIFY was detected). + */ + if (timedwait == AIO_TIMEOUT_WAIT) { + /* Update remaining timeout for the kernel */ + hrtres = hrtend - gethrtime(); + if (hrtres <= 0) { + /* timer expired */ + errno = EAGAIN; + break; + } + wait->tv_sec = hrtres / (hrtime_t)NANOSEC; + wait->tv_nsec = hrtres % (hrtime_t)NANOSEC; + } + _aio_lock(); + _aio_kernel_suspend++; + _aio_unlock(); + + _cancelon(); + if (largefile) + kerr = (int)_kaio(AIOSUSPEND64, list, nent, + wait, -1); + else + kerr = (int)_kaio(AIOSUSPEND, list, nent, + wait, -1); + _canceloff(); + + _aio_lock(); + _aio_kernel_suspend--; + _aio_unlock(); + + if (!kerr) { + return (0); + } + } + + if (timedwait == AIO_TIMEOUT_POLL) { + errno = EAGAIN; + break; + } + + if (timedwait == AIO_TIMEOUT_WAIT) { + /* Update remaining timeout */ + hrtres = hrtend - gethrtime(); + if (hrtres <= 0) { + /* timer expired */ + errno = EAGAIN; + break; + } + wait->tv_sec = hrtres / (hrtime_t)NANOSEC; + wait->tv_nsec = hrtres % (hrtime_t)NANOSEC; + } + + _aio_lock(); + if (_aio_outstand_cnt == 0) { + _aio_unlock(); + continue; + } + + _aio_suscv_cnt++; /* ID for _aiodone (wake up) */ + + if (timedwait == AIO_TIMEOUT_WAIT) { + cv_err = cond_reltimedwait(&_aio_iowait_cv, + &__aio_mutex, wait); + + if (cv_err == ETIME) + cv_err = EAGAIN; + } else { + /* wait indefinitely */ + cv_err = cond_wait(&_aio_iowait_cv, &__aio_mutex); + } + + _aio_suscv_cnt--; + _aio_unlock(); + + if (cv_err) { + errno = cv_err; + break; + } + } + return (-1); +} + +int +__aio_error(aiocb_t *cb) +{ + aio_req_t *reqp; + 
int aio_errno = cb->aio_resultp.aio_errno; + + if (aio_errno == EINPROGRESS) { + if (cb->aio_state == CHECK) { + /* + * Always do the kaio() call without using + * the KAIO_SUPPORTED() + * checks because it is not mandatory to + * have a valid fd + * set in the aiocb, only the resultp must be set. + */ + if (((int)_kaio(AIOERROR, cb)) == EINVAL) { + errno = EINVAL; + return (-1); + } + } else if (cb->aio_state == CHECKED) + cb->aio_state = CHECK; + } else if (cb->aio_state == USERAIO) { + _aio_lock(); + if (reqp = _aio_hash_find(&cb->aio_resultp)) { + cb->aio_state = NOCHECK; + _lio_remove(reqp->lio_head); + (void) _aio_hash_del(reqp->req_resultp); + (void) _aio_req_remove(reqp); + _aio_req_free(reqp); + } + _aio_unlock(); + } + return (aio_errno); +} + +ssize_t +__aio_return(aiocb_t *cb) +{ + ssize_t ret; + aio_req_t *reqp; + + /* + * graceful detection of an invalid cb is not possible. a + * SIGSEGV will be generated if it is invalid. + */ + if (cb == NULL) { + errno = EINVAL; + exit(-1); + } + + /* + * we use this condition to indicate that + * aio_return has been called before + */ + if (cb->aio_resultp.aio_return == -1 && + cb->aio_resultp.aio_errno == EINVAL) { + errno = EINVAL; + return (-1); + } + + /* + * Before we return mark the result as being returned so that later + * calls to aio_return() will return the fact that the result has + * already been returned + */ + ret = cb->aio_resultp.aio_return; + cb->aio_resultp.aio_return = -1; + cb->aio_resultp.aio_errno = EINVAL; + if (cb->aio_state == USERAIO) { + _aio_lock(); + if (reqp = _aio_hash_find(&cb->aio_resultp)) { + cb->aio_state = NOCHECK; + _lio_remove(reqp->lio_head); + (void) _aio_hash_del(reqp->req_resultp); + (void) _aio_req_remove(reqp); + _aio_req_free(reqp); + } + _aio_unlock(); + } + return (ret); + +} + +void +_lio_remove(aio_lio_t *head) +{ + int refcnt; + + if (head) { + (void) mutex_lock(&head->lio_mutex); + refcnt = --head->lio_nent; + (void) mutex_unlock(&head->lio_mutex); + if 
(!refcnt) + _aio_lio_free(head); + } +} + +void +_aio_remove(aio_req_t *reqp) +{ + _lio_remove(reqp->lio_head); + _aio_lock(); + (void) _aio_hash_del(reqp->req_resultp); + (void) _aio_req_remove(reqp); + _aio_req_free(reqp); + _aio_unlock(); +} + +int +_aio_lio_alloc(aio_lio_t **head) +{ + aio_lio_t *lio_head; + + (void) mutex_lock(&__lio_mutex); + if (_lio_head_freelist == NULL) { + lio_head = (aio_lio_t *)malloc(sizeof (aio_lio_t)); + } else { + lio_head = _lio_head_freelist; + _lio_head_freelist = lio_head->lio_next; + } + if (lio_head == NULL) { + (void) mutex_unlock(&__lio_mutex); + return (-1); + } + (void) memset(lio_head, 0, sizeof (aio_lio_t)); + (void) cond_init(&lio_head->lio_cond_cv, USYNC_THREAD, NULL); + (void) mutex_init(&lio_head->lio_mutex, USYNC_THREAD, NULL); + *head = lio_head; + (void) mutex_unlock(&__lio_mutex); + return (0); +} + +void +_aio_lio_free(aio_lio_t *head) +{ + (void) mutex_lock(&__lio_mutex); + head->lio_next = _lio_head_freelist; + _lio_head_freelist = head; + (void) mutex_unlock(&__lio_mutex); +} + +/* + * This function returns the number of asynchronous I/O requests submitted. + */ + +static int +__aio_fsync_bar(aiocb_t *cb, aio_lio_t *head, aio_worker_t *aiowp, + int workerscnt) +{ + int i; + int err; + aio_worker_t *next = aiowp; + + for (i = 0; i < workerscnt; i++) { + err = _aio_rw(cb, head, &next, AIOFSYNC, AIO_NO_KAIO, NULL); + if (err != 0) { + (void) mutex_lock(&head->lio_mutex); + head->lio_mode = LIO_DESTROY; /* ignore fsync */ + head->lio_nent -= workerscnt - i; + head->lio_refcnt -= workerscnt - i; + (void) mutex_unlock(&head->lio_mutex); + errno = EAGAIN; + return (i); + } + next = next->work_forw; + } + return (i); +} + +/* + * This function is called from aio_fsync(3RT). 
+ */ + +int +__aio_fsync(int op, aiocb_t *cb) +{ + struct stat buf; + aio_lio_t *head; + int retval; + + if (cb == NULL) { + return (0); + } + + if ((op != O_DSYNC) && (op != O_SYNC)) { + errno = EINVAL; + return (-1); + } + + if (fstat(cb->aio_fildes, &buf) < 0) + return (-1); + + /* + * The first asynchronous I/O request in the current process + * will create a bunch of workers. + * If the sum of workers (read + write) is zero then the + * number of pending asynchronous I/O requests is zero. + * In such a case only execute the standard fsync(3C) or + * fdatasync(3RT) as appropriate (see flag of __fdsync()). + */ + if ((__wr_workerscnt + __rd_workerscnt) == 0) { + if (op == O_DSYNC) + return (__fdsync(cb->aio_fildes, FDSYNC)); + else + return (__fdsync(cb->aio_fildes, FSYNC)); + } + + /* + * re-use aio_offset as the op field. + * O_DSYNC - fdatasync() + * O_SYNC - fsync() + */ + cb->aio_offset = op; + cb->aio_lio_opcode = AIOFSYNC; + + /* + * create a list of fsync requests. the worker + * that gets the last request will do the fsync + * request. + */ + (void) _aio_lio_alloc(&head); + if (head == NULL) { + errno = EAGAIN; + return (-1); + } + head->lio_mode = LIO_FSYNC; + head->lio_signo = 0; + head->lio_nent = head->lio_refcnt = __wr_workerscnt + __rd_workerscnt; + /* insert an fsync request on every read workers' queue. */ + retval = __aio_fsync_bar(cb, head, __workers_rd, __rd_workerscnt); + if (retval != __rd_workerscnt) { + /* + * Less fsync requests than workers means that + * it was not possible to submit fsync requests to all + * workers. + * Actions: + * a) number of fsync requests submitted is 0: + * => free allocated memory (aio_lio_t). + * b) number of fsync requests submitted is > 0: + * => the last worker executing the fsync request + * will free the aio_lio_t struct. + */ + if (retval == 0) + _aio_lio_free(head); + return (-1); + } + + /* insert an fsync request on every write workers' queue. 
*/ + retval = __aio_fsync_bar(cb, head, __workers_wr, __wr_workerscnt); + if (retval != __wr_workerscnt) + return (-1); + return (0); +} + +int +__aio_cancel(int fd, aiocb_t *cb) +{ + aio_req_t *rp; + aio_worker_t *aiowp; + int done = 0; + int canceled = 0; + struct stat buf; + + if (fstat(fd, &buf) < 0) + return (-1); + + if (cb != NULL) { + if (cb->aio_state == USERAIO) { + _aio_lock(); + rp = _aio_hash_find(&cb->aio_resultp); + if (rp == NULL) { + _aio_unlock(); + return (AIO_ALLDONE); + } else { + aiowp = rp->req_worker; + (void) mutex_lock(&aiowp->work_qlock1); + (void) _aio_cancel_req(aiowp, rp, &canceled, + &done); + (void) mutex_unlock(&aiowp->work_qlock1); + _aio_unlock(); + if (done) + return (AIO_ALLDONE); + else if (canceled) + return (AIO_CANCELED); + else + return (AIO_NOTCANCELED); + } + } + + if (cb->aio_state == USERAIO_DONE) + return (AIO_ALLDONE); + + return ((int)_kaio(AIOCANCEL, fd, cb)); + } + + return (aiocancel_all(fd)); +} + + +/* + * aio_waitn can be used to reap the results of several I/O operations that + * were submitted asynchronously. The submission of I/Os can be done using + * existing POSIX interfaces: lio_listio, aio_write or aio_read. + * aio_waitn waits until "nwait" I/Os (supplied as a parameter) have + * completed and it returns the descriptors for these I/Os in "list". The + * maximum size of this list is given by "nent" and the actual number of I/Os + * completed is returned in "nwait". Otherwise aio_waitn might also + * return if the timeout expires. Additionally, aio_waitn returns 0 if + * successful or -1 if an error occurred. + */ + +/*ARGSUSED*/ +int +__aio_waitn(void **list, uint_t nent, uint_t *nwait, + const struct timespec *utimo, int largefile) +{ + int err = 0; + uint_t dnwait = 0; /* amount of requests in the waitn-done list */ + uint_t kwaitcnt; /* expected "done" requests from kernel */ + uint_t knentcnt; /* max. 
expected "done" requests from kernel */ + int uerrno = 0; + int kerrno = 0; /* save errno from _kaio() call */ + int timedwait = AIO_TIMEOUT_UNDEF; + aio_req_t *aiorp; +#if defined(_LARGEFILE64_SOURCE) && !defined(_LP64) + aiocb64_t *aiop64; +#endif + struct timespec end; + struct timespec twait; /* copy of utimo for internal calculations */ + struct timespec *wait = NULL; + + if (nent == 0 || *nwait == 0 || *nwait > nent) { + errno = EINVAL; + return (-1); + } + + if (nwait == NULL) { + errno = EFAULT; + return (-1); + } + + /* + * Only one running aio_waitn call per process allowed. + * Further calls will be blocked here until the running + * call finishes. + */ + + (void) mutex_lock(&__aio_waitn_mutex); + + while (_aio_flags & AIO_LIB_WAITN) { + + if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) { + (void) mutex_unlock(&__aio_waitn_mutex); + *nwait = 0; + return (0); + } + + _aio_flags |= AIO_LIB_WAITN_PENDING; + err = cond_wait(&_aio_waitn_cv, &__aio_waitn_mutex); + if (err != 0) { + (void) mutex_unlock(&__aio_waitn_mutex); + *nwait = 0; + errno = err; + return (-1); + } + } + + _aio_flags |= AIO_LIB_WAITN; + + (void) mutex_unlock(&__aio_waitn_mutex); + + if (*nwait >= AIO_WAITN_MAXIOCBS) { + err = _aio_check_timeout(utimo, &end, &timedwait); + if (err) { + *nwait = 0; + return (-1); + } + + if (timedwait != AIO_TIMEOUT_INDEF) { + twait = *utimo; + wait = &twait; + } + } + + /* + * _aio_lock() is not required at this time, but the + * condition is that "_aio_doneq_cnt" has to be updated + * before "_aio_outstand_cnt". Otherwise we could hit + * a zero value in both counters during the transition + * time (see _aiodone). + * + * If both counters are still set to zero, then only + * kernel requests are currently outstanding (raw-I/Os). 
+ */ + + if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) { + + for (;;) { + kwaitcnt = *nwait - dnwait; + knentcnt = nent - dnwait; + if (knentcnt > AIO_WAITN_MAXIOCBS) + knentcnt = AIO_WAITN_MAXIOCBS; + + kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt; + + err = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt, + &kwaitcnt, wait); + + if (err == 0) { + dnwait += kwaitcnt; + if (dnwait >= *nwait || + *nwait < AIO_WAITN_MAXIOCBS) + break; + + if (timedwait == AIO_TIMEOUT_WAIT) { + err = _aio_get_timedelta(&end, wait); + if (err == -1) { + /* timer expired */ + errno = ETIME; + break; + } + } + continue; + } + + if (errno == EAGAIN) { + if (dnwait > 0) + err = 0; + break; + } + + if (errno == ETIME || errno == EINTR) { + dnwait += kwaitcnt; + break; + } + + /* fatal error */ + break; + } + + *nwait = dnwait; + + /* check for pending aio_waitn() calls */ + (void) mutex_lock(&__aio_waitn_mutex); + _aio_flags &= ~AIO_LIB_WAITN; + if (_aio_flags & AIO_LIB_WAITN_PENDING) { + _aio_flags &= ~AIO_LIB_WAITN_PENDING; + (void) cond_signal(&_aio_waitn_cv); + } + (void) mutex_unlock(&__aio_waitn_mutex); + + return (err); + } + + /* File system I/Os outstanding ... */ + + if (timedwait == AIO_TIMEOUT_UNDEF) { + err = _aio_check_timeout(utimo, &end, &timedwait); + if (err) { + *nwait = 0; + return (-1); + } + + if (timedwait != AIO_TIMEOUT_INDEF) { + twait = *utimo; + wait = &twait; + } + } + + for (;;) { + uint_t sum_reqs; + + /* + * Calculate sum of active non RAW-IO requests (sum_reqs). + * If the expected amount of completed requests (*nwait) is + * greater than the calculated sum (sum_reqs) then + * use _kaio to check pending RAW-IO requests. + */ + + (void) mutex_lock(&__aio_mutex); + sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt; + kwaitcnt = (*nwait > sum_reqs) ? 
*nwait - sum_reqs : 0; + (void) mutex_unlock(&__aio_mutex); + + if (kwaitcnt != 0) { + + /* possibly some kernel I/Os outstanding */ + + knentcnt = nent - dnwait; + if (knentcnt > AIO_WAITN_MAXIOCBS) + knentcnt = AIO_WAITN_MAXIOCBS; + + kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt; + + (void) mutex_lock(&__aio_waitn_mutex); + _aio_flags |= AIO_WAIT_INPROGRESS; + (void) mutex_unlock(&__aio_waitn_mutex); + + err = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt, + &kwaitcnt, wait); + + (void) mutex_lock(&__aio_waitn_mutex); + _aio_flags &= ~AIO_WAIT_INPROGRESS; + (void) mutex_unlock(&__aio_waitn_mutex); + + if (err == 0) { + dnwait += kwaitcnt; + } else { + switch (errno) { + case EINVAL: + case EAGAIN: + /* don't wait for kernel I/Os */ + kerrno = 0; /* ignore _kaio() errno */ + (void) mutex_lock(&__aio_mutex); + *nwait = _aio_doneq_cnt + + _aio_outstand_cnt + dnwait; + (void) mutex_unlock(&__aio_mutex); + err = 0; + break; + case EINTR: + case ETIME: + /* just scan for completed LIB I/Os */ + dnwait += kwaitcnt; + timedwait = AIO_TIMEOUT_POLL; + kerrno = errno; /* save _kaio() errno */ + err = 0; + break; + default: + kerrno = errno; /* save _kaio() errno */ + break; + } + } + + if (err) + break; /* fatal kernel error */ + } + + /* check completed FS requests in the "done" queue */ + + (void) mutex_lock(&__aio_mutex); + while (_aio_doneq_cnt && (dnwait < nent)) { + /* get done requests */ + if ((aiorp = _aio_req_remove(NULL)) != NULL) { + (void) _aio_hash_del(aiorp->req_resultp); + list[dnwait++] = aiorp->req_iocb; +#if defined(_LARGEFILE64_SOURCE) && !defined(_LP64) + if (largefile) { + aiop64 = (void *)aiorp->req_iocb; + aiop64->aio_state = USERAIO_DONE; + } else +#endif + aiorp->req_iocb->aio_state = + USERAIO_DONE; + _aio_req_free(aiorp); + } + } + + if (dnwait >= *nwait) { + /* min. 
requested amount of completed I/Os satisfied */ + (void) mutex_unlock(&__aio_mutex); + break; + } + + if (timedwait == AIO_TIMEOUT_WAIT) { + if ((err = _aio_get_timedelta(&end, wait)) == -1) { + /* timer expired */ + (void) mutex_unlock(&__aio_mutex); + uerrno = ETIME; + break; + } + } + + /* + * If some I/Os are outstanding and we have to wait for them, + * then sleep here. + * _aiodone() will wakeup this thread as soon as the + * required amount of completed I/Os is done. + */ + + if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) { + + /* + * _aiodone() will wake up this thread as soon as + * - _aio_waitncnt -requests are completed or + * - _aio_outstand_cnt becomes zero. + * cond_reltimedwait() could also return with + * timeout error (ETIME). + */ + + if (*nwait < _aio_outstand_cnt) + _aio_waitncnt = *nwait; + else + _aio_waitncnt = _aio_outstand_cnt; + + (void) mutex_lock(&__aio_waitn_mutex); + _aio_flags |= AIO_IO_WAITING; + (void) mutex_unlock(&__aio_waitn_mutex); + + if (wait) + uerrno = cond_reltimedwait(&_aio_iowait_cv, + &__aio_mutex, wait); + else + uerrno = cond_wait(&_aio_iowait_cv, + &__aio_mutex); + + (void) mutex_lock(&__aio_waitn_mutex); + _aio_flags &= ~AIO_IO_WAITING; + (void) mutex_unlock(&__aio_waitn_mutex); + + if (uerrno == ETIME) { + timedwait = AIO_TIMEOUT_POLL; + (void) mutex_unlock(&__aio_mutex); + continue; + } + + if (uerrno != 0) + timedwait = AIO_TIMEOUT_POLL; + } + + (void) mutex_unlock(&__aio_mutex); + if (timedwait == AIO_TIMEOUT_POLL) { + /* polling or timer expired */ + break; + } + } + + /* check for pending aio_waitn() calls */ + (void) mutex_lock(&__aio_waitn_mutex); + _aio_flags &= ~AIO_LIB_WAITN; + if (_aio_flags & AIO_LIB_WAITN_PENDING) { + _aio_flags &= ~AIO_LIB_WAITN_PENDING; + (void) cond_signal(&_aio_waitn_cv); + } + (void) mutex_unlock(&__aio_waitn_mutex); + + *nwait = dnwait; + + errno = uerrno == 0 ? 
kerrno : uerrno; + if (errno) + err = -1; + else + err = 0; + + return (err); +} + +/* + * timedwait values : + * AIO_TIMEOUT_POLL : polling + * AIO_TIMEOUT_WAIT : timeout + * AIO_TIMEOUT_INDEF : wait indefinitely + */ +int +_aio_check_timeout(const struct timespec *utimo, struct timespec *end, + int *timedwait) +{ + struct timeval curtime; + + if (utimo) { + if ((utimo->tv_sec < 0) || (utimo->tv_nsec < 0) || + (utimo->tv_nsec >= NANOSEC)) { + /* + * invalid timer values => return EINVAL + * check for pending aio_waitn() calls + */ + (void) mutex_lock(&__aio_waitn_mutex); + _aio_flags &= ~AIO_LIB_WAITN; + if (_aio_flags & AIO_LIB_WAITN_PENDING) { + _aio_flags &= ~AIO_LIB_WAITN_PENDING; + (void) cond_signal(&_aio_waitn_cv); + } + (void) mutex_unlock(&__aio_waitn_mutex); + errno = EINVAL; + return (-1); + } + + if ((utimo->tv_sec > 0) || (utimo->tv_nsec > 0)) { + (void) gettimeofday(&curtime, NULL); + end->tv_sec = utimo->tv_sec + curtime.tv_sec; + end->tv_nsec = utimo->tv_nsec + + 1000 * curtime.tv_usec; + if (end->tv_nsec >= NANOSEC) { + end->tv_nsec -= NANOSEC; + end->tv_sec += 1; + } + *timedwait = AIO_TIMEOUT_WAIT; + } else { + /* polling */ + *timedwait = AIO_TIMEOUT_POLL; + } + } else { + *timedwait = AIO_TIMEOUT_INDEF; /* wait indefinitely */ + } + return (0); +} + +#if defined(_LARGEFILE64_SOURCE) && !defined(_LP64) + +int +__aio_read64(aiocb64_t *cb) +{ + aio_lio_t *head = NULL; + + if (cb == NULL || cb->aio_offset < 0 || cb->aio_reqprio < 0) { + errno = EINVAL; + return (-1); + } + + cb->aio_lio_opcode = LIO_READ; + return (_aio_rw64(cb, head, &__nextworker_rd, AIOAREAD64, + (AIO_KAIO | AIO_NO_DUPS), NULL)); +} + +int +__aio_write64(aiocb64_t *cb) +{ + aio_lio_t *head = NULL; + + if (cb == NULL || cb->aio_offset < 0 || cb->aio_reqprio < 0) { + errno = EINVAL; + return (-1); + } + cb->aio_lio_opcode = LIO_WRITE; + return (_aio_rw64(cb, head, &__nextworker_wr, AIOAWRITE64, + (AIO_KAIO | AIO_NO_DUPS), NULL)); +} + +int +__lio_listio64(int mode, aiocb64_t * 
const list[], + int nent, struct sigevent *sig) +{ + int i, err; + int aio_ufs = 0; + int oerrno = 0; + aio_lio_t *head = NULL; + int state = 0; + static long aio_list_max = 0; + aio_worker_t **nextworker; + int EIOflg = 0; + int rw; + int do_kaio = 0; + + if (!_kaio_ok) + _kaio_init(); + + if (aio_list_max == 0) + aio_list_max = sysconf(_SC_AIO_LISTIO_MAX); + + if (nent < 0 || nent > aio_list_max) { + errno = EINVAL; + return (-1); + } + + switch (mode) { + case LIO_WAIT: + state = NOCHECK; + break; + case LIO_NOWAIT: + state = CHECK; + break; + default: + errno = EINVAL; + return (-1); + } + + for (i = 0; i < nent; i++) { + if (list[i]) { + if (list[i]->aio_lio_opcode != LIO_NOP) { + list[i]->aio_state = state; + if (KAIO_SUPPORTED(list[i]->aio_fildes)) + do_kaio++; + else + list[i]->aio_resultp.aio_errno = + ENOTSUP; + } else + list[i]->aio_state = NOCHECK; + } + } + + if (do_kaio) { + if ((err = (int)_kaio(AIOLIO64, mode, list, nent, sig)) == 0) + return (0); + oerrno = errno; + } else { + oerrno = errno = ENOTSUP; + err = -1; + } + if ((err == -1) && (errno == ENOTSUP)) { + err = errno = 0; + /* + * If LIO_WAIT, or signal required, allocate a list head. 
+ */ + if ((mode == LIO_WAIT) || + ((sig) && (sig->sigev_notify == SIGEV_SIGNAL))) + (void) _aio_lio_alloc(&head); + if (head) { + (void) mutex_lock(&head->lio_mutex); + head->lio_mode = mode; + if ((mode == LIO_NOWAIT) && (sig) && + (sig->sigev_notify != SIGEV_NONE) && + (sig->sigev_signo > 0)) { + head->lio_signo = sig->sigev_signo; + head->lio_sigval.sival_ptr = + sig->sigev_value.sival_ptr; + } else + head->lio_signo = 0; + head->lio_nent = head->lio_refcnt = nent; + (void) mutex_unlock(&head->lio_mutex); + } + /* + * find UFS requests, errno == ENOTSUP/EBADFD, + */ + for (i = 0; i < nent; i++) { + if (list[i] && + ((list[i]->aio_resultp.aio_errno == ENOTSUP) || + (list[i]->aio_resultp.aio_errno == EBADFD))) { + if (list[i]->aio_lio_opcode == LIO_NOP) { + if (head) + _lio_list_decr(head); + continue; + } + if (list[i]->aio_resultp.aio_errno == EBADFD) + SET_KAIO_NOT_SUPPORTED( + list[i]->aio_fildes); + if (list[i]->aio_reqprio < 0) { + list[i]->aio_resultp.aio_errno = + EINVAL; + list[i]->aio_resultp.aio_return = -1; + EIOflg = 1; + if (head) + _lio_list_decr(head); + continue; + } + /* + * submit an AIO request with flags AIO_NO_KAIO + * to avoid the kaio() syscall in _aio_rw() + */ + switch (list[i]->aio_lio_opcode) { + case LIO_READ: + rw = AIOAREAD64; + nextworker = &__nextworker_rd; + break; + case LIO_WRITE: + rw = AIOAWRITE64; + nextworker = &__nextworker_wr; + break; + } + if (sig && (sig->sigev_notify == SIGEV_PORT)) + err = _aio_rw64(list[i], head, + nextworker, rw, + (AIO_NO_KAIO | AIO_NO_DUPS), sig); + else + err = _aio_rw64(list[i], head, + nextworker, rw, + (AIO_NO_KAIO | AIO_NO_DUPS), NULL); + if (err != 0) { + if (head) + _lio_list_decr(head); + list[i]->aio_resultp.aio_errno = err; + EIOflg = 1; + } else + aio_ufs++; + + } else { + if (head) + _lio_list_decr(head); + continue; + } + } + } + if (EIOflg) { + errno = EIO; + return (-1); + } + if ((mode == LIO_WAIT) && (oerrno == ENOTSUP)) { + /* + * call kaio(AIOLIOWAIT) to get all outstanding + * 
kernel AIO requests + */ + if ((nent - aio_ufs) > 0) { + _kaio(AIOLIOWAIT, mode, list, nent, sig); + } + if (head && head->lio_nent > 0) { + (void) mutex_lock(&head->lio_mutex); + while (head->lio_refcnt > 0) { + errno = cond_wait(&head->lio_cond_cv, + &head->lio_mutex); + if (errno) { + (void) mutex_unlock(&head->lio_mutex); + return (-1); + } + } + (void) mutex_unlock(&head->lio_mutex); + for (i = 0; i < nent; i++) { + if (list[i] && + list[i]->aio_resultp.aio_errno) { + errno = EIO; + return (-1); + } + } + } + return (0); + } + return (err); +} + +int +__aio_error64(aiocb64_t *cb) +{ + aio_req_t *reqp; + int aio_errno = cb->aio_resultp.aio_errno; + + if (aio_errno == EINPROGRESS) { + if (cb->aio_state == CHECK) { + /* + * Always do the kaio() call without using + * the KAIO_SUPPORTED() + * checks because it is not mandatory to + * have a valid fd + * set in the aiocb, only the resultp must be set. + */ + if ((_kaio(AIOERROR64, cb)) == EINVAL) { + errno = EINVAL; + return (-1); + } + } else if (cb->aio_state == CHECKED) + cb->aio_state = CHECK; + return (aio_errno); + } + + if (cb->aio_state == USERAIO) { + _aio_lock(); + if (reqp = _aio_hash_find(&cb->aio_resultp)) { + cb->aio_state = NOCHECK; + _lio_remove(reqp->lio_head); + (void) _aio_hash_del(reqp->req_resultp); + (void) _aio_req_remove(reqp); + _aio_req_free(reqp); + } + _aio_unlock(); + } + return (aio_errno); +} + +ssize_t +__aio_return64(aiocb64_t *cb) +{ + aio_req_t *reqp; + int ret; + + /* + * graceful detection of an invalid cb is not possible. a + * SIGSEGV will be generated if it is invalid. 
+ */ + if (cb == NULL) { + errno = EINVAL; + exit(-1); + } + /* + * we use this condition to indicate that + * aio_return has been called before + */ + if (cb->aio_resultp.aio_return == -1 && + cb->aio_resultp.aio_errno == EINVAL) { + errno = EINVAL; + return (-1); + } + + /* + * Before we return mark the result as being returned so that later + * calls to aio_return() will return the fact that the result has + * already been returned + */ + ret = cb->aio_resultp.aio_return; + cb->aio_resultp.aio_return = -1; + cb->aio_resultp.aio_errno = EINVAL; + if (cb->aio_state == USERAIO) { + _aio_lock(); + if (reqp = _aio_hash_find(&cb->aio_resultp)) { + cb->aio_state = NOCHECK; + _lio_remove(reqp->lio_head); + (void) _aio_hash_del(reqp->req_resultp); + (void) _aio_req_remove(reqp); + _aio_req_free(reqp); + } + _aio_unlock(); + } + return (ret); +} + +static int +__aio_fsync_bar64(aiocb64_t *cb, aio_lio_t *head, aio_worker_t *aiowp, + int workerscnt) +{ + int i; + int err; + aio_worker_t *next = aiowp; + + for (i = 0; i < workerscnt; i++) { + err = _aio_rw64(cb, head, &next, AIOFSYNC, AIO_NO_KAIO, NULL); + if (err != 0) { + (void) mutex_lock(&head->lio_mutex); + head->lio_mode = LIO_DESTROY; /* ignore fsync */ + head->lio_nent -= workerscnt - i; + head->lio_refcnt -= workerscnt - i; + (void) mutex_unlock(&head->lio_mutex); + errno = EAGAIN; + return (i); + } + next = next->work_forw; + } + return (i); +} + +int +__aio_fsync64(int op, aiocb64_t *cb) +{ + struct stat buf; + aio_lio_t *head; + int retval; + + if (cb == NULL) { + return (0); + } + + if ((op != O_DSYNC) && (op != O_SYNC)) { + errno = EINVAL; + return (-1); + } + + if (fstat(cb->aio_fildes, &buf) < 0) + return (-1); + + if ((buf.st_mode & S_IWRITE) == 0) { + errno = EBADF; + return (-1); + } + + /* + * The first asynchronous I/O request in the current process + * will create a bunch of workers. + * If the sum of workers (read + write) is zero then the + * number of pending asynchronous I/O requests is zero. 
+ * In such a case only execute the standard fsync(3C) or + * fdatasync(3RT) as appropriate (see flag of __fdsync()). + */ + if ((__wr_workerscnt + __rd_workerscnt) == 0) { + if (op == O_DSYNC) + return (__fdsync(cb->aio_fildes, FDSYNC)); + else + return (__fdsync(cb->aio_fildes, FSYNC)); + } + + /* + * re-use aio_offset as the op field. + * O_DSYNC - fdatasync() + * O_SYNC - fsync() + */ + cb->aio_offset = op; + cb->aio_lio_opcode = AIOFSYNC; + + /* + * create a list of fsync requests. the worker + * that gets the last request will do the fsync + * request. + */ + (void) _aio_lio_alloc(&head); + if (head == NULL) { + errno = EAGAIN; + return (-1); + } + + head->lio_mode = LIO_FSYNC; + head->lio_signo = 0; + head->lio_nent = head->lio_refcnt = __wr_workerscnt + __rd_workerscnt; + /* insert an fsync request on every read workers' queue. */ + retval = __aio_fsync_bar64(cb, head, __workers_rd, __rd_workerscnt); + if (retval != __rd_workerscnt) { + /* + * Less fsync requests than workers means that + * it was not possible to submit fsync requests to all + * workers. + * Actions: + * a) number of fsync requests submitted is 0: + * => free allocated memory (aio_lio_t). + * b) number of fsync requests submitted is > 0: + * => the last worker executing the fsync request + * will free the aio_lio_t struct. + */ + if (retval == 0) + _aio_lio_free(head); + return (-1); + } + + /* insert an fsync request on every write workers' queue. 
*/ + retval = __aio_fsync_bar64(cb, head, __workers_wr, __wr_workerscnt); + if (retval != __wr_workerscnt) + return (-1); + return (0); +} + +int +__aio_cancel64(int fd, aiocb64_t *cb) +{ + aio_req_t *rp; + aio_worker_t *aiowp; + int done = 0; + int canceled = 0; + struct stat buf; + + if (fstat(fd, &buf) < 0) + return (-1); + + if (cb != NULL) { + if (cb->aio_state == USERAIO) { + _aio_lock(); + rp = _aio_hash_find(&cb->aio_resultp); + if (rp == NULL) { + _aio_unlock(); + return (AIO_ALLDONE); + } else { + aiowp = rp->req_worker; + (void) mutex_lock(&aiowp->work_qlock1); + (void) _aio_cancel_req(aiowp, rp, &canceled, + &done); + (void) mutex_unlock(&aiowp->work_qlock1); + _aio_unlock(); + if (done) + return (AIO_ALLDONE); + else if (canceled) + return (AIO_CANCELED); + else + return (AIO_NOTCANCELED); + } + } + return ((int)_kaio(AIOCANCEL, fd, cb)); + } + + return (aiocancel_all(fd)); +} + +#endif /* (_LARGEFILE64_SOURCE) && !defined(_LP64) */ diff --git a/usr/src/lib/libaio/common/scalls.c b/usr/src/lib/libaio/common/scalls.c new file mode 100644 index 0000000000..f874a2b7cc --- /dev/null +++ b/usr/src/lib/libaio/common/scalls.c @@ -0,0 +1,74 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "libaio.h" + +extern int __uaio_ok; +extern void _cancelon(void); +extern void _canceloff(void); + +#pragma weak close = _libaio_close +int +_libaio_close(int fd) +{ + int rc; + + if (__uaio_ok) + (void) aiocancel_all(fd); + + _cancelon(); + rc = _close(fd); + _canceloff(); + + /* + * If the file is successfully closed, clear the + * bit for this file, as the next open may re-use this + * file descriptor, and the new file may have + * different kaio() behaviour + */ + if (rc == 0) + CLEAR_KAIO_SUPPORTED(fd); + + return (rc); + +} + +#pragma weak fork = _libaio_fork +pid_t +_libaio_fork(void) +{ + pid_t pid; + + if (__uaio_ok || _kaio_ok) { + pid = fork1(); + if (pid == 0) + _aio_forkinit(); + return (pid); + } + return (_fork()); +} diff --git a/usr/src/lib/libaio/common/sig.c b/usr/src/lib/libaio/common/sig.c new file mode 100644 index 0000000000..3f26b21f11 --- /dev/null +++ b/usr/src/lib/libaio/common/sig.c @@ -0,0 +1,301 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "libaio.h" +#include <dlfcn.h> + +mutex_t __sigio_pendinglock = DEFAULTMUTEX; /* protects __sigio_pending */ +int __sigio_pending = 0; /* count of pending SIGIO signals */ +int _sigio_enabled = 0; /* set if SIGIO has a signal handler */ +static struct sigaction sigioact; +sigset_t __sigiomask; +struct sigaction sigcanact; + +typedef int (*sig_act_t)(int, const struct sigaction *, struct sigaction *); +static sig_act_t next_sigaction; + +int +_aio_create_worker(aio_req_t *rp, int mode) +{ + struct aio_worker *aiowp, **workers, **nextworker; + int *aio_workerscnt; + void *(*func)(void *); + sigset_t oset; + int error; + + /* + * Put the new worker thread in the right queue. 
+ */ + switch (mode) { + case AIOWRITE: + workers = &__workers_wr; + nextworker = &__nextworker_wr; + aio_workerscnt = &__wr_workerscnt; + func = _aio_do_request; + break; + case AIOREAD: + workers = &__workers_rd; + nextworker = &__nextworker_rd; + aio_workerscnt = &__rd_workerscnt; + func = _aio_do_request; + break; + case AIOSIGEV: + workers = &__workers_si; + nextworker = &__nextworker_si; + func = _aio_send_sigev; + aio_workerscnt = &__si_workerscnt; + } + + if ((aiowp = _aio_alloc_worker()) == NULL) + return (-1); + + if (rp) { + rp->req_state = AIO_REQ_QUEUED; + rp->req_worker = aiowp; + aiowp->work_head1 = rp; + aiowp->work_tail1 = rp; + aiowp->work_next1 = rp; + aiowp->work_cnt1 = 1; + } + + (void) _sigprocmask(SIG_SETMASK, &_worker_set, &oset); + error = thr_create(NULL, __aiostksz, func, aiowp, + THR_BOUND | THR_DAEMON | THR_SUSPENDED, &aiowp->work_tid); + (void) _sigprocmask(SIG_SETMASK, &oset, NULL); + if (error) { + if (rp) { + rp->req_state = AIO_REQ_FREE; + rp->req_worker = NULL; + } + _aio_free_worker(aiowp); + return (-1); + } + + (void) mutex_lock(&__aio_mutex); + (*aio_workerscnt)++; + if (*workers == NULL) { + aiowp->work_forw = aiowp; + aiowp->work_backw = aiowp; + *nextworker = aiowp; + *workers = aiowp; + } else { + aiowp->work_backw = (*workers)->work_backw; + aiowp->work_forw = (*workers); + (*workers)->work_backw->work_forw = aiowp; + (*workers)->work_backw = aiowp; + } + _aio_worker_cnt++; + (void) mutex_unlock(&__aio_mutex); + + (void) thr_continue(aiowp->work_tid); + + return (0); +} + +void +_aio_cancel_on(struct aio_worker *aiowp) +{ + aiowp->work_cancel_flg = 1; +} + +void +_aio_cancel_off(struct aio_worker *aiowp) +{ + aiowp->work_cancel_flg = 0; +} + +/* + * resend a SIGIO signal that was sent while the + * __aio_mutex was locked. + * + * This function is called from _aio_unlock() when previously SIGIO was + * detected and deferred (signal caught). 
+ * There could be several threads calling _aio_lock() - _aio_unlock() and + * therefore __aiosendsig() must make sure that "kill" is being called + * only one time here. + * + */ +void +__aiosendsig(void) +{ + sigset_t oset; + int send_sigio; + + (void) _sigprocmask(SIG_BLOCK, &__sigiomask, &oset); + + (void) mutex_lock(&__sigio_pendinglock); + send_sigio = __sigio_pending; + __sigio_pending = 0; + (void) mutex_unlock(&__sigio_pendinglock); + + (void) _sigprocmask(SIG_SETMASK, &oset, NULL); + + if (__pid == (pid_t)-1) + __pid = getpid(); + if (send_sigio) + (void) kill(__pid, SIGIO); +} + +/* + * this is the low-level handler for SIGIO. the application + * handler will not be called if the signal is being blocked. + */ +static void +aiosigiohndlr(int sig, siginfo_t *sip, void *uap) +{ + struct sigaction tact; + int blocked; + + /* + * SIGIO signal is being blocked if either _sigio_masked + * or sigio_maskedcnt is set or if both these variables + * are clear and the _aio_mutex is locked. the last + * condition can only happen when _aio_mutex is being + * unlocked. this is a very small window where the mask + * is clear and the lock is about to be unlocked, however, + * it`s still set and so the signal should be defered. + * mutex_trylock() will be used now to check the ownership + * of the lock (instead of MUTEX_HELD). This is necessary because + * there is a window where the owner of the lock is deleted + * and the thread could become preempted. In that case MUTEX_HELD() + * will not detect the -still- ownership of the lock. + */ + if ((blocked = (__sigio_masked | __sigio_maskedcnt)) == 0) { + if (mutex_trylock(&__aio_mutex) == 0) + (void) mutex_unlock(&__aio_mutex); + else + blocked = 1; + } + + if (blocked) { + /* + * aio_lock() is supposed to be non re-entrant with + * respect to SIGIO signals. if a SIGIO signal + * interrupts a region of code locked by _aio_mutex + * the SIGIO signal should be deferred until this + * mutex is unlocked. 
a flag is set, __sigio_pending,
+ * to indicate that a SIGIO signal is pending and
+ * should be resent to the process via a kill().
+ * The libaio handler must be reinstalled here, otherwise
+ * the disposition gets the default status and the
+ * next SIGIO signal would terminate the process.
+ */
+ (void) mutex_lock(&__sigio_pendinglock);
+ __sigio_pending = 1;
+ (void) mutex_unlock(&__sigio_pendinglock);
+ tact = sigioact;
+ tact.sa_sigaction = aiosigiohndlr;
+ (void) sigaddset(&tact.sa_mask, SIGIO);
+ (void) (*next_sigaction)(SIGIO, &tact, NULL);
+ } else {
+ /*
+ * call the real handler.
+ */
+ (sigioact.sa_sigaction)(sig, sip, uap);
+ }
+}
+
+void
+aiosigcancelhndlr(int sig, siginfo_t *sip, void *uap)
+{
+ struct aio_worker *aiowp;
+ struct sigaction act;
+
+ if (sip != NULL && sip->si_code == SI_LWP) {
+ if (thr_getspecific(_aio_key, (void **)&aiowp) != 0)
+ _aiopanic("aiosigcancelhndlr, thr_getspecific()\n");
+ ASSERT(aiowp != NULL);
+ if (aiowp->work_cancel_flg)
+ siglongjmp(aiowp->work_jmp_buf, 1);
+ } else if (sigcanact.sa_handler == SIG_DFL) {
+ act.sa_handler = SIG_DFL;
+ (void) (*next_sigaction)(SIGAIOCANCEL, &act, NULL);
+ (void) kill(getpid(), sig);
+ } else if (sigcanact.sa_handler != SIG_IGN) {
+ (sigcanact.sa_sigaction)(sig, sip, uap);
+ }
+}
+
+#pragma weak sigaction = _sigaction
+int
+_sigaction(int sig, const struct sigaction *nact, struct sigaction *oact)
+{
+ struct sigaction tact;
+ struct sigaction oldact;
+
+ if (next_sigaction == NULL)
+ next_sigaction = (sig_act_t)dlsym(RTLD_NEXT, "_sigaction");
+
+ /*
+ * Only interpose on SIGIO when it is given a disposition other
+ * than SIG_IGN or SIG_DFL. Because SIGAIOCANCEL is SIGPROF,
+ * this signal should always be interposed on, so that SIGPROF
+ * can also be used by the application for profiling.
+ */ + if (sig == SIGIO || sig == SIGAIOCANCEL) { + if (oact) { + if (sig == SIGIO) + *oact = sigioact; + else + *oact = sigcanact; + } + if (nact == NULL) + return (0); + + tact = *nact; + if (sig == SIGIO) { + oldact = sigioact; + sigioact = tact; + if (tact.sa_handler == SIG_DFL || + tact.sa_handler == SIG_IGN) { + _sigio_enabled = 0; + } else { + _sigio_enabled = 1; + tact.sa_sigaction = aiosigiohndlr; + } + tact.sa_flags &= ~SA_NODEFER; + if ((*next_sigaction)(sig, &tact, NULL) == -1) { + sigioact = oldact; + return (-1); + } + } else { + oldact = sigcanact; + sigcanact = tact; + tact.sa_sigaction = aiosigcancelhndlr; + tact.sa_flags &= ~SA_NODEFER; + tact.sa_flags |= SA_SIGINFO; + if ((*next_sigaction)(sig, &tact, NULL) == -1) { + sigcanact = oldact; + return (-1); + } + } + return (0); + } + + return ((*next_sigaction)(sig, nact, oact)); +} diff --git a/usr/src/lib/libaio/common/subr.c b/usr/src/lib/libaio/common/subr.c new file mode 100644 index 0000000000..e3661f7cd2 --- /dev/null +++ b/usr/src/lib/libaio/common/subr.c @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "libaio.h" + +static void +_halt(void) +{ + (void) pause(); +} + +int _halted = 0; + +void +_aiopanic(char *s) +{ + char buf[256]; + + _halted = 1; + (void) snprintf(buf, sizeof (buf), + "AIO PANIC (thread = %d): %s\n", thr_self(), s); + (void) write(2, buf, strlen(buf)); + _halt(); +} + +int +assfail(char *a, char *f, int l) +{ + char buf[256]; + + (void) snprintf(buf, sizeof (buf), + "assertion failed: %s, file: %s, line:%d", a, f, l); + _aiopanic(buf); + /*NOTREACHED*/ + return (0); +} diff --git a/usr/src/lib/libaio/i386/Makefile b/usr/src/lib/libaio/i386/Makefile new file mode 100644 index 0000000000..af76f5ab90 --- /dev/null +++ b/usr/src/lib/libaio/i386/Makefile @@ -0,0 +1,31 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/libaio/sparc/Makefile b/usr/src/lib/libaio/sparc/Makefile new file mode 100644 index 0000000000..af76f5ab90 --- /dev/null +++ b/usr/src/lib/libaio/sparc/Makefile @@ -0,0 +1,31 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/libaio/sparcv9/Makefile b/usr/src/lib/libaio/sparcv9/Makefile new file mode 100644 index 0000000000..cb39a2beff --- /dev/null +++ b/usr/src/lib/libaio/sparcv9/Makefile @@ -0,0 +1,32 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com +include ../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) diff --git a/usr/src/lib/libaio/spec/Makefile b/usr/src/lib/libaio/spec/Makefile new file mode 100644 index 0000000000..1ab4a810d6 --- /dev/null +++ b/usr/src/lib/libaio/spec/Makefile @@ -0,0 +1,30 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright (c) 1998-1999 by Sun Microsystems, Inc. +# All rights reserved. 
+# +# lib/libaio/spec/Makefile + +include $(SRC)/lib/Makefile.spec.arch diff --git a/usr/src/lib/libaio/spec/Makefile.targ b/usr/src/lib/libaio/spec/Makefile.targ new file mode 100644 index 0000000000..5fd6ef49cf --- /dev/null +++ b/usr/src/lib/libaio/spec/Makefile.targ @@ -0,0 +1,37 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright (c) 1998-1999 by Sun Microsystems, Inc. +# All rights reserved. +# +# lib/libaio/spec/Makefile.targ + + +.KEEP_STATE: + +LIBRARY = libaio.a +VERS = .1 + +OBJECTS = aio.o + diff --git a/usr/src/lib/libaio/spec/aio.spec b/usr/src/lib/libaio/spec/aio.spec new file mode 100644 index 0000000000..99f0401f93 --- /dev/null +++ b/usr/src/lib/libaio/spec/aio.spec @@ -0,0 +1,208 @@ +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# ident "%Z%%M% %I% %E% SMI" +# + +function aiocancel +include <sys/asynch.h>, <aio.h> +declaration int aiocancel(aio_result_t *resultp) +version sparc=SISCD_2.3 sparcv9=SUNW_0.7 i386=SUNW_0.7 amd64=SUNW_0.7 +errno EACCES EFAULT EINVAL +exception $return == -1 +end + +function aioread +include <sys/types.h>, <sys/asynch.h>, <aio.h> +declaration int aioread(int fildes, char *bufp, int bufs, \ + off_t offset, int whence, aio_result_t *resultp) +version sparc=SISCD_2.3 sparcv9=SUNW_0.7 i386=SUNW_0.7 amd64=SUNW_0.7 +errno EAGAIN EBADF EFAULT EINVAL ENOMEM +exception $return == -1 +end + +function aioread64 +declaration int aioread64(int fd, caddr_t buf, int bufsz, off64_t offset, \ + int whence, aio_result_t *resultp) +arch i386 sparc +version i386=SUNW_1.0 sparc=SUNW_1.0 +end + +function aiowait +include <sys/asynch.h>, <aio.h>, <sys/time.h> +declaration aio_result_t *aiowait(struct timeval *timeout) +version sparc=SISCD_2.3 sparcv9=SUNW_0.7 i386=SUNW_0.7 amd64=SUNW_0.7 +errno EFAULT EINTR EINVAL +exception $return == (aio_result_t *)-1 +end + +function aiowrite +include <sys/types.h>, <sys/asynch.h>, <aio.h> +declaration int aiowrite(int fildes, char *bufp, int bufs, \ + off_t offset, int whence, aio_result_t *resultp) +version sparc=SISCD_2.3 sparcv9=SUNW_0.7 i386=SUNW_0.7 amd64=SUNW_0.7 +errno EAGAIN EBADF EFAULT EINVAL ENOMEM +exception $return == -1 +end + +function 
aiowrite64 +include <sys/types.h>, <sys/asynch.h>, <aio.h> +declaration int aiowrite64(int fildes, char *bufp, int bufs, \ + off64_t offset, int whence, aio_result_t *resultp) +arch sparc i386 +version sparc=SUNW_1.0 i386=SUNW_1.0 +errno EAGAIN EBADF EFAULT EINVAL ENOMEM +exception $return == -1 +end + +function assfail +declaration int assfail(char *a, char *f, int l) +version SUNW_1.1 +end + +function close +include <unistd.h> +declaration int close(int fildes) +version SUNW_0.7 +errno EBADF EINTR ENOLINK EIO +exception $return == -1 +binding nodirect +end + +function fork +declaration pid_t fork(void) +version SUNW_0.7 +exception $return == -1 +binding nodirect +end + +function sigaction extends libc/spec/sys.spec sigaction +version SUNW_0.7 +binding nodirect +end + +function _sigaction +weak sigaction +version SUNWprivate_1.1 +binding nodirect +end + +function __lio_listio +declaration int __lio_listio(int mode, aiocb_t * const list[], int nent, \ + struct sigevent *sig) +version SUNWprivate_1.1 +end + +function __aio_suspend +declaration int __aio_suspend(void **list, int nent, \ + const timespec_t *timo, int largefile) +version SUNWprivate_1.1 +end + +function __aio_error +declaration int __aio_error(aiocb_t *cb) +version SUNWprivate_1.1 +end + +function __aio_return +declaration ssize_t __aio_return(aiocb_t *cb) +version SUNWprivate_1.1 +end + +function __aio_read +declaration int __aio_read(aiocb_t *cb) +version SUNWprivate_1.1 +end + +function __aio_write +declaration int __aio_write(aiocb_t *cb) +version SUNWprivate_1.1 +end + +function __aio_fsync +declaration int __aio_fsync(int op, aiocb_t *aiocbp) +version SUNWprivate_1.1 +end + +function __aio_cancel +declaration int __aio_cancel(int fd, aiocb_t *aiocbp) +version SUNWprivate_1.1 +end + +function __aio_waitn +declaration int __aio_waitn(void **list, uint_t nent, uint_t *nwait, \ + const struct timespec *timeout, int mode) +version SUNWprivate_1.1 +end + +function __lio_listio64 +declaration int 
__lio_listio64(int mode, aiocb64_t * const list[], \ + int nent, struct sigevent *sig) +arch sparc i386 +version sparc=SUNWprivate_1.1 i386=SUNWprivate_1.1 +end + +function __aio_error64 +declaration int __aio_error64(aiocb64_t *cb) +arch sparc i386 +version sparc=SUNWprivate_1.1 i386=SUNWprivate_1.1 +end + +function __aio_return64 +declaration ssize_t __aio_return64(aiocb64_t *cb) +arch sparc i386 +version sparc=SUNWprivate_1.1 i386=SUNWprivate_1.1 +end + +function __aio_read64 +declaration int __aio_read64(aiocb64_t *cb) +arch sparc i386 +version sparc=SUNWprivate_1.1 i386=SUNWprivate_1.1 +end + +function __aio_write64 +declaration int __aio_write64(aiocb64_t *cb) +arch sparc i386 +version sparc=SUNWprivate_1.1 i386=SUNWprivate_1.1 +end + +function __aio_fsync64 +declaration int __aio_fsync64(int op, aiocb64_t *aiocbp) +arch sparc i386 +version sparc=SUNWprivate_1.1 i386=SUNWprivate_1.1 +end + +function __aio_cancel64 +declaration int __aio_cancel64(int fd, aiocb64_t *aiocbp) +arch sparc i386 +version sparc=SUNWprivate_1.1 i386=SUNWprivate_1.1 +end + +function _libaio_close +version SUNWprivate_1.1 +end + +function _libaio_fork +version SUNWprivate_1.1 +end diff --git a/usr/src/lib/libaio/spec/amd64/Makefile b/usr/src/lib/libaio/spec/amd64/Makefile new file mode 100644 index 0000000000..d334868181 --- /dev/null +++ b/usr/src/lib/libaio/spec/amd64/Makefile @@ -0,0 +1,44 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +.KEEP_STATE: + +include ../Makefile.targ + +# Add arch specific objects here +OBJECTS += + +include $(SRC)/lib/Makefile.lib +include $(SRC)/lib/Makefile.lib.64 + +# Uncomment the following if the linker complains +#amd64_C_PICFLAGS = $(amd64_C_BIGPICFLAGS) + +include $(SRC)/lib/Makefile.spec + +install: $(ROOTABILIB64) diff --git a/usr/src/lib/libaio/spec/i386/Makefile b/usr/src/lib/libaio/spec/i386/Makefile new file mode 100644 index 0000000000..19268499ce --- /dev/null +++ b/usr/src/lib/libaio/spec/i386/Makefile @@ -0,0 +1,44 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright (c) 1998-1999 by Sun Microsystems, Inc. 
+# All rights reserved. +# +# lib/libaio/spec/i386/Makefile + +.KEEP_STATE: + +include ../Makefile.targ + +# Add arch specific objects here +OBJECTS += + +include $(SRC)/lib/Makefile.lib + +# Uncomment the following if the linker complains +#i386_C_PICFLAGS = -K PIC + +include $(SRC)/lib/Makefile.spec + +install: $(ROOTABILIB) diff --git a/usr/src/lib/libaio/spec/sparc/Makefile b/usr/src/lib/libaio/spec/sparc/Makefile new file mode 100644 index 0000000000..9f95f97dc7 --- /dev/null +++ b/usr/src/lib/libaio/spec/sparc/Makefile @@ -0,0 +1,44 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright (c) 1998-1999 by Sun Microsystems, Inc. +# All rights reserved. 
+# +# lib/libaio/spec/sparc/Makefile + +.KEEP_STATE: + +include ../Makefile.targ + +# Add arch specific objects here +OBJECTS += + +include $(SRC)/lib/Makefile.lib + +# Uncomment the following if the linker complains +#sparc_C_PICFLAGS = -K PIC + +include $(SRC)/lib/Makefile.spec + +install: $(ROOTABILIB) diff --git a/usr/src/lib/libaio/spec/sparcv9/Makefile b/usr/src/lib/libaio/spec/sparcv9/Makefile new file mode 100644 index 0000000000..a90d93da31 --- /dev/null +++ b/usr/src/lib/libaio/spec/sparcv9/Makefile @@ -0,0 +1,45 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright (c) 1998-1999 by Sun Microsystems, Inc. +# All rights reserved. 
+# +# lib/libaio/spec/sparcv9/Makefile + +.KEEP_STATE: + +include ../Makefile.targ + +# Add arch specific objects here +OBJECTS += + +include $(SRC)/lib/Makefile.lib +include $(SRC)/lib/Makefile.lib.64 + +# Uncomment the following if the linker complains +#sparcv9_C_PICFLAGS = -K PIC + +include $(SRC)/lib/Makefile.spec + +install: $(ROOTABILIB64) diff --git a/usr/src/lib/libaio/spec/versions b/usr/src/lib/libaio/spec/versions new file mode 100644 index 0000000000..fd8cb78ec8 --- /dev/null +++ b/usr/src/lib/libaio/spec/versions @@ -0,0 +1,55 @@ +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# ident "%Z%%M% %I% %E% SMI" +# + +i386 { + SUNW_1.1: {SUNW_1.0}; + SUNW_1.0: {SUNW_0.7}; + SUNW_0.7; + SUNWprivate_1.1; +} + +sparc { + SUNW_1.1: {SUNW_1.0}; + SUNW_1.0: {SUNW_0.7}; + SUNW_0.7: {SISCD_2.3}; + SISCD_2.3; + SUNWprivate_1.1; +} + +sparcv9 { + SUNW_1.1: {SUNW_1.0}; + SUNW_1.0: {SUNW_0.7}; + SUNW_1.0; + SUNWprivate_1.1; +} + +amd64 { + SUNW_1.1: {SUNW_1.0}; + SUNW_1.0: {SUNW_0.7}; + SUNW_1.0; + SUNWprivate_1.1; +} |